Index: svgscripts/datatypes/manuscript.py
===================================================================
--- svgscripts/datatypes/manuscript.py (revision 96)
+++ svgscripts/datatypes/manuscript.py (revision 97)
@@ -1,159 +1,164 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent an archival unity of manuscript pages, i.e. workbooks, notebooks, folders of handwritten pages.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
from .color import Color
from .description import Description
from .earlier_description import EarlierDescription
from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION
sys.path.append('py2ttl')
from class_spec import SemanticClass
sys.path.append('shared_util')
from myxmlwriter import parse_xml_of_type, write_pretty, xml_has_type
class ArchivalManuscriptUnity(SemanticClass):
"""
This class represents an archival unity of manuscript pages (workbooks, notebooks and portfolios of handwritten pages).
@label archival unity of manuscript pages
Args:
title title of archival unity
manuscript_type type of manuscript: 'Arbeitsheft', 'Notizheft', 'Mappe'
manuscript_tree lxml.ElementTree
"""
XML_TAG = 'manuscript'
XML_COLORS_TAG = 'colors'
TYPE_DICTIONARY = { 'Mp': 'Mappe', 'N': 'Notizheft', 'W': 'Arbeitsheft' }
UNITTESTING = False
def __init__(self, title='', manuscript_type='', manuscript_tree=None):
self.colors = []
self.earlier_descriptions = []
self.description = None
self.manuscript_tree = manuscript_tree
self.manuscript_type = manuscript_type
self.pages = []
self.styles = []
self.title = title
if self.manuscript_type == '' and self.title != ''\
and self.title.split(' ')[0] in self.TYPE_DICTIONARY.keys():
self.manuscript_type = self.TYPE_DICTIONARY[self.title.split(' ')[0]]
def get_name_and_id(self):
"""Return an identification for object as 2-tuple.
"""
return '', self.title.replace(' ', '_')
@classmethod
def create_cls(cls, xml_manuscript_file, page_status_list=None, page_xpath='', update_page_styles=False):
"""Create an instance of ArchivalManuscriptUnity from a xml file of type FILE_TYPE_XML_MANUSCRIPT.
:return: ArchivalManuscriptUnity
"""
manuscript_tree = parse_xml_of_type(xml_manuscript_file, FILE_TYPE_XML_MANUSCRIPT)
title = manuscript_tree.getroot().get('title') if bool(manuscript_tree.getroot().get('title')) else ''
manuscript_type = manuscript_tree.getroot().get('type') if bool(manuscript_tree.getroot().get('type')) else ''
manuscript = cls(title=title, manuscript_type=manuscript_type, manuscript_tree=manuscript_tree)
manuscript.colors = [ Color.create_cls(node=color_node) for color_node in manuscript_tree.xpath('.//' + cls.XML_COLORS_TAG + '/' + Color.XML_TAG) ]
if page_xpath == '':
page_status = ''
if page_status_list is not None\
and type(page_status_list) is list\
and len(page_status_list) > 0:
page_status = '[' + ' and '.join([ f'contains(@status, "{status}")' for status in page_status_list ]) + ']'
page_xpath = f'//pages/page{page_status}/@output'
- manuscript.pages = [ Page(page_source)\
+ included_page_list = [ page_source\
for page_source in manuscript_tree.xpath(page_xpath)\
if isfile(page_source) and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source) ]
+ manuscript.pages = [ Page.create_cls(page_source, create_dummy_page=(page_source not in included_page_list))\
+ for page_source in manuscript_tree.xpath('//pages/page/@output')\
+ if isfile(page_source) and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source) ]
if update_page_styles:
- for page in manuscript.pages: page.update_styles(manuscript=manuscript, add_to_parents=True, create_css=True)
+ for page in manuscript.pages:
+ if 'xml_file' in page.__dict__.keys():
+ page.update_styles(manuscript=manuscript, add_to_parents=True, create_css=True)
description_node = manuscript_tree.xpath(Description.XML_TAG)[0]\
if len(manuscript_tree.xpath(Description.XML_TAG)) > 0\
else None
if description_node is not None:
manuscript.description = Description.create_cls_from_node(description_node.xpath(Description.ROOT_TAG)[0])\
if len(description_node.xpath(Description.ROOT_TAG)) > 0\
else None
for earlier_description_node in description_node.xpath(EarlierDescription.ROOT_TAG):
earlier_description = EarlierDescription.create_cls_from_node(earlier_description_node)
if earlier_description is not None:
manuscript.earlier_descriptions.append(earlier_description)
return manuscript
def get_color(self, hex_color) -> Color:
"""Return color if it exists or None.
"""
if hex_color in [ color.hex_color for color in self.colors ]:
return [ color for color in self.colors if color.hex_color == hex_color ][0]
return None
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {}
properties.update(cls.create_semantic_property_dictionary('title', str, 1))
properties.update(cls.create_semantic_property_dictionary('manuscript_type', str, 1))
properties.update(cls.create_semantic_property_dictionary('styles', list))
properties.update(cls.create_semantic_property_dictionary('pages', list))
properties.update(cls.create_semantic_property_dictionary('description', Description))
properties.update(cls.create_semantic_property_dictionary('earlier_descriptions', EarlierDescription))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def update_colors(self, color):
"""Update manuscript colors if color is not contained.
"""
if self.get_color(color.hex_color) is None:
self.colors.append(color)
if self.manuscript_tree is not None:
if len(self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)) > 0:
self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)[0].getparent().remove(self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)[0])
colors_node = ET.SubElement(self.manuscript_tree.getroot(), self.XML_COLORS_TAG)
for color in self.colors:
color.attach_object_to_tree(colors_node)
if not self.UNITTESTING:
write_pretty(xml_element_tree=self.manuscript_tree, file_name=self.manuscript_tree.docinfo.URL,\
script_name=__file__, backup=True,\
file_type=FILE_TYPE_XML_MANUSCRIPT)
def update_styles(self, *styles):
"""Update manuscript styles.
"""
for style in styles:
if style not in self.styles:
#print(style.css_styles)
self.styles.append(style)
Index: svgscripts/datatypes/reference.py
===================================================================
--- svgscripts/datatypes/reference.py (revision 96)
+++ svgscripts/datatypes/reference.py (revision 97)
@@ -1,160 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a text reference.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
import re
import sys
from .attachable_object import AttachableObject
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Reference(AttachableObject,SemanticClass):
"""
This class represents a text reference.
Args:
id (int): object id
first_line (int) first line of reference
last_line (int) last line of reference
is_uncertain (bool) whether reference is uncertain
title (str) title of reference
page_number (str) page_number of reference
tag (str) xml tag
"""
XML_TAG = 'reference'
intKeys = [ 'first_line', 'last_line']
boolKeys = [ 'is_uncertain' ]
- stringKeys = [ 'title', 'page_number' ]
+ stringKeys = [ 'title', 'page_number', 'word_reference' ]
- def __init__(self, node=None, id=0, first_line=-1, last_line=-1, is_uncertain=False, title='', page_number='', tag=XML_TAG):
+ def __init__(self, node=None, id=0, first_line=-1, last_line=-1, is_uncertain=False, title=None, page_number=None, word_reference=None, tag=XML_TAG):
self.intKeys = []
self.intKeys += Reference.intKeys
self.intKeys.append('id')
self.stringKeys = []
self.stringKeys += Reference.stringKeys
self.boolKeys = []
self.boolKeys += Reference.boolKeys
self.id = id
self.first_line = first_line
self.last_line = last_line
self.is_uncertain = is_uncertain
self.title = title
self.page_number = page_number
+ self.word_reference = word_reference
self.tag = tag
def attach_object_to_tree(self, target_tree):
"""Attach object to tree.
"""
if target_tree.__class__.__name__ == '_ElementTree':
target_tree = target_tree.getroot()
obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \
if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \
else ET.SubElement(target_tree, self.tag)
for key in self.boolKeys:
if self.__dict__[key] is not None:
obj_node.set(key.replace('_','-'), str(self.__dict__[key]).lower())
for key in self.intKeys:
if self.__dict__[key] is not None and self.__dict__[key] > -1:
obj_node.set(key.replace('_','-'), str(self.__dict__[key]))
for key in self.stringKeys:
if self.__dict__[key] is not None and self.__dict__[key] != '':
obj_node.set(key.replace('_','-'), str(self.__dict__[key]))
@classmethod
def create_cls_from_node(cls, node):
"""Creates a Reference from a (lxml.etree.Element) node.
:return: (datatypes.reference) Reference
"""
instance = cls()
for key in instance.boolKeys:
xml_key = key.replace('_', '-')
if bool(node.get(xml_key)):
instance.__dict__[key] = node.get(xml_key) == 'true'
for key in instance.intKeys:
xml_key = key.replace('_', '-')
if bool(node.get(xml_key)):
instance.__dict__[key] = int(node.get(xml_key))
for key in instance.stringKeys:
xml_key = key.replace('_', '-')
if bool(node.get(xml_key)):
instance.__dict__[key] = node.get(xml_key)
return instance
@classmethod
def create_cls(cls, node=None, id=0, is_uncertain=False, reference_string='', title='', page_number=''):
"""Creates a Reference from a (lxml.etree.Element) node or a reference_string.
:return: (datatypes.reference) Reference
"""
if node is not None:
return cls.create_cls_from_node(node)
else:
first_line = -1
last_line = -1
+ word_reference = None
if re.match(r'[0-9]+([a-z]+)*,[0-9]+(-[0-9]+)*', reference_string):
page_number = reference_string.split(',')[0]
line_numbers = reference_string.split(',')[1].split('-')
first_line = int(line_numbers[0])
last_line = int(line_numbers[1]) if len(line_numbers) > 1 else -1
else:
if ',' not in reference_string:
- line_numbers = reference_string.split('-')
- first_line = int(line_numbers[0])
- last_line = int(line_numbers[1]) if len(line_numbers) > 1 else -1
+ if re.match(r'\D+.*', reference_string):
+ word_reference = reference_string.strip()
+ else:
+ line_numbers = reference_string.split('-')
+ first_line = int(line_numbers[0])
+ last_line = int(line_numbers[1]) if len(line_numbers) > 1 else -1
else:
if ' ' not in reference_string:
raise Exception('String "{}" is not a valid reference_string'.format(reference_string))
title = reference_string.split(' ')[0]
return cls.create_cls(id=id, is_uncertain=is_uncertain, reference_string=reference_string[len(title)+1:],\
title=title, page_number=page_number)
return cls(id=id, is_uncertain=is_uncertain, first_line=first_line, last_line=last_line,\
- title=title, page_number=page_number)
+ title=title, page_number=page_number, word_reference=word_reference)
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {}
properties.update({'first_line': { 'class': int, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'firstLineOfReference',\
'label': 'first line of reference'}})
properties.update({'last_line': { 'class': int, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'lastLineOfReference',\
'label': 'last line of reference'}})
+ properties.update({'word_reference': { 'class': str, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'wordReference',\
+ 'label': 'refers to word on same line'}})
properties.update({'is_uncertain': { 'class': bool, 'cardinality': 0, 'name': 'IsUncertain', 'label': 'whether something is uncertain'}})
- for stringKey in cls.stringKeys:
- properties.update(cls.create_semantic_property_dictionary(stringKey, str, cardinality=1))
+ properties.update(cls.create_semantic_property_dictionary('title', str, cardinality=0))
+ properties.update(cls.create_semantic_property_dictionary('page_number', str, cardinality=0))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/page.py
===================================================================
--- svgscripts/datatypes/page.py (revision 96)
+++ svgscripts/datatypes/page.py (revision 97)
@@ -1,296 +1,314 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a page.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
-from os.path import isfile
+from os.path import isfile, basename
from progress.bar import Bar
from svgpathtools import svg2paths2, svg_to_paths
from svgpathtools.parser import parse_path
+import re
import sys
import warnings
from .box import Box
from .color import Color
from .image import Image, SVGImage
from .faksimile_image import FaksimileImage
from .faksimile_position import FaksimilePosition
from .lineNumber import LineNumber
from .line import Line
from .mark_foreign_hands import MarkForeignHands
from .matrix import Matrix
from .path import Path
from .positional_word_part import PositionalWordPart
from .super_page import SuperPage
from .style import Style
from .text_connection_mark import TextConnectionMark
from .text_field import TextField
from .transkriptionField import TranskriptionField
from .writing_process import WritingProcess
from .word import Word
from .word_deletion_path import WordDeletionPath
from .word_insertion_mark import WordInsertionMark
sys.path.append('py2ttl')
from class_spec import SemanticClass
FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION
FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT
STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK
STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK
class Page(SemanticClass,SuperPage):
"""
This class represents a page.
Args:
xml_source_file (str): name of the xml file to be instantiated.
xml_target_file (str): name of the xml file to which page info will be written.
"""
UNITTESTING = False
- def __init__(self, xml_source_file, faksimile_image=None, faksimile_svgFile=None, add_deletion_paths_to_words=True):
- super(Page,self).__init__(xml_source_file)
- self.update_property_dictionary('faksimile_image', faksimile_image)
- self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile)
- self.init_all_properties()
- self.add_style(style_node=self.page_tree.getroot().find('.//style'))
- self.init_node_objects()
- if add_deletion_paths_to_words:
- self.add_deletion_paths_to_words()
+ def __init__(self, xml_source_file=None, faksimile_image=None, faksimile_svgFile=None, add_deletion_paths_to_words=True, number=None):
+ if xml_source_file is not None:
+ super(Page,self).__init__(xml_source_file)
+ self.update_property_dictionary('faksimile_image', faksimile_image)
+ self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile)
+ self.init_all_properties()
+ self.add_style(style_node=self.page_tree.getroot().find('.//style'))
+ self.init_node_objects()
+ if add_deletion_paths_to_words:
+ self.add_deletion_paths_to_words()
+ else:
+ self.number = number
def add_deletion_paths_to_words(self):
"""Add deletion paths to words.
"""
if (self.svg_file is not None and isfile(self.svg_file))\
or (self.source is not None and isfile(self.source)):
svg_file = self.svg_file if self.svg_file is not None else self.source
transkription_field = TranskriptionField(svg_file)
words = [ word for word in self.words if word.deleted or True in [ part.deleted for part in word.word_parts ]]
for word in words:
word.add_deletion_paths(self.word_deletion_paths, tr_xmin=transkription_field.xmin, tr_ymin=transkription_field.ymin)
@classmethod
+ def create_cls(cls, xml_source_file, create_dummy_page=False):
+ """Create a Page.
+ """
+ if not create_dummy_page:
+ return cls(xml_source_file)
+ else:
+ m = re.match(r'(.*)(page[0]*)(.*)(\.xml)', xml_source_file)
+ if m is not None and len(m.groups()) > 3:
+ number = m.group(3)
+ else:
+ number = basename(xml_source_file).replace('.xml','')
+ return cls(number=number)
+
+ @classmethod
def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None):
"""Returns a list of Page instantiating a xml_file of type FILE_TYPE_SVG_WORD_POSITION
or xml_files contained in xml_file of type FILE_TYPE_XML_MANUSCRIPT.
[optional: instantiation depends on the fulfilment of a status_contains
and/or on the selection of some words by a word_selection_function].
"""
source_tree = ET.parse(xml_file)
if source_tree.getroot().find('metadata/type').text == FILE_TYPE_SVG_WORD_POSITION:
page = cls(xml_file)
if word_selection_function is None or len(word_selection_function(page.words)) > 0:
return [ page ]
else:
return []
elif source_tree.getroot().find('metadata/type').text == FILE_TYPE_XML_MANUSCRIPT:
pages = []
xpath = '//page/@output'
if status_contains != '' and status_not_contain != '':
xpath = '//page[contains(@status, "{0}") and not(contains(@status, "{1}"))]/@output'.format(status_contains, status_not_contain)
elif status_contains != '':
xpath = '//page[contains(@status, "{0}")]/@output'.format(status_contains)
elif status_not_contain != '':
xpath = '//page[not(contains(@status, "{0}"))]/@output'.format(status_not_contain)
for xml_source_file in source_tree.xpath(xpath):
if isfile(xml_source_file):
pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function)
return pages
else:
return []
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
- properties = { 'number': { 'class': str, 'cardinality': 1},\
- 'faksimile_image': { 'class': FaksimileImage, 'cardinality': 1},\
- 'orientation': { 'class': str, 'cardinality': 1},\
- 'svg_image': { 'class': SVGImage, 'cardinality': 1}}
+ properties = { 'number': { 'class': str, 'cardinality': 1}}
+ properties.update(cls.create_semantic_property_dictionary('faksimile_image', FaksimileImage))
+ properties.update(cls.create_semantic_property_dictionary('orientation', str))
+ properties.update(cls.create_semantic_property_dictionary('svg_image', SVGImage))
properties.update(cls.create_semantic_property_dictionary('text_field', TextField,\
cardinality=1, name='pageIsOnTextField', label='page is on text field',\
comment='Relates a page to the text field on a faksimile image.'))
- for key in [ 'lines', 'words', 'word_deletion_paths', 'word_insertion_marks']:
+ for key in [ 'lines', 'mark_foreign_hands', 'words', 'word_deletion_paths', 'word_insertion_marks']:
properties.update(cls.create_semantic_property_dictionary(key, list))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def init_node_objects(self):
"""Initialize all node objects.
"""
self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG) ]
self.words = [ Word.create_cls(word_node) for word_node in self.page_tree.getroot().xpath('./word') ]
self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in self.page_tree.getroot().xpath('//' + MarkForeignHands.XML_TAG) ]
self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ]
self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ]
self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ]
self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ]
self.word_deletion_paths = [ WordDeletionPath.create_cls(node, self) for node in self.page_tree.xpath('//' + WordDeletionPath.XML_TAG) ]
if self.faksimile_image is not None and self.text_field is not None:
for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks:
simple_word.init_word(self)
for wim in self.word_insertion_marks:
if wim.line_number > -1:
wim.line = [ line for line in self.lines if line.id == wim.line_number ][0]
def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]):
"""Update word ids and attach them to page.page_tree.
"""
if not self.is_locked():
update_function_on_word = [ update_function_on_word ]\
if type(update_function_on_word) != list\
else update_function_on_word
for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG):
node.getparent().remove(node)
for index, word in enumerate(self.words):
word.id = index
for func in update_function_on_word:
if callable(func):
func(word)
word.attach_word_to_tree(self.page_tree)
for index, mark_foreign_hands in enumerate(self.mark_foreign_hands):
mark_foreign_hands.id = index
if MarkForeignHands in include_special_words_of_type:
for func in update_function_on_word:
if callable(update_function_on_word):
func(mark_foreign_hands)
mark_foreign_hands.attach_word_to_tree(self.page_tree)
for index, text_connection_mark in enumerate(self.text_connection_marks):
text_connection_mark.id = index
if TextConnectionMark in include_special_words_of_type:
for func in update_function_on_word:
if callable(update_function_on_word):
func(text_connection_mark)
text_connection_mark.attach_word_to_tree(self.page_tree)
else:
print('locked')
def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None):
"""Update the data source of page.
"""
if faksimile_svgFile is not None:
self.faksimile_svgFile = faksimile_svgFile
data_node = self.page_tree.xpath('.//data-source')[0]\
if len(self.page_tree.xpath('.//data-source')) > 0\
else ET.SubElement(self.page_tree.getroot(), 'data-source')
data_node.set('file', self.faksimile_svgFile)
if xml_correction_file is not None:
data_node.set('xml-corrected-words', xml_correction_file)
def update_line_number_area(self, transkription_field, svg_tree=None):
"""Determines the width of the area where the line numbers are written in the page.source file.
"""
THRESHOLD = 0.4
if svg_tree is None:
svg_tree = ET.parse(self.source)
if len(self.line_numbers) > 1:
line_number = self.line_numbers[9]\
if transkription_field.is_page_verso() and len(self.line_numbers) > 8\
else self.line_numbers[1]
ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)\
if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)\
and LineNumber.IS_A_LINE_NUMBER(item)\
and LineNumber(raw_text_node=item, transkription_field=transkription_field).id == line_number.id ]
if len(ln_nodes) > 0:
matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform'))
if transkription_field.is_page_verso():
transkription_field.add_line_number_area_width(matrix.getX())
elif self.svg_file is not None and isfile(self.svg_file):
svg_path_tree = ET.parse(self.svg_file)
namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
svg_x = matrix.getX()
svg_y = self.line_numbers[1].bottom + transkription_field.ymin
use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
.format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces)
if len(use_nodes) > 0:
symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '')
d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
if len(d_strings) > 0 and d_strings[0] != '':
path = parse_path(d_strings[0])
xmin, xmax, ymin, ymax = path.bbox()
width = xmax - xmin
transkription_field.add_line_number_area_width(matrix.getX() + width)
def update_page_type(self, transkription_field=None):
"""Adds a source to page and attaches it to page_tree.
"""
if transkription_field is None:
if self.source is None or not isfile(self.source):
raise FileNotFoundError('Page does not have a source!')
transkription_field = TranskriptionField(self.source)
self.page_type = Page.PAGE_VERSO\
if transkription_field.is_page_verso()\
else Page.PAGE_RECTO
self.page_tree.getroot().set('pageType', self.page_type)
def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False):
"""Update styles of words and add them to their transkription_positions.
Args:
add_to_parents: Add styles also to word (and if not None to manuscript).
partition_according_to_styles: Partition word if its transkription_positions have different styles.
"""
style_dictionary = {}
if words is None:
words = self.words
for word in words:
if len(word.word_parts) > 0:
self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\
add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles)
for transkription_position in word.transkription_positions:
if len(transkription_position.positional_word_parts) > 0:
style_class = transkription_position.positional_word_parts[0].style_class
writing_process_id = -1
for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]:
writing_process_id = self.fontsizekey2stage_mapping.get(font_key)
style_class_key = (Style.remove_irrelevant_style_keys(style_class, self, extended_styles=create_css), writing_process_id)
if create_css:
if style_dictionary.get((style_class_key, word.deleted)) is None:
color = word.deletion_paths[0].style.color\
if len(word.deletion_paths) > 0 else None
style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\
create_css=create_css, deletion_color=color, writing_process_id=style_class_key[1] )
transkription_position.style = style_dictionary[(style_class_key, word.deleted)]
#print(style_dictionary[(style_class_key, word.deleted)])
else:
if style_dictionary.get(style_class_key) is None:
style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css)
style_dictionary[style_class_key].writing_process_id = style_class_key[1]
transkription_position.style = style_dictionary[style_class_key]
if add_to_parents and transkription_position.style not in word.styles:
word.styles.append(transkription_position.style)
if partition_according_to_styles:
word.split_according_to_status('style', splits_are_parts=True)
if manuscript is not None\
and add_to_parents:
manuscript.update_styles(*style_dictionary.values())
Index: svgscripts/datatypes/line.py
===================================================================
--- svgscripts/datatypes/line.py (revision 96)
+++ svgscripts/datatypes/line.py (revision 97)
@@ -1,124 +1,124 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a line.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import re
from lxml import etree as ET
from os.path import isfile
import sys
from .attachable_object import AttachableObject
from .matrix import Matrix
from .editor_comment import EditorComment
sys.path.append('py2ttl')
from class_spec import SemanticClass
from xml_conform_dictionary import XMLConformDictionary
class Line(AttachableObject,SemanticClass):
    """
    This class represents a line on a page.
    @label a line on a page
    Args:
        id (int): line number
        bottom (float): bottom position of the line on the transkription
        top (float): top position of the line on the transkription
    """
    # xml element name used when (de)serializing a line
    XML_TAG = 'line-number'
    WARN_NO_LINE_NUMBER = 'No line number found'
    def __init__(self, id=0, bottom=0.0, top=0.0, faksimile_outer_bottom=0.0, faksimile_outer_top=0.0, faksimile_inner_bottom=0.0, faksimile_inner_top=0.0):
        self.id = id
        # even line numbers are treated as "main" lines (see 'isMainLine' in get_semantic_dictionary)
        self.is_even = self.id % 2 == 0
        self.bottom = bottom
        # EditorComment instances attached to this line
        self.editor_comments = []
        self.top = top
        # vertical extent of the line next to ("outer") and inside ("inner") the faksimile image
        self.faksimile_outer_bottom = faksimile_outer_bottom
        self.faksimile_outer_top = faksimile_outer_top
        self.faksimile_inner_bottom = faksimile_inner_bottom
        self.faksimile_inner_top = faksimile_inner_top
    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.

        Renumbers the editor comments according to their list position
        before serializing all data attributes via XMLConformDictionary.
        """
        obj_node = self.get_or_create_node_with_id(target_tree)
        for id, comment in enumerate(self.editor_comments): comment.id = id
        xml_conform_dictionary = XMLConformDictionary.create_cls_from_data_object(self)
        xml_conform_dictionary.attach_data_to_tree(obj_node)
    @classmethod
    def create_cls_from_node(cls, node):
        """Create a cls from node.

        Missing attributes default to 0 / 0.0; child EditorComment nodes
        are deserialized into line.editor_comments.
        [:return:] Line
        """
        id = int(node.get('id')) if bool(node.get('id')) else 0
        bottom = float(node.get('bottom')) if bool(node.get('bottom')) else 0.0
        top = float(node.get('top')) if bool(node.get('top')) else 0.0
        faksimile_outer_bottom = float(node.get('faksimile-outer-bottom')) if bool(node.get('faksimile-outer-bottom')) else 0.0
        faksimile_outer_top = float(node.get('faksimile-outer-top')) if bool(node.get('faksimile-outer-top')) else 0.0
        faksimile_inner_bottom = float(node.get('faksimile-inner-bottom')) if bool(node.get('faksimile-inner-bottom')) else 0.0
        faksimile_inner_top = float(node.get('faksimile-inner-top')) if bool(node.get('faksimile-inner-top')) else 0.0
        line = cls(id=id, bottom=bottom, top=top, faksimile_inner_bottom=faksimile_inner_bottom, faksimile_inner_top=faksimile_inner_top,\
                faksimile_outer_top=faksimile_outer_top, faksimile_outer_bottom=faksimile_outer_bottom)
        line.editor_comments = [ EditorComment.create_cls_from_node(enode) for enode in node.xpath('./' + EditorComment.XML_TAG) ]
        return line
    @classmethod
    def get_semantic_dictionary(cls) -> dict:
        """ Creates and returns a semantic dictionary as specified by SemanticClass.
        """
        dictionary = {}
        class_dict = cls.get_class_dictionary()
        properties = { 'id': { cls.CLASS_KEY: int, 'cardinality': 1,\
                cls.PROPERTY_NAME: 'lineHasNumber', cls.PROPERTY_LABEL: 'line has number',\
                cls.PROPERTY_COMMENT: 'Relating a line to the number it has.'}}
        properties.update(cls.create_semantic_property_dictionary('editor_comments', EditorComment,\
                name='lineHasEditorComment'))
        properties.update(cls.create_semantic_property_dictionary('bottom', float, cardinality=1,\
                name='lineHasBottomValueOnTranskription'))
        properties.update(cls.create_semantic_property_dictionary('top', float, cardinality=1,\
                name='lineHasTopValueOnTranskription'))
        # NOTE(review): in the four faksimile properties below, the labels/comments for
        # "bottom" speak of a top position and vice versa — looks swapped, confirm intent.
        properties.update(cls.create_semantic_property_dictionary('faksimile_inner_bottom', float, cardinality=1,\
                name='lineHasInnerBottomValueOnFaksimile', label='line has a top position on the faksimile image',\
                comment='This is the top position of the area, in which the words belong to this line.'))
        properties.update(cls.create_semantic_property_dictionary('faksimile_inner_top', float, cardinality=1,\
                name='lineHasInnerTopValueOnFaksimile', label='line has a bottom position on the faksimile image',\
                comment='This is the bottom position of the area, in which the words belong to this line.'))
        properties.update(cls.create_semantic_property_dictionary('faksimile_outer_bottom', float, cardinality=1,\
                name='lineHasOuterBottomValueOnFaksimile', label='line has a top position next to the faksimile image',\
                comment='This is the top position of line outside of the faksimile image.'))
        properties.update(cls.create_semantic_property_dictionary('faksimile_outer_top', float, cardinality=1,\
                name='lineHasOuterTopValueOnFaksimile', label='line has a bottom position next to the faksimile image',\
                comment='This is the bottom position of line outside of the faksimile image.'))
        properties.update(cls.create_semantic_property_dictionary('is_even', bool, cardinality=1,\
                name='isMainLine', label='whether or not line is a main line',\
                comment='Indicates whether or not line is a main line. Lines that are not main lines, contain later inserted words.'))
        dictionary.update({cls.CLASS_KEY: class_dict})
        dictionary.update({cls.PROPERTIES_KEY: properties})
        return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/faksimile_image.py
===================================================================
--- svgscripts/datatypes/faksimile_image.py (revision 96)
+++ svgscripts/datatypes/faksimile_image.py (revision 97)
@@ -1,103 +1,104 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent faksimile images.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import fnmatch
from lxml import etree as ET
import os
from os.path import basename, dirname, isfile, realpath, sep
import sys
from .image import Image
from .text_field import TextField
sys.path.append('svgscripts')
from local_config import FAKSIMILE_LOCATION
class FaksimileImage(Image):
    """
    This class represents a faksimile image.
    Args:
        file_name (str): name of the image file.
        node (lxml.etree.Element) node, containing information
        URL (str): URL of image file.
        height (float): height of image
        width (float): width of image
        x (float): x position of image
        y (float): y position of image
    """
    XML_TAG = 'faksimile-image'
    #OLD_NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/'
    NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/'
    def __init__(self, node=None, file_name=None, local_path=None, URL=None, height=0.0, width=0.0, x=0.0, y=0.0, text_field=None):
        super(FaksimileImage, self).__init__(node=node, file_name=file_name, URL=URL, local_path=local_path,\
                height=height, width=width, text_field=text_field, tag=self.XML_TAG)
        self.x = x
        self.y = y
    def get_image_joined_with_text_field(self, text_field):
        """Returns a new instance of itself that has a text_field (text_field.TextField).
        """
        return FaksimileImage(file_name=self.file_name, local_path=self.local_path, URL=self.URL, height=self.height,\
                width=self.width, x=self.x, y=self.y, text_field=text_field)
    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates and returns a semantic dictionary as specified by SemanticClass.
        """
        dictionary = super(FaksimileImage,cls).get_semantic_dictionary()
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text_field', TextField))
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('URL', str, cardinality=1))
        return cls.return_dictionary_after_updating_super_classes(dictionary)
    @staticmethod
    def CREATE_IMAGE(image_node, source_file=None):
        """Instantiates a FaksimileImage from a (lxml.etree.Element) image_node.

        The image file is looked up via the node's xlink:href, first relative
        to source_file, then by searching FAKSIMILE_LOCATION; local_path stays
        None when the file cannot be found.
        """
        namespaces = image_node.nsmap
        if len(namespaces) == 0:
            # no namespace map on the node: fall back to an empty xlink prefix
            namespaces = { 'xlink': '' }
        local_path = image_node.get('{%s}href' % namespaces['xlink'])
        file_name = basename(local_path)
        if file_name != local_path and source_file is not None:
            # href is relative -> resolve it against the directory of source_file
            local_path = realpath(dirname(source_file)) + sep + local_path
            local_path = realpath(local_path)
        if not isfile(local_path):
            local_path = None
            # search the configured faksimile directory tree for the file name
            for path, dirs, files in os.walk(os.path.abspath(FAKSIMILE_LOCATION)):
                for filename in fnmatch.filter(files, file_name):
                    local_path = os.path.join(path, filename)
                    break
        # the nietzschesource.org page id is the file name without its .jpg extension
        URL = FaksimileImage.NIETZSCHE_SOURCES_URL + file_name.replace('.jpg','')
        height = float(image_node.get('height')) if bool(image_node.get('height')) else 0.0
        width = float(image_node.get('width')) if bool(image_node.get('width')) else 0.0
        x = float(image_node.get('x')) if bool(image_node.get('x')) else 0.0
        y = float(image_node.get('y')) if bool(image_node.get('y')) else 0.0
        return FaksimileImage(file_name=file_name, local_path=local_path, URL=URL, height=height, width=width, x=x, y=y)
Index: svgscripts/datatypes/editor_comment.py
===================================================================
--- svgscripts/datatypes/editor_comment.py (revision 96)
+++ svgscripts/datatypes/editor_comment.py (revision 97)
@@ -1,80 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a comment by the editors.
"""
# Copyright (C) University of Basel 2020 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
import re
import sys
from .attachable_object import AttachableObject
sys.path.append('py2ttl')
from class_spec import SemanticClass
from xml_conform_dictionary import XMLConformDictionary
class EditorComment(AttachableObject,SemanticClass):
    """
    This class represents a comment by the editors.

    Args:
        id (int): id of the comment (its position within a containing list)
        comment (str): the comment text
        is_uncertain (bool): whether the commented statement is uncertain
    """
    XML_TAG = 'editor-comment'
    def __init__(self, id=0, comment=None, is_uncertain=False):
        self.id = id
        self.comment = comment
        self.is_uncertain = is_uncertain
    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.

        The concrete subclass name is stored in the 'type' attribute so that
        create_cls_from_node can restore the correct class.
        """
        obj_node = self.get_or_create_node_with_id(target_tree)
        obj_node.set('type', self.__class__.__name__)
        xml_conform_dictionary = XMLConformDictionary.create_cls_from_data_object(self)
        xml_conform_dictionary.attach_data_to_tree(obj_node)
    @classmethod
    def create_cls_from_node(cls, node):
        """Initialize a cls from node.

        Dispatches to the subclass named by the node's 'type' attribute,
        falling back to cls itself when no matching subclass exists.
        [:return:] cls
        """
        target_cls = cls
        cls_type = node.get('type')
        target_classes = [ target for target in cls.__subclasses__() if target.__name__ == cls_type ]
        if len(target_classes) > 0:
            target_cls = target_classes[0]
        return XMLConformDictionary.CREATE_INSTANCEOF_CLASS_FROM_NODE(target_cls, node)
    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        properties = {}
        properties.update(cls.create_semantic_property_dictionary('is_uncertain', bool,\
                name='isUncertain', label='whether something is uncertain'))
        properties.update(cls.create_semantic_property_dictionary('comment', str))
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/process_words_post_merging.py
===================================================================
--- svgscripts/process_words_post_merging.py (revision 96)
+++ svgscripts/process_words_post_merging.py (revision 97)
@@ -1,473 +1,473 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to process words after they have been merged with faksimile data.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
from deprecated import deprecated
from functools import cmp_to_key
import getopt
import inspect
import lxml.etree as ET
from operator import attrgetter
import os
from os import listdir, sep, path, setpgrp, devnull
from os.path import exists, isfile, isdir, dirname, basename
from pathlib import Path as PathlibPath
from progress.bar import Bar
import re
import shutil
import string
from svgpathtools import svg2paths2, svg_to_paths
from svgpathtools.path import Path as SVGPath
from svgpathtools.path import Line
import sys
import tempfile
import warnings
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from datatypes.box import Box
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.word import Word, do_paths_intersect_saveMode, update_transkription_position_ids
from util import back_up
from process_files import update_svgposfile_status
+from process_footnotes import categorize_footnotes
sys.path.append('shared_util')
from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
UNITTESTING = False  # when True: suppress progress bars and skip file writes (see save_page)
DEBUG_WORD = None    # optionally a word-like object; traces deletion-path matching for words with the same text/line_number
MERGED_DIR = 'merged'  # subdirectory holding the first merged version of svg_pos_files
def categorize_paths(page, transkription_field=None):
    """Categorize all paths that are part of the transkription field.

    Paths are sorted into dots, boxes, word connectors, deletion/underline
    paths, text-area deletions and uncategorized paths; words intersecting
    deletion paths are marked deleted as a side effect.
    :return: a dictionary containing a list for each category of path.
    """
    if page.source is not None and isfile(page.source):
        MAX_HEIGHT_LINES = 1
        # maximal line height: tallest even-numbered (main) line + 2; 17 if there are no lines
        max_line = sorted(\
                [line_number.bottom-line_number.top for line_number in page.line_numbers if line_number.id % 2 == 0],\
                reverse=True)[0] + 2 if len(page.line_numbers) > 0 else 17
        tr_xmin = transkription_field.xmin if transkription_field is not None else 0.0
        tr_ymin = transkription_field.ymin if transkription_field is not None else 0.0
        paths, attributes = svg_to_paths.svg2paths(page.source)
        allpaths_on_tf = []
        allpaths_outside_tf = []
        attributes_outside_tf = []
        if transkription_field is None:
            transkription_field = TranskriptionField(page.source)
        # split the svg paths into those inside and those outside the transkription field
        # (bbox() yields (xmin, xmax, ymin, ymax), see the unpacking below)
        for index, path in enumerate(paths):
            attribute = attributes[index]
            if len(path) > 0\
            and path != transkription_field.path\
            and path.bbox()[0] >= tr_xmin\
            and path.bbox()[1] <= transkription_field.xmax:
                allpaths_on_tf.append(Path.create_cls(id=index, path=path, style_class=attribute.get('class'), page=page))
            elif len(path) > 0\
            and path != transkription_field.path:
                allpaths_outside_tf.append(Path.create_cls(id=index, path=path, style_class=attribute.get('class'), page=page))
                #print(index, allpaths_outside_tf[len(allpaths_outside_tf)-1].path, path)
                attributes_outside_tf.append(attribute)
        path_dict = { 'text_area_deletion_paths': [],\
                'deletion_or_underline_paths': [],\
                'box_paths': [],\
                'dots_paths': [],\
                'word_connector_paths': [],\
                'uncategorized_paths': [] }
        for mypath in allpaths_on_tf:
            xmin, xmax, ymin, ymax = mypath.path.bbox()
            start_line_number = page.get_line_number(mypath.path.start.imag-tr_ymin)
            if abs(xmax-xmin) < 1 and abs(ymax-ymin) < 1:
                # tiny in both dimensions -> a dot
                path_dict.get('dots_paths').append(mypath)
            elif abs(ymax-ymin) > MAX_HEIGHT_LINES and abs(ymax-ymin) < max_line and mypath.path.iscontinuous() and mypath.path.isclosed():
                # closed shape no taller than a line -> a box around a word
                path_dict.get('box_paths').append(mypath)
            elif abs(ymax-ymin) > MAX_HEIGHT_LINES and abs(ymax-ymin) > max_line and mypath.path.iscontinuous() and not mypath.path.isclosed():
                # open path taller than a line -> connects words across lines
                path_dict.get('word_connector_paths').append(mypath)
            elif abs(ymax-ymin) < MAX_HEIGHT_LINES:
                # flat path -> strike-through or underline on a single line
                mypath.start_line_number = start_line_number
                path_dict.get('deletion_or_underline_paths').append(mypath)
            elif start_line_number != -1 and start_line_number != page.get_line_number(mypath.path.end.imag-tr_ymin):
                # Check for "ladder", i.e. a path with 3 segments (seg0 is horizontal on line x, seg1 moves to line x+1, seg2 is horizontal on line x+1)
                if start_line_number + 1 == page.get_line_number(mypath.path.end.imag-tr_ymin)\
                and len(mypath.path._segments) == 3\
                and abs(mypath.path._segments[0].bbox()[3]-mypath.path._segments[0].bbox()[2]) < MAX_HEIGHT_LINES\
                and abs(mypath.path._segments[2].bbox()[3]-mypath.path._segments[2].bbox()[2]) < MAX_HEIGHT_LINES:
                    # split the ladder into its two horizontal segments, one deletion path per line
                    for index in 0, 2:
                        new_path = Path(parent_path=mypath, path=SVGPath(mypath.path._segments[index]))
                        new_path.start_line_number = page.get_line_number(new_path.path.start.imag-tr_ymin)
                        path_dict.get('deletion_or_underline_paths').append(new_path)
                else:
                    path_dict.get('text_area_deletion_paths').append(mypath)
            else:
                path_dict.get('uncategorized_paths').append(mypath)
        # deletion paths that did not hit any word are word underline candidates
        underline_path = mark_words_intersecting_with_paths_as_deleted(page, path_dict.get('deletion_or_underline_paths'), tr_xmin, tr_ymin)
        path_dict.update({'underline_path': underline_path})
        path_dict['uncategorized_paths'] += process_word_boxes(page, path_dict.get('box_paths'), transkription_field,\
                paths=allpaths_outside_tf, attributes=attributes_outside_tf, max_line=max_line)
        return path_dict
    elif not UNITTESTING:
        error_msg = 'Svg source file {} does not exist!'.format(page.source)\
                if page.source is not None else 'Page does not contain a source file!'
        raise FileNotFoundError(error_msg)
    return {}
def copy_page_to_merged_directory(page, manuscript_file=None):
    """Copy page to directory that contains the first version of all svg_pos_files that have been
    merged with the faksimile position data. MERGED_DIR is a subfolder of svg_pos_files-directory.

    :param page: the Page to back up (its xml file location is taken from page.page_tree.docinfo.URL)
    :param manuscript_file: optional manuscript xml file passed through to save_page for status updates
    """
    svg_pos_file = PathlibPath(page.page_tree.docinfo.URL)
    target_dir = svg_pos_file.parent / MERGED_DIR
    if not target_dir.is_dir():
        target_dir.mkdir()
    target_pos_file = target_dir / svg_pos_file.name
    # write the backup copy and mark the original file as merged
    save_page(page, str(svg_pos_file), target_svg_pos_file=str(target_pos_file), status=STATUS_MERGED_OK, manuscript_file=manuscript_file)
def find_special_words(page, transkription_field=None):
    """Find special words, remove them from words, process their content.

    Single-character words that match the special characters of
    MarkForeignHands or TextConnectionMark are converted into the respective
    objects, removed from page.words, and their content is extracted from the
    svg source.

    :raises FileNotFoundError: if the page has no existing svg source file.
    """
    if page.source is None or not isfile(page.source):
        raise FileNotFoundError('Page does not have a source!')
    if transkription_field is None:
        transkription_field = TranskriptionField(page.source)
    special_char_list = MarkForeignHands.get_special_char_list()
    special_char_list += TextConnectionMark.get_special_char_list()
    # candidates: words consisting of exactly one special character
    single_char_words = [ word for word in page.words if len(word.text) == 1 and word.text in special_char_list ]
    if not UNITTESTING:
        bar = Bar('find special words', max=len(single_char_words))
    for word in single_char_words:
        not bool(UNITTESTING) and bar.next()
        if word.text == MarkForeignHands.CLASS_MARK:
            id = len(page.mark_foreign_hands)
            page.mark_foreign_hands.append(MarkForeignHands.create_cls_from_word(word, id=id))
            page.words.remove(word)
        elif word.text in TextConnectionMark.SPECIAL_CHAR_LIST[0]\
        or (word.text in TextConnectionMark.SPECIAL_CHAR_LIST\
        and any(style in page.sonderzeichen_list for style\
        in word.transkription_positions[0].positional_word_parts[0].style_class.split(' '))):
            # either the primary special char, or a secondary one rendered in a
            # style class listed in page.sonderzeichen_list
            id = len(page.text_connection_marks)
            page.text_connection_marks.append(TextConnectionMark.create_cls_from_word(word, id=id))
            page.words.remove(word)
    not bool(UNITTESTING) and bar.finish()
    svg_tree = ET.parse(page.source)
    page.update_page_type(transkription_field=transkription_field)
    page.update_line_number_area(transkription_field, svg_tree=svg_tree)
    # style classes whose font-family ends with 'Italic' (passed to MarkForeignHands.find_content)
    italic_classes = [ key for key in page.style_dict\
            if bool(page.style_dict[key].get('font-family')) and page.style_dict[key]['font-family'].endswith('Italic') ]
    if len(page.mark_foreign_hands) > 0:
        MarkForeignHands.find_content(page.mark_foreign_hands, transkription_field, svg_tree, italic_classes=italic_classes,\
                SonderzeichenList=page.sonderzeichen_list)
    if len(page.text_connection_marks) > 0:
        TextConnectionMark.find_content_in_footnotes(page.text_connection_marks, transkription_field, svg_tree,\
                title=page.title, page_number=page.number)
def mark_words_intersecting_with_paths_as_deleted(page, deletion_paths, tr_xmin=0.0, tr_ymin=0.0):
    """Marks all words that intersect with deletion paths as deleted
    and adds these paths to word_deletion_paths.

    Both the words themselves and their word_parts are checked; afterwards
    each word is re-partitioned according to its deletion state.
    [:return:] list of .path.Path that might be word_underline_paths
    """
    show_progress = not bool(UNITTESTING)
    if show_progress:
        bar = Bar('mark words that intersect with deletion paths', max=len(page.words))
    for word in page.words:
        if show_progress:
            bar.next()
        word = mark_word_if_it_intersects_with_paths_as_deleted(word, page, deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin)
        for part_word in word.word_parts:
            part_word = mark_word_if_it_intersects_with_paths_as_deleted(part_word, page, deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin)
        word.partition_according_to_deletion()
    if show_progress:
        bar.finish()
    # whatever did not end up in page.word_deletion_paths is an underline candidate
    underline_candidates = set(deletion_paths) - set(page.word_deletion_paths)
    return list(underline_candidates)
def mark_word_if_it_intersects_with_paths_as_deleted(word, page, deletion_paths, tr_xmin=0.0, tr_ymin=0.0):
    """Marks word if it intersects with deletion paths as deleted
    and adds these paths to word_deletion_paths.

    Each transkription position of the word is tested separately; positions
    hit by a deletion path get deleted=True, and the (parent) deletion paths
    are attached to page.page_tree and collected in page.word_deletion_paths.
    [:return:] word
    """
    word.deleted = False
    for transkription_position in word.transkription_positions:
        # convert the transkription position into a path in svg coordinates
        word_path = Path.create_path_from_transkription_position(transkription_position,\
                tr_xmin=tr_xmin, tr_ymin=tr_ymin)
        intersecting_paths = [ deletion_path for deletion_path in deletion_paths\
                if do_paths_intersect_saveMode(deletion_path, word_path) ]
        if DEBUG_WORD is not None and word.text == DEBUG_WORD.text and word.line_number == DEBUG_WORD.line_number:
            # debug aid only: collect the paths on DEBUG_WORD's line for inspection
            relevant_paths = [ path for path in deletion_paths if path.start_line_number == DEBUG_WORD.line_number ]
            #print(word.line_number, word_path.path.bbox(), [ path.path.bbox() for path in relevant_paths])
        if len(intersecting_paths) > 0:
            #print(f'{word.line_number}: {word.id}, {word.text}: {intersecting_paths}')
            transkription_position.deleted = True
            for deletion_path in intersecting_paths:
                # register the top-most parent path instead of a split segment
                if deletion_path.parent_path is not None:
                    deletion_path = deletion_path.parent_path
                if deletion_path not in page.word_deletion_paths:
                    deletion_path.tag = Path.WORD_DELETION_PATH_TAG
                    deletion_path.attach_object_to_tree(page.page_tree)
                    page.word_deletion_paths.append(deletion_path)
    return word
def post_merging_processing_and_saving(svg_pos_file=None, new_words=None, page=None, manuscript_file=None, target_svg_pos_file=None):
    """Process words after merging with faksimile word positions.

    :param svg_pos_file: xml file with svg word positions (required if page is None)
    :param new_words: optional merged words replacing page.words
    :param page: the Page to process (required if svg_pos_file is None)
    :param manuscript_file: optional manuscript xml file for status updates
    :param target_svg_pos_file: optional alternative output file
    :raises Exception: if neither page nor svg_pos_file is given.
    :raises FileNotFoundError: if the page has no existing svg source.
    """
    if page is None and svg_pos_file is None:
        raise Exception('ERROR: post_merging_processing_and_saving needs either a Page or a svg_pos_file!')
    if page is None:
        page = Page(svg_pos_file)
    if page.source is None or not isfile(page.source):
        raise FileNotFoundError('Page instantiated from {} does not contain an existing source!'.format(svg_pos_file))
    if svg_pos_file is None:
        svg_pos_file = page.page_tree.docinfo.URL
    if new_words is not None:
        # replace the page's words by the merged words (ordered by id) and
        # remove the stale word nodes from the xml tree
        page.words = sorted(new_words, key=attrgetter('id'))
        for word_node in page.page_tree.xpath('.//word'):
            word_node.getparent().remove(word_node)
    manuscript = ArchivalManuscriptUnity.create_cls(manuscript_file)\
            if manuscript_file is not None\
            else None
    # keep a first-merge backup before post-processing modifies the page
    copy_page_to_merged_directory(page, manuscript_file=manuscript_file)
    transkription_field = TranskriptionField(page.source)
    update_faksimile_line_positions(page)
    find_special_words(page, transkription_field=transkription_field)
    #update_writing_process_ids(page)
    page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
    #TODO: find_hyphenated_words(page)
    categorize_paths(page, transkription_field=transkription_field)
    categorize_footnotes(page)
    save_page(page, svg_pos_file, target_svg_pos_file=target_svg_pos_file, status=STATUS_POSTMERGED_OK, manuscript_file=manuscript_file)
def process_word_boxes(page, box_paths, transkription_field, paths=None, attributes=None, max_line=17) -> list:
    """Process word boxes: partition words according to word boxes.

    Box paths are matched with paths on the margin field of the same line in
    order to create Box objects; words are then partitioned along these boxes
    and their correction history is created.
    [:return:] a list of paths that are not boxes
    """
    MAX_HEIGHT_LINES = 1
    not_boxes = []
    if not UNITTESTING:
        bar = Bar('process word boxes', max=len(page.words))
    svg_tree = ET.parse(page.source)
    # lxml maps the default namespace to the key None; rename it to 'ns' for xpath use
    namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
    allpaths_on_margin_field = []
    if paths is None or attributes is None:
        # no pre-extracted paths given: read them from the page's svg source
        paths = []
        raw_paths, attributes = svg_to_paths.svg2paths(page.source)
        for index, raw_path in enumerate(raw_paths):
            paths.append(Path.create_cls(id=index, path=raw_path, style_class=attributes[index].get('class'), page=page))
    for index, mypath in enumerate(paths):
        path = mypath.path
        xmin, xmax, ymin, ymax = path.bbox()
        attribute = attributes[index]
        # margin-field paths: entirely left of the transkription field on verso
        # pages, entirely right of it on recto pages, and no taller than a line
        if len(path) > 0\
        and path != transkription_field.path\
        and ((path.bbox()[1] < transkription_field.xmin and transkription_field.is_page_verso())\
             or (path.bbox()[0] > transkription_field.xmax and not transkription_field.is_page_verso()))\
        and abs(ymax-ymin) < max_line:
            allpaths_on_margin_field.append(mypath)#Path.create_cls(id=index, path=path, style_class=attribute.get('class'), page=page))
    # group the box paths by the line on which their vertical center lies
    box_line_number_dict = {}
    for box_path in sorted(box_paths, key=lambda path: path.get_median_y()):
        line_number = page.get_line_number(box_path.get_median_y(tr_ymin=transkription_field.ymin))
        if line_number > 0:
            if line_number not in box_line_number_dict.keys():
                box_line_number_dict.update({ line_number: [ box_path ]})
            else:
                box_line_number_dict.get(line_number).append(box_path)
    boxes = []
    for line_number in box_line_number_dict.keys():
        box_paths_on_line = sorted(box_line_number_dict[line_number], key=lambda path: path.get_x())
        margin_boxes_on_line = sorted([ margin_box for margin_box in allpaths_on_margin_field\
                if page.get_line_number(margin_box.get_median_y(tr_ymin=transkription_field.ymin)) == line_number ],\
                key=lambda path: path.get_x())
        # main (even) lines tolerate a bigger matching threshold than inserted lines
        threshold = 3 if line_number % 2 == 0 else 1.5
        if len(margin_boxes_on_line) > 0:
            for box_path in box_paths_on_line:
                #print(line_number, box_path.path.d(), len(margin_boxes_on_line))
                box = Box.create_box(box_path, margin_boxes_on_line, svg_tree=svg_tree,\
                        transkription_field=transkription_field, namespaces=namespaces, threshold=threshold)
                if box is not None:
                    boxes.append(box)
        else:
            # no margin counterpart on this line -> these paths are not boxes
            not_boxes += box_paths_on_line
    if len(boxes) > 0:
        for word in page.words:
            word.process_boxes(boxes, tr_xmin=transkription_field.xmin, tr_ymin=transkription_field.ymin)
            word.create_correction_history(page)
            if not bool(UNITTESTING):
                bar.next()
            elif word.earlier_version is not None:
                #print(f'{word.text} -> {word.earlier_version.text}')
                if word.earlier_version.earlier_version is not None:
                    print(f'{word.earlier_version.earlier_version.text}')
        not bool(UNITTESTING) and bar.finish()
    return not_boxes
def reset_page(page):
    """Reset all words that have word_parts in order to run the script a second time.

    If a first merged version of the page's svg_pos_file exists in MERGED_DIR,
    that version is loaded instead of undoing the changes word by word.

    :param page: the Page to reset.
    :return: the page to continue working with — either the freshly loaded
        first-merge version or the (possibly modified) input page.

    NOTE: previously the reloaded first-merge Page was only bound to the local
    name ``page`` and therefore silently discarded; callers must use the
    returned page to actually pick up the reloaded version.
    """
    svg_pos_file = PathlibPath(page.page_tree.docinfo.URL)
    first_merge_version = svg_pos_file.parent / MERGED_DIR / svg_pos_file.name
    if first_merge_version.exists():
        # prefer the pristine first-merge backup over manual undoing
        page = Page(str(first_merge_version))
    else:
        word_with_wordparts = [ word for word in page.words if len(word.word_parts) > 0 ]
        word_with_wordparts += [ word for word in page.words if word.earlier_version is not None ]
        page_changed = False
        if len(word_with_wordparts) > 0:
            for word in word_with_wordparts:
                word.undo_partitioning()
                update_transkription_position_ids(word)
            page_changed = True
        no_line_numbers = [ word for word in page.words if word.line_number == -1 ]
        if len(no_line_numbers) > 0:
            for word in no_line_numbers:
                if len(word.transkription_positions) > 0:
                    # re-derive the line number from the vertical center of the first position
                    word.line_number = page.get_line_number((word.transkription_positions[0].top+word.transkription_positions[0].bottom)/2)
                else:
                    msg = f'Word {word.id} {word.text} has no transkription_position!'
                    warnings.warn(msg)
            page_changed = True
        if page_changed:
            page.update_and_attach_words2tree()
    return page
def save_page(page, svg_pos_file, target_svg_pos_file=None, status=None, manuscript_file=None):
    """Save page to target_file and update status of file.

    Attaches the page's words to its xml tree; in UNITTESTING mode nothing
    is written to disk.
    """
    page.update_and_attach_words2tree()
    if UNITTESTING:
        # tests only need the in-memory tree updated
        return
    output_file = target_svg_pos_file if target_svg_pos_file is not None else svg_pos_file
    if status is not None:
        update_svgposfile_status(svg_pos_file, manuscript_file=manuscript_file, status=status)
    write_pretty(xml_element_tree=page.page_tree, file_name=output_file, script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
def update_faksimile_line_positions(page):
    """Update faksimile_positions of the lines

    Derives each line's faksimile_inner_top/bottom from the faksimile
    positions of its words, then repairs empty or inverted values by
    interpolating from neighbouring lines, and writes the lines back to
    the page tree.
    """
    num_lines = len(page.line_numbers)
    ymin = page.text_field.ymin\
            if page.text_field is not None\
            else 0.0
    for line_number in page.line_numbers:
        # only lines that have at least one word with a faksimile position
        if len([ word.faksimile_positions[0] for word in page.words\
                if len(word.faksimile_positions) > 0 and word.line_number == line_number.id ]) > 0:
            line_number.faksimile_inner_top = min([ word.faksimile_positions[0].top for word in page.words\
                    if len(word.faksimile_positions) > 0 and word.line_number == line_number.id ])
            line_number.faksimile_inner_bottom = max([ word.faksimile_positions[0].bottom for word in page.words\
                    if len(word.faksimile_positions) > 0 and word.line_number == line_number.id ])
            if line_number.id % 2 == 0:
                # main (even) lines: outer values are the inner values shifted by the text field's ymin
                line_number.faksimile_outer_top = line_number.faksimile_inner_top - ymin
                line_number.faksimile_outer_bottom = line_number.faksimile_inner_bottom - ymin
    for index, line_number in enumerate(page.line_numbers):
        # repair lines whose bottom was never set or is above their top
        if line_number.faksimile_inner_bottom == 0.0\
        or line_number.faksimile_inner_bottom < line_number.faksimile_inner_top:
            if index == 0 and num_lines > 1:
                line_number.faksimile_inner_bottom = page.line_numbers[index+1].top
            elif index == num_lines-1 and page.text_field is not None:
                line_number.faksimile_inner_bottom = round(page.text_field.height + page.text_field.ymin, 3)
            elif index > 0 and index < num_lines-1:
                # take the larger of: next line's top / previous line's bottom
                line_number.faksimile_inner_bottom = page.line_numbers[index+1].faksimile_inner_top\
                        if page.line_numbers[index+1].faksimile_inner_top > page.line_numbers[index-1].faksimile_inner_bottom\
                        else page.line_numbers[index-1].faksimile_inner_bottom
        line_number.attach_object_to_tree(page.page_tree)
def update_writing_process_ids(page):
    """Update the writing_process_ids of the words and split accordingly.

    For every word on the page: assign writing process ids to its
    transkription positions, then partition the word along those ids.
    """
    for current_word in page.words:
        current_word.set_writing_process_id_to_transkription_positions(page)
        current_word.partition_according_to_writing_process_id()
def usage():
    """Print usage information for this script (taken from main's docstring)."""
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to process words after they have been merged with faksimile data.
    svgscripts/process_words_post_merging.py [OPTIONS] a xml file about a manuscript, containing information about its pages.
    a xml file about a page, containing information about svg word positions.
    OPTIONS:
    -h|--help show help
    -i|--include-missing-line-number run script on files that contain words without line numbers
    -r|--rerun rerun script on a svg_pos_file that has already been processed
    :return: exit code (int)
    """
    # by default, skip pages whose status already contains STATUS_POSTMERGED_OK
    status_not_contain = STATUS_POSTMERGED_OK
    include_missing_line_number = False
    try:
        opts, args = getopt.getopt(argv, "hir", ["help", "include-missing-line-number", "rerun" ])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-i', '--include-missing-line-number'):
            include_missing_line_number = True
        elif opt in ('-r', '--rerun'):
            # empty filter -> also process already post-merged pages
            status_not_contain = ''
    if len(args) < 1:
        usage()
        return 2
    exit_status = 0
    file_a = args[0]
    if isfile(file_a):
        # the argument may be a manuscript file (then all its pages are processed)
        # or a single page file
        manuscript_file = file_a\
                if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\
                else None
        counter = 0
        for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK, status_not_contain=status_not_contain):
            reset_page(page)
            no_line_numbers = [ word for word in page.words if word.line_number == -1 ]
            if not include_missing_line_number and len(no_line_numbers) > 0:
                # refuse to process pages with unnumbered words unless -i was given
                not UNITTESTING and print(Fore.RED + f'Page {page.title}, {page.number} has words with no line number!')
                for word in no_line_numbers:
                    not UNITTESTING and print(f'Word {word.id}: {word.text}')
            else:
                back_up(page, page.xml_file)
                not UNITTESTING and print(Fore.CYAN + f'Processing {page.title}, {page.number} ...' + Style.RESET_ALL)
                post_merging_processing_and_saving(page=page, manuscript_file=manuscript_file)
                counter += 1
        not UNITTESTING and print(Style.RESET_ALL + f'[{counter} pages processed]')
    else:
        raise FileNotFoundError('File {} does not exist!'.format(file_a))
    return exit_status
if __name__ == "__main__":
    # script entry point: forward CLI arguments (without the program name)
    # and use main's return value as the process exit code
    sys.exit(main(sys.argv[1:]))
Index: svgscripts/extract_line_continuation.py
===================================================================
--- svgscripts/extract_line_continuation.py (revision 0)
+++ svgscripts/extract_line_continuation.py (revision 97)
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This program can be used to extract line continuations.
+"""
+# Copyright (C) University of Basel 2019 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>. 1}}}
+
+from colorama import Fore, Style
+import getopt
+import lxml.etree as ET
+import re
+import sys
+from os import listdir, sep, path
+from os.path import isfile, isdir, dirname
+import warnings
+
+
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from datatypes.box import text_node_is_inside_match_box, tspan_node_is_inside_match_box
+from datatypes.line import Line
+from datatypes.line_continuation import LineContinuation
+from datatypes.matrix import Matrix
+from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
+from datatypes.reference import Reference
+from datatypes.transkriptionField import TranskriptionField
+
+from util import back_up
+sys.path.append('shared_util')
+from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
+
+UNITTESTING = False
+DEBUG = False
+
+def extract_line_continuations(page: Page, svg_file=None):
+ """Extract line continuations.
+ """
+ if svg_file is None:
+ if page.source is None or not isfile(page.source):
+ raise Exception('Function "extract_line_continuations" needs a page with a valid source or a svg_file!')
+ svg_file = page.source
+ svg_tree = ET.parse(svg_file)
+ transkription_field = TranskriptionField(svg_file)
+ page.update_line_number_area(transkription_field, svg_tree=svg_tree)
+ for line in page.lines: line.editor_comments = []
+ namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
+ arrow_style_key = [ key for key, value in page.style_dict.items() if value.get('font-family') == 'Frutiger-Europeen'][0]\
+ if len([ key for key, value in page.style_dict.items() if value.get('font-family') == 'Frutiger-Europeen']) > 0\
+ else None
+ if arrow_style_key is not None:
+ for arrow in _extract_arrow_nodes(svg_tree, arrow_style_key, transkription_field, namespaces):
+ matrix = Matrix(transform_matrix_string=arrow.get('transform'))\
+ if not arrow.tag.endswith('tspan')\
+ else Matrix(transform_matrix_string=arrow.getparent().get('transform'))
+ line = _get_line_of_arrow(arrow, page, transkription_field)
+ if line is not None:
+ reference_counter = 0
+ reference = None
+ while reference is None and reference_counter < 2:
+ reference = _get_reference(svg_tree, arrow, matrix, transkription_field, namespaces, is_from_reference=(reference_counter==0))
+ reference_counter += 1
+ if reference is not None:
+ line.editor_comments.append(LineContinuation(reference=reference, to_reference=(reference_counter>1)))
+ else:
+ to_reference = (matrix.getX() > transkription_field.xmax)
+ line.editor_comments.append(LineContinuation(reference=Reference(), to_reference=to_reference))
+ else:
+ y = round(matrix.getY() - transkription_field.ymin, 2)
+ warnings.warn(f'There is no line for {y}')
+ for line in page.lines: line.attach_object_to_tree(page.page_tree)
+ if not UNITTESTING:
+ write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\
+ script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
+
+def _extract_arrow_nodes(svg_tree: ET.ElementTree, arrow_style_key: str, transkription_field=None, namespaces=None) ->list:
+ """Extract arrow nodes from svg_tree.
+ """
+ if transkription_field is None:
+ transkription_field = TranskriptionField(svg_tree.docinfo.URL)
+ if namespaces is None:
+ namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
+ return [ arrow for arrow in svg_tree.xpath('//ns:text[contains(@class, "{0}")]'.format(arrow_style_key)\
+ + '|//ns:tspan[contains(@class, "{0}")]'.format(arrow_style_key),\
+ namespaces=namespaces)\
+ if arrow.text == ')' and node_is_on_marginalia(arrow, transkription_field) ]
+
+def _get_arrow_y(arrow: ET.Element, matrix=None) ->float:
+ """Return y of arrow node.
+ """
+ if matrix is None:
+ matrix = Matrix(transform_matrix_string=arrow.get('transform'))\
+ if not arrow.tag.endswith('tspan')\
+ else Matrix(transform_matrix_string=arrow.getparent().get('transform'))
+ if arrow.tag.endswith('tspan'):
+ return matrix.add2Y(add_to_y=arrow.get('y'))
+ else:
+ return matrix.getY()
+
+def _get_line_of_arrow(arrow: ET.Element, page: Page, transkription_field: TranskriptionField, matrix=None) ->Line:
+ """Return Line next to arrow.
+ """
+ arrow_y = _get_arrow_y(arrow, matrix=matrix)
+ line_number = page.get_line_number(round(arrow_y - transkription_field.ymin, 2) -.5)
+ lines = [ line for line in page.lines if line.id == line_number ]
+ if len(lines) > 0:
+ return lines[0]
+ return None
+
+def _get_reference(svg_tree: ET.ElementTree, arrow: ET.Element, arrow_matrix: Matrix, transkription_field: TranskriptionField, namespaces: dict, is_from_reference=True) ->Reference:
+ """Return reference.
+ """
+ reference = None
+ arrow_left = arrow_matrix.add2X(add_to_x=arrow.get('x'))\
+ if arrow.tag.endswith('tspan')\
+ else arrow_matrix.getX()
+ arrow_y = _get_arrow_y(arrow, matrix=arrow_matrix)
+ xmin = 0\
+ if arrow_left < transkription_field.xmin\
+ else transkription_field.xmax + transkription_field.line_number_area_width
+ xmax = arrow_left
+ ymin = arrow_y -5
+ ymax = arrow_y +5
+ if not is_from_reference:
+ xmin = xmax
+ xmax = transkription_field.xmin - transkription_field.line_number_area_width\
+ if arrow_left < transkription_field.xmin\
+ else transkription_field.documentWidth + transkription_field.line_number_area_width
+ text_nodes_on_arrow_line = sorted([ text_node for text_node in svg_tree.xpath('//ns:text', namespaces=namespaces)\
+ if text_node != arrow and text_node_is_inside_match_box(text_node, xmin, xmax, ymin, ymax) ],\
+ key=lambda node: Matrix(transform_matrix_string=node.get('transform')).getX())
+ reference_string = ''
+ for text_node in text_nodes_on_arrow_line:
+ reference_string += ''.join([ child.text for child in text_node.getchildren()])\
+ if len(text_node.getchildren()) > 0\
+ else text_node.text
+ if reference_string != '':
+ reference = Reference.create_cls(reference_string=reference_string)
+ return reference
+
+def node_is_on_marginalia(node: ET.Element, transkription_field: TranskriptionField) ->bool:
+ """Return true if node is on marginalia.
+ """
+ if node.tag.endswith('tspan'):
+ return tspan_node_is_inside_match_box(node, 0, transkription_field.xmin, transkription_field.ymin, transkription_field.ymax)\
+ or tspan_node_is_inside_match_box(node, transkription_field.xmax, transkription_field.documentWidth, transkription_field.ymin, transkription_field.ymax)
+ return text_node_is_inside_match_box(node, 0, transkription_field.xmin, transkription_field.ymin, transkription_field.ymax)\
+ or text_node_is_inside_match_box(node, transkription_field.xmax, transkription_field.documentWidth, transkription_field.ymin, transkription_field.ymax)
+
+def usage():
+ """prints information on how to use the script
+ """
+ print(main.__doc__)
+
+def main(argv):
+ """This program can be used to extract the line continuations.
+
+    svgscripts/extract_line_continuation.py [OPTIONS] <xmlManuscriptFile|svg_pos_file>
+
+    <xmlManuscriptFile> a xml file about a manuscript, containing information about its pages.
+    <svg_pos_file> a xml file about a page, containing information about svg word positions.
+
+ OPTIONS:
+ -h|--help show help
+
+ :return: exit code (int)
+ """
+ try:
+ opts, args = getopt.getopt(argv, "h", ["help" ])
+ except getopt.GetoptError:
+ usage()
+ return 2
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ usage()
+ return 0
+ if len(args) < 1:
+ usage()
+ return 2
+ exit_status = 0
+ file_a = args[0]
+ if isfile(file_a):
+ manuscript_file = file_a\
+ if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\
+ else None
+ counter = 0
+ for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK):
+ if not UNITTESTING:
+ print(Fore.CYAN + f'Extracting line continuations on {page.title}, {page.number} ...' + Style.RESET_ALL)
+ back_up(page, page.xml_file)
+ extract_line_continuations(page)
+ counter += 1
+ not UNITTESTING and print(Style.RESET_ALL + f'[{counter} pages processed]')
+ else:
+ raise FileNotFoundError('File {} does not exist!'.format(file_a))
+ return exit_status
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
Index: svgscripts/process_footnotes.py
===================================================================
--- svgscripts/process_footnotes.py (revision 96)
+++ svgscripts/process_footnotes.py (revision 97)
@@ -1,251 +1,251 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to process words after they have been merged with faksimile data.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
from colorama import Fore, Style
import getopt
import lxml.etree as ET
import os
from os import listdir, sep, path, setpgrp, devnull
from os.path import exists, isfile, isdir, dirname, basename
from pathlib import Path as PathlibPath
from progress.bar import Bar
import re
import shutil
import sys
import warnings
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.atypical_writing import AtypicalWriting
from datatypes.clarification import Clarification
from datatypes.editor_comment import EditorComment
from datatypes.editor_correction import EditorCorrection
from datatypes.footnotes import extract_footnotes
from datatypes.line_continuation import LineContinuation
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text
from datatypes.uncertain_decipherment import UncertainDecipherment
from util import back_up
from process_files import update_svgposfile_status
sys.path.append('shared_util')
from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
UNITTESTING = False
ATYPICAL_GROUP = re.compile(r'(.*:.*]\s*)(¿)(.*)')
CLARIFICATION_GROUP = re.compile(r'(.*:.*]\s*)(Vk)(.*)')
CONTINUATION_GROUP = re.compile(r'(.*:\s*)(Fortsetzung\s*)')
COMMENT_GROUP = re.compile(r'(.*:.*])')
EDITOR_CORRECTION_GROUP = re.compile(r'(.*:.*]\s*)(>[?]*)(.*)')
LINE_REFERENCE_GROUP = re.compile(r'(\d+-)*([0-9]+)(:.*)')
LINE_COMMENT_GROUP = re.compile(r'(.*\d+:)')
UNCERTAINTY_WORD_GROUP = re.compile(r'(.*:.*]\s*)([>]*\?)(.*)')
UNCERTAINTY_EDITOR_GROUP = re.compile(r'(.*)(\?)')
WORD_REFERENCE_GROUP = re.compile(r'(.*[0-9]+:\s*)(.*)(].*)')
DEBUG = False
-def categorize_footnotes(page, footnotes=None, manuscript_file=None, debug=False):
+def categorize_footnotes(page, footnotes=None, debug=False):
"""Categorize footnotes.
"""
DEBUG = debug
if footnotes is None:
footnotes = extract_footnotes(page)
for footnote in footnotes:
line_match = re.match(LINE_REFERENCE_GROUP, footnote.content)
if line_match is not None:
_process_line_match(page, footnote, line_match)
else:
warnings.warn(f'Unknown editor comment without a line reference: <{footnote}>')
page.update_and_attach_words2tree()
for line in page.lines: line.attach_object_to_tree(page.page_tree)
DEBUG = False
if not UNITTESTING:
write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\
script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
def _is_uncertain(footnote) -> bool:
"""Return whether footnote contains sign for uncertainty.
"""
uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content)
return (uncertain_match is not None\
and len([ markup for markup in footnote.standoff_markups\
if markup.css_string.endswith('italic;')\
and uncertain_match.end() >= markup.startIndex\
and uncertain_match.end() <= markup.endIndex ]) > 0)
def _process_line_match(page, footnote, line_match):
"""Process footnote if reference to a line matches.
"""
word_match = re.match(WORD_REFERENCE_GROUP, footnote.content)
start_line_number = end_line_number = int(line_match.group(2))
if line_match.group(1) is not None:
start_line_number = int(line_match.group(1)[0:-1])
lines = [ line for line in page.lines if line.id >= start_line_number and line.id <= end_line_number ]
if word_match is not None:
_process_word_match(page, footnote, line_match, word_match.group(2), start_line_number)
elif len(lines) > 0:
uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content)
for line in lines:
_process_line_reference(page, footnote, line, _is_uncertain(footnote))
else:
warnings.warn(f'Footnote refers to missing line {line_number}: {footnote}')
def _process_line_reference(page, footnote, line, is_uncertain):
"""Process footnote if there is a line reference.
"""
continuation_match = re.match(CONTINUATION_GROUP, footnote.content)
if continuation_match is not None:
reference_string = footnote.content[continuation_match.end():]
if is_uncertain:
reference_string = reference_string[:-1]
- line.editor_comment = LineContinuation.create_cls(reference_string=reference_string, is_uncertain=is_uncertain)
+ line.editor_comments.append(LineContinuation.create_cls(reference_string=reference_string, is_uncertain=is_uncertain))
else:
comment_match = re.match(LINE_COMMENT_GROUP, footnote.content)
if comment_match is not None:
is_uncertain = _is_uncertain(footnote)
comment = footnote.content[comment_match.end():-1].strip()\
if is_uncertain\
else footnote.content[comment_match.end():].strip()
- line.editor_comment = EditorComment(comment=comment, is_uncertain=is_uncertain)
+ line.editor_comments.append(EditorComment(comment=comment, is_uncertain=is_uncertain))
else:
warnings.warn(f'Unknown editor comment for line "{line.id}": <{footnote}>')
def _process_word_match(page, footnote, line_match, word_text, line_number, parent_word_composition=None):
"""Process footnote if there is a word reference.
"""
referred_words = [ word for word in page.words\
if word.line_number == line_number\
and (word.text == word_text\
or re.match(rf'\W*{word_text}\W', word.text)\
or word.edited_text == word_text) ]
overwritten_word_matches = [ word for word in page.words\
if word.line_number == line_number\
and len(word.word_parts) > 0\
and len([word_part for word_part in word.word_parts\
if word_part.overwrites_word is not None\
and word_part.overwrites_word.text == word_text]) > 0]
if len(referred_words) > 0\
or len(overwritten_word_matches) > 0:
word = None
if len(referred_words) == 1:
word = referred_words[0]
elif len(overwritten_word_matches) > 0:
word = [ word_part.overwrites_word for word_part in overwritten_word_matches[0].word_parts\
if word_part.overwrites_word is not None and word_part.overwrites_word.text == word_text][0]
else:
word = [ better_word for better_word in referred_words if better_word.text == word_text][0]
atypical_match = re.match(ATYPICAL_GROUP, footnote.content)
correction_match = re.match(EDITOR_CORRECTION_GROUP, footnote.content)
clarification_match = re.match(CLARIFICATION_GROUP, footnote.content)
is_uncertain = re.match(UNCERTAINTY_WORD_GROUP, footnote.content) is not None
if correction_match is not None:
correction = correction_match.group(3).strip()
word.editor_comment = EditorCorrection(correction_text=correction, is_uncertain=is_uncertain)
if not is_uncertain:
word.edited_text = correction
elif clarification_match is not None:
word.editor_comment = Clarification(text=footnote.extract_part(word_text, css_filter='bold;'))
elif atypical_match is not None:
text = footnote.extract_part(word_text, css_filter='bold;')\
if footnote.markup_contains_css_filter('bold;')\
else None
word.editor_comment = AtypicalWriting(text=text)
elif is_uncertain:
word.editor_comment = UncertainDecipherment()
else:
comment_match = re.match(COMMENT_GROUP, footnote.content)
if comment_match is not None:
is_uncertain = _is_uncertain(footnote)
comment = footnote.content[comment_match.end():-1].strip()\
if is_uncertain\
else footnote.content[comment_match.end():].strip()
word.editor_comment = EditorComment(comment=comment, is_uncertain=is_uncertain)
else:
warnings.warn(f'Unknown editor comment for word "{word.text}": <{footnote}>')
elif re.match(r'.*\s.*', word_text):
for word_part in word_text.split(' '):
_process_word_match(page, footnote, line_match, word_part, line_number, parent_word_composition=word_text)
else:
warnings.warn(f'No word found with text "{word_text}" on line {line_number}: <{footnote}>')
def usage():
"""prints information on how to use the script
"""
print(main.__doc__)
def main(argv):
"""This program can be used to process the footnotes of a page.
svgscripts/process_footnotes.py [OPTIONS] a xml file about a manuscript, containing information about its pages.
a xml file about a page, containing information about svg word positions.
OPTIONS:
-h|--help show help
:return: exit code (int)
"""
try:
opts, args = getopt.getopt(argv, "h", ["help" ])
except getopt.GetoptError:
usage()
return 2
for opt, arg in opts:
if opt in ('-h', '--help'):
usage()
return 0
if len(args) < 1:
usage()
return 2
exit_status = 0
file_a = args[0]
if isfile(file_a):
manuscript_file = file_a\
if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\
else None
counter = 0
for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK):
if not UNITTESTING:
print(Fore.CYAN + f'Processing {page.title}, {page.number} ...' + Style.RESET_ALL)
back_up(page, page.xml_file)
- categorize_footnotes(page, manuscript_file=manuscript_file)
+ categorize_footnotes(page)
counter += 1
not UNITTESTING and print(Style.RESET_ALL + f'[{counter} pages processed]')
else:
raise FileNotFoundError('File {} does not exist!'.format(file_a))
return exit_status
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
Index: tests_svgscripts/test_manuscript.py
===================================================================
--- tests_svgscripts/test_manuscript.py (revision 96)
+++ tests_svgscripts/test_manuscript.py (revision 97)
@@ -1,56 +1,56 @@
import unittest
from os import sep, path
from os.path import basename, dirname, isfile
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.color import Color
class TestArchivalManuscriptUnity(unittest.TestCase):
def setUp(self):
ArchivalManuscriptUnity.UNITTESTING = True
DATADIR = dirname(__file__) + sep + 'test_data'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
def test_init(self):
title = 'Test I 1'
manuscript = ArchivalManuscriptUnity(title=title)
self.assertEqual(manuscript.title, title)
def test_get_semanticAndDataDict(self):
semantic_dict = ArchivalManuscriptUnity.get_semantic_dictionary()
#print(semantic_dict)
def test_create_cls(self):
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
self.assertTrue(manuscript.description is not None)
self.assertEqual(len(manuscript.earlier_descriptions), 2)
self.assertEqual(manuscript.title, basename(self.test_manuscript).replace('.xml','').replace('_', ' '))
self.assertEqual(manuscript.manuscript_type, 'Notizheft')
self.assertEqual(len(manuscript.pages), 4)
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript, page_status_list=['faksimile merged'])
- self.assertEqual(len(manuscript.pages), 2)
+ self.assertEqual(len([ page for page in manuscript.pages if 'xml_file' in page.__dict__.keys()]), 2)
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript, page_status_list=['faksimile merged', 'words processed'])
- self.assertEqual(len(manuscript.pages), 1)
+ self.assertEqual(len([ page for page in manuscript.pages if 'xml_file' in page.__dict__.keys()]), 1)
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript, page_xpath='//pages/page/@output')
self.assertEqual(len(manuscript.pages), 4)
def test_get_color(self):
color = Color()
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
self.assertEqual(manuscript.get_color(color.hex_color) is not None, True)
self.assertEqual(manuscript.get_color("#F7F6F5") is None, True)
def test_update_colors(self):
color = Color()
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
manuscript.update_colors(color)
self.assertEqual(len(manuscript.colors), 2)
#print(ET.dump(manuscript.manuscript_tree.getroot()))
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_extract_line_continuation.py
===================================================================
--- tests_svgscripts/test_extract_line_continuation.py (revision 0)
+++ tests_svgscripts/test_extract_line_continuation.py (revision 97)
@@ -0,0 +1,48 @@
+import unittest
+from os import sep, path, remove
+from os.path import isfile
+import lxml.etree as ET
+import warnings
+import sys
+
+sys.path.append('svgscripts')
+import extract_line_continuation
+from datatypes.page import Page
+from datatypes.transkriptionField import TranskriptionField
+
+class TestExtractLineContinuation(unittest.TestCase):
+ def setUp(self):
+ extract_line_continuation.UNITTESTING = True
+ DATADIR = path.dirname(__file__) + sep + 'test_data'
+ self.w_I_8_125_svg = DATADIR + sep + 'W_I_8_neu_125-01.svg'
+ self.w_I_8_125_xml = DATADIR + sep + 'W_I_8_new_page125.xml'
+
+ def test_get_arrow_y(self):
+ arrow = ET.Element('text')
+ arrow.set('transform', 'matrix(1 0 0 1 10 20)')
+ self.assertEqual(extract_line_continuation._get_arrow_y(arrow), 20.0)
+ tspan = ET.SubElement(arrow, 'tspan')
+ tspan.set('y', '10.0')
+ self.assertEqual(extract_line_continuation._get_arrow_y(tspan), 30.0)
+
+ def test_get_line_of_arrow(self):
+ svg_tree = ET.parse(self.w_I_8_125_svg)
+ page = Page(self.w_I_8_125_xml)
+ transkription_field = TranskriptionField(self.w_I_8_125_svg)
+ arrows = extract_line_continuation._extract_arrow_nodes(svg_tree, 'st7')
+ line = extract_line_continuation._get_line_of_arrow(arrows[0], page, transkription_field)
+ self.assertEqual(line.id, 15)
+
+ def test_extract_line_continuations(self):
+ page = Page(self.w_I_8_125_xml)
+ extract_line_continuation.extract_line_continuations(page, svg_file=self.w_I_8_125_svg)
+ lines_with_continuations = [ line for line in page.lines if len(line.editor_comments) > 0]
+ self.assertEqual(len(lines_with_continuations), 2)
+ page = Page('xml/N_VII_1_page029.xml')
+ extract_line_continuation.extract_line_continuations(page)
+ lines_with_continuations = [ line for line in page.lines if len(line.editor_comments) > 0]
+ print(lines_with_continuations)
+ self.assertEqual(len(lines_with_continuations), 1)
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_page.py
===================================================================
--- tests_svgscripts/test_page.py (revision 96)
+++ tests_svgscripts/test_page.py (revision 97)
@@ -1,154 +1,156 @@
import unittest
from os import sep, path
from os.path import isdir, isfile, dirname, basename
import lxml.etree as ET
import sys
import sys
sys.path.append('svgscripts')
dir_changed = False
if not isdir('datatypes'):
sys.path.append(dirname(sys.path[0]))
dir_changed = True
from datatypes.lineNumber import LineNumber
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.style import Style
from datatypes.writing_process import WritingProcess
from datatypes.word import Word
class TestPage(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_file = DATADIR + sep + 'test.xml'
self.test_svg_file = DATADIR + sep + 'test421.svg'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_tcm_xml = DATADIR + sep + 'N_VII_1_page001.xml'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
self.test_styles_color = DATADIR + sep + 'N_VII_1_page013.xml'
def test_Page(self):
page = Page(self.test_file)
self.assertEqual(page.title, 'Mp XIV 1')
self.assertEqual(page.number, '421')
self.assertEqual(len(page.sonderzeichen_list), 2)
self.assertEqual('st21' in page.sonderzeichen_list, True)
self.assertEqual('st23' in page.sonderzeichen_list, True)
self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
stage0 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 0 ]
stage1 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 1 ]
stage2 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 2 ]
fontStage0 = float(page.style_dict.get(stage0[0]).get('font-size').replace('px',''))
fontStage1 = float(page.style_dict.get(stage1[0]).get('font-size').replace('px',''))
fontStage2 = float(page.style_dict.get(stage2[0]).get('font-size').replace('px',''))
self.assertEqual(fontStage0 > fontStage1, True)
self.assertEqual(fontStage1 > fontStage2, True)
+ page = Page.create_cls(self.test_tcm_xml, create_dummy_page=True)
+ self.assertEqual(page.number, '1')
def test_get_biggest_fontSize4styles(self):
page = Page(self.test_file)
style_set = { 'st12', 'st2', 'st14', 'st13' }
self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)
def test_get_words(self):
page = Page(self.test_file)
words = page.words
self.assertEqual(len(words), 440)
self.assertEqual(words[0].text, '$')
self.assertEqual(words[439].text, 'mußte!')
def test_get_line_number(self):
page = Page(self.test_file)
self.assertEqual(page.get_line_number( (page.words[0].transkription_positions[0].bottom+page.words[0].transkription_positions[0].top)/2), 1)
self.assertEqual(page.get_line_number( (page.words[27].transkription_positions[0].bottom+page.words[27].transkription_positions[0].top)/2), 2)
self.assertEqual(page.get_line_number( (page.words[105].transkription_positions[0].bottom+page.words[105].transkription_positions[0].top)/2), 7)
def test_update_page_type(self):
page = Page(self.pdf_xml)
tf = TranskriptionField(self.pdf_xml_source)
page.update_page_type(transkription_field=tf)
self.assertEqual(page.page_type, Page.PAGE_VERSO)
#page = Page(self.xml_fileB)
#page.update_page_type()
#self.assertEqual(page.page_type, Page.PAGE_RECTO)
def test_update_line_number_area(self):
page = Page(self.xml_file)
transkription_field = TranskriptionField(page.source)
page.update_line_number_area(transkription_field)
self.assertEqual(transkription_field.line_number_area_width > 0, True)
self.assertEqual(transkription_field.line_number_area_width < 15, True)
page = Page(self.xml_fileB)
transkription_field = TranskriptionField(page.source)
page.update_line_number_area(transkription_field)
self.assertEqual(transkription_field.line_number_area_width > 0, True)
self.assertEqual(transkription_field.line_number_area_width < 15, True)
def test_get_pages_from_xml_file(self):
pages = Page.get_pages_from_xml_file(self.test_manuscript)
self.assertEqual(len(pages), 4)
self.assertEqual(pages[0].number, '5')
self.assertEqual(pages[1].number, '6')
pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK)
self.assertEqual(len(pages), 2)
self.assertEqual(pages[0].number, '5')
pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK, status_not_contain=STATUS_POSTMERGED_OK)
self.assertEqual(len(pages), 1)
def test_get_semantic_dictionary(self):
dictionary = Page.get_semantic_dictionary()
#print(dictionary)
def test_update_styles(self):
page = Page(self.pdf_xml)
page.words = [ word for word in page.words if word.text == 'Schopenhauer' ]
page.update_styles(add_to_parents=True)
self.assertEqual(len(page.words[0].styles), 1)
self.assertEqual(page.words[0].styles[0].color.name, 'black')
self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['latin'])
self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('black',False)])
page = Page(self.test_styles_color)
page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' ]
page.update_styles(add_to_parents=True)
self.assertEqual(len(page.words[0].styles), 1)
self.assertEqual(page.words[0].styles[0].color.name, 'green')
self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['german'])
self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('green',False)])
self.assertEqual(page.words[0].styles[0].writing_process_id, WritingProcess.INSERTION_AND_ADDITION)
page = Page(self.test_styles_color)
page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' or word.text == 'gewisse' ]
self.assertEqual(len(page.words), 2)
word = page.words[0]
word.transkription_positions += page.words[1].transkription_positions
page.words = [ word ]
page.update_styles(add_to_parents=True, partition_according_to_styles=True)
self.assertEqual(len(page.words[0].word_parts), 2)
page = Page(self.test_styles_color)
page.update_styles(add_to_parents=True, create_css=True)
for word in page.words:
self.assertTrue(len(word.styles) > 0)
for style in word.styles:
self.assertTrue(len(style.css_styles) > 0)
def test_lock(self):
page = Page(self.test_tcm_xml)
self.assertEqual(page.is_locked(), False)
page.lock('asdf.txt')
self.assertEqual(page.is_locked(), True)
self.assertEqual(page.page_tree.xpath('//lock/reference-file/text()')[0], 'asdf.txt')
page.unlock()
self.assertEqual(page.is_locked(), False)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_data/N_VII_1_page006.xml
===================================================================
--- tests_svgscripts/test_data/N_VII_1_page006.xml (revision 96)
+++ tests_svgscripts/test_data/N_VII_1_page006.xml (revision 97)
@@ -1,1276 +1,1276 @@
svgWordPosition2019-08-02 15:17:372019-08-02 15:17:372019-08-02 15:30:592019-08-02 15:30:59
- 2020-06-19 15:50:47
+ 2020-07-16 16:48:15
Index: tests_svgscripts/test_data/N_VII_1.xml
===================================================================
--- tests_svgscripts/test_data/N_VII_1.xml (revision 96)
+++ tests_svgscripts/test_data/N_VII_1.xml (revision 97)
@@ -1,178 +1,178 @@
xmlManuscriptFile2019-08-02 15:28:572019-08-02 15:31:252019-12-09 08:43:592019-12-09 08:43:59H. J. MetteIn schwarzen Lederdeckel gebundenes Oktavheft (10,5x17), 194 durchweg beschriebene Seiten; Studien aus der Umwertungszeit, die zum Teil für das <i>Jenseits </i>verwandt worden sind.BAW 1, XCVIM. MontinariOktavheft, 10,5x17. 194 Seiten. Schwarze und violette Tinte, sowie Bleistift. Überwiegend deutsche Schrift. Von hinten nach vorn beschrieben. Alte Signatur: N XLIII.KGW VII 4/2, 632Oktavheft. Schwarzer Ledereinband mit Goldprägung (vorn und hinten senkrechte Linie, parallel zum Rücken; vorn rechts unten Initialen „F. N.“, Einzelstempel) und umlaufender Blindlinie. Am hinteren Deckel lederne Stifthülse. Buchblock stellenweise gelockert. Vorsätze aus Moiré-Papier. 194 Seiten, 10,8x17,3, unliniiert.April bis Juni 188516. April 1885 bis Anfang Juni 1885
-
+ KGW VII 34[1-256]M. Montinari (zu 34[257]): „dieses und die beiden folgenden Fragmente 34[258.259] wurden von N in einen Brief von Paul Lanzky von Anfang Juni 1885 (KGB III 4, S. 28, Nr. 281) eingetragen.“ KGW VII 4/2, 374.Vorderer DeckelVorsatz RektoVorsatz Verso (kaschiert)11194Lage, 6 BlattVorsatz - 11111Einzelblatt12-131213Lage, 4 Blatt14-211421Lage, 8 Blatt22-372237Lage, 8 Blatt38-533853Lage, 8 Blatt54-695469Lage, 8 Blatt70-857085Lage, 8 Blatt86-10186101Lage, 8 Blatt102-117102117Lage, 8 Blatt118-133118133Lage, 8 Blatt134-149134149Lage, 8 Blatt150-165150165Lage, 8 Blatt166-181166181Lage, 8 Blatt182 - Vorsatz182 194Vorsatz Rekto (kaschiert)1941194Vorsatz VersoHinterer Deckel1885-4-11885-6-28KGW IX 12001Bearbeitet von Marie-Luise Haase, Michael Kohlenbach, Johannes Neininger, Wolfert von Rahden, Thomas Riebe und René Stockmar unter Mitarbeit von Dirk Setton.Marie-Luise Haase und Michael Kohlenbach71/209N XLIII
Index: tests_svgscripts/test_data/W_I_8_new_page125.xml
===================================================================
--- tests_svgscripts/test_data/W_I_8_new_page125.xml (revision 96)
+++ tests_svgscripts/test_data/W_I_8_new_page125.xml (revision 97)
@@ -1,4371 +1,4337 @@
svgWordPosition2019-05-03 13:15:352019-05-06 10:25:332019-05-03 20:00:06
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Index: tests_svgscripts/test_line.py
===================================================================
--- tests_svgscripts/test_line.py (revision 96)
+++ tests_svgscripts/test_line.py (revision 97)
@@ -1,45 +1,47 @@
import unittest
from os import sep, path
from os.path import isdir, dirname
import lxml.etree as ET
import sys
import sys
sys.path.append('svgscripts')
from datatypes.line import Line
from datatypes.line_continuation import LineContinuation
# Unit tests for datatypes.line.Line: construction, XML round-trip and editor comments.
class TestLineNumber(unittest.TestCase):
def setUp(self):
# Fixture file and the line id used by test_init below.
DATADIR = dirname(__file__) + sep + 'test_data'
self.test_target_file = DATADIR + sep + 'test.xml'
self.id = 24
def test_init(self):
# A Line constructed with an explicit id keeps that id.
lnr = Line(id=self.id)
self.assertEqual(lnr.id, self.id)
def test_init_from_xml(self):
# Parsing the fixture must yield 49 lines with ids 1..49 in order.
xml_tree = ET.parse(self.test_target_file)
lines = [ Line.create_cls_from_node(node=node) for node in xml_tree.getroot().xpath(Line.XML_TAG) ]
self.assertEqual(len(lines), 49)
self.assertEqual(lines[0].id, 1)
self.assertEqual(lines[48].id, 49)
def test_get_semanticAndDataDict(self):
# Placeholder: semantic dictionary output is inspected manually if needed.
#print(Line.get_semantic_dictionary())
pass
def test_attach(self):
# Round-trip: attach a line (with editor comments) to a tree and re-create it.
tree = ET.Element('page')
xml_tree = ET.parse(self.test_target_file)
line = [ Line.create_cls_from_node(node=node) for node in xml_tree.getroot().xpath(Line.XML_TAG) ][1]
# NOTE(review): the following lines still carry unified-diff markers ('-'/'+')
# from the revision 96->97 patch — presumably the '+' lines are the intended
# code (editor_comment was replaced by an editor_comments list); resolve before use.
- line.editor_comment = LineContinuation.create_cls('1,3')
+ line.editor_comments.append(LineContinuation.create_cls('1,3'))
+ line.editor_comments.append(LineContinuation.create_cls('1,3'))
line.attach_object_to_tree(tree)
lineB = [ Line.create_cls_from_node(node=node) for node in tree.xpath(Line.XML_TAG) ][0]
self.assertEqual(lineB.id, line.id)
- self.assertEqual(lineB.editor_comment.reference.first_line, line.editor_comment.reference.first_line)
+ self.assertEqual(lineB.editor_comments[0].reference.first_line, line.editor_comments[0].reference.first_line)
+ self.assertEqual(len(lineB.editor_comments), 2)
# Run this test module's test cases when executed directly.
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_process_footnotes.py
===================================================================
--- tests_svgscripts/test_process_footnotes.py (revision 96)
+++ tests_svgscripts/test_process_footnotes.py (revision 97)
@@ -1,47 +1,47 @@
import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
from datatypes.footnotes import extract_footnotes
from datatypes.page import Page
import process_footnotes
from process_footnotes import categorize_footnotes, main
# Unit tests for process_footnotes: footnote extraction and categorization.
class TestExtractFootnotes(unittest.TestCase):
def setUp(self):
# Silence interactive output during the tests and collect fixture paths.
process_footnotes.UNITTESTING = True
DATADIR = path.dirname(__file__) + sep + 'test_data'
self.test_footnote = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_footnote_verso = DATADIR + sep + 'N_VII_1_xp5_4_page5.svg'
self.test_footnote_recto = DATADIR + sep + 'N_VII_1_xp5_4_page6.svg'
self.test_footnote_multi = DATADIR + sep + 'N_VII_1_xp5_4_page13.svg'
self.test_footnote_multi_xml = DATADIR + sep + 'N_VII_1_page013.xml'
self.test_categorize_footnote = DATADIR + sep + 'N_VII_1_page006.xml'
def test_categorize_footnotes(self):
# Categorizing footnotes should attach editor comments to words and lines.
page = Page(self.test_categorize_footnote)
footnotes = extract_footnotes(page, svg_file=self.test_footnote_recto)
categorize_footnotes(page, footnotes)
words_with_comments = [ word for word in page.words if word.editor_comment is not None ]
self.assertEqual(len(words_with_comments), 4)
# NOTE(review): the next two lines still carry unified-diff markers ('-'/'+')
# from the revision 96->97 patch — presumably the '+' line is the intended
# code (Line now exposes an editor_comments list); resolve before use.
- lines_with_comments = [ line for line in page.lines if line.editor_comment is not None ]
+ lines_with_comments = [ line for line in page.lines if len(line.editor_comments) > 0 ]
self.assertEqual(len(lines_with_comments), 1)
# Second run against a repository page; depends on files outside test_data — TODO confirm availability.
page = Page('xml/W_II_1_page141.xml')
footnotes = extract_footnotes(page)
categorize_footnotes(page, footnotes, debug=True)
words_with_comments = [ word for word in page.words if word.editor_comment is not None ]
#self.assertEqual(len(words_with_comments), 9)
def test_main(self):
# main() should return exit code 0 for a processable page file.
self.assertEqual(main(['xml/N_VII_1_page005.xml']), 0)
# Run this test module's test cases when executed directly.
if __name__ == "__main__":
unittest.main()
Index: py2ttl/py2ttl_data.py
===================================================================
--- py2ttl/py2ttl_data.py (revision 96)
+++ py2ttl/py2ttl_data.py (revision 97)
@@ -1,143 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py objects to data in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
from colorama import Fore, Style
import getopt
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
import re
import sys
sys.path.append('svgscripts')
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.super_page import SuperPage
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from class_spec import SemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from data_handler import RDFDataHandler
sys.path.append('shared_util')
from myxmlwriter import xml2dict
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLDataConverter:
"""This class can be used to convert py objects to rdf data in turtle format.
"""
# When True, progress output to stdout is suppressed (set by the test suite).
UNITTESTING = False
def __init__(self, manuscript_file, xml_dictionary_file=None, mapping_dictionary=None):
# Exactly one of xml_dictionary_file / mapping_dictionary must be provided;
# the dictionary maps classes/properties for the RDF conversion.
if mapping_dictionary is None and xml_dictionary_file is not None:
if not Py2TTLDataConverter.UNITTESTING:
print(Fore.CYAN + 'initializing mapping dictionary from file "{}" ...'.format(xml_dictionary_file))
self.mapping_dictionary = xml2dict(xml_dictionary_file)
if not Py2TTLDataConverter.UNITTESTING:
print(Fore.GREEN + '[{} classes added]'.format(str(len(self.mapping_dictionary['classes']))))
elif mapping_dictionary is not None:
self.mapping_dictionary = mapping_dictionary
else:
raise Exception('Error: Py2TTLDataConverter init expects either a xml_dictionary_file or a mapping_dictionary!')
self.manuscript_file = manuscript_file
def convert(self, page_status_list=None):
"""Convert manuscript instantiated with manuscript_file to rdf data and write to target_file.

        Only pages whose status is in page_status_list are included;
        defaults to ['OK', SuperPage.STATUS_MERGED_OK] when empty/None.
"""
if page_status_list is None or len(page_status_list) < 1:
page_status_list = ['OK', SuperPage.STATUS_MERGED_OK]
not Py2TTLDataConverter.UNITTESTING and print(Fore.CYAN + 'initializing python objects with file "{}" ...'.format(self.manuscript_file))
manuscript = ArchivalManuscriptUnity.create_cls(self.manuscript_file, page_status_list=page_status_list, update_page_styles=True)
# Target file name is derived from the manuscript title, e.g. "N_VII_1_DATA.ttl".
target_data_file = manuscript.title.replace(' ', '_') + '_DATA.ttl'
data_handler = RDFDataHandler(target_data_file, self.mapping_dictionary)
if not Py2TTLDataConverter.UNITTESTING:
# NOTE(review): the next two lines still carry unified-diff markers ('-'/'+')
# from the revision 96->97 patch — presumably the '+' line (counting only
# pages that actually have an xml_file) is the intended code; resolve before use.
- print(Fore.GREEN + '[{} pages added]'.format(str(len(manuscript.pages))))
+ print(Fore.GREEN + '[{} pages added]'.format(str(len([ page for page in manuscript.pages if 'xml_file' in page.__dict__.keys()]))))
print(Fore.CYAN + 'adding triples to rdf graph ... ')
data_handler.add_data(manuscript, '')
if not Py2TTLDataConverter.UNITTESTING:
print(Fore.GREEN + '[{} statements added]'.format(str(len(data_handler.data_graph))))
print(Fore.CYAN + 'writing graph to file "{}" ...'.format(target_data_file))
data_handler.write()
if not Py2TTLDataConverter.UNITTESTING:
print(Fore.GREEN + '[OK]')
print(Style.RESET_ALL)
def usage():
    """Print information on how to use the script.

    The help text is taken from main's docstring.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py objects to rdf data in turtle format.
py2ttl/py2ttl_data.py [OPTIONS] xml file of type shared_util.myxmlwriter.FILE_TYPE_XML_MANUSCRIPT.
OPTIONS:
-h|--help: show help
-i|--include-status=STATUS include pages with status = STATUS. STATUS is a ':' seperated string of status, e.g. 'OK:faksimile merged'.
-m|--mapping=mapping_dict.xml xml file generated by py2ttl/py2ttl.py containing mapping information for each property of a class.
:return: exit code (int)
"""
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    # Default mapping-dictionary file name, derived from the datatypes dir
    # and the autogenerated ontology file name; may be overridden via -m.
    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    xml_dictionary_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2' + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
    page_status_list = None
    try:
        options, arguments = getopt.getopt(argv, "hi:m:", ["help", "include-status=", "mapping="])
    except getopt.GetoptError:
        usage()
        return 2
    for option, value in options:
        if option in ('-h', '--help'):
            usage()
            return 0
        elif option in ('-i', '--include-status'):
            # ':'-separated list of page status values to include.
            page_status_list = value.split(':')
        elif option in ('-m', '--mapping'):
            xml_dictionary_file = value
    if len(arguments) < 1:
        usage()
        return 2
    manuscript_file = arguments[0]
    # Both the mapping dictionary and the manuscript file must exist.
    if not isfile(xml_dictionary_file) or not isfile(manuscript_file):
        usage()
        return 2
    converter = Py2TTLDataConverter(manuscript_file, xml_dictionary_file=xml_dictionary_file)
    converter.convert(page_status_list=page_status_list)
    return 0
# Script entry point: exit the process with main's return code.
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
Index: tests_py2ttl/test_data/mapping_dict.xml
===================================================================
--- tests_py2ttl/test_data/mapping_dict.xml (revision 96)
+++ tests_py2ttl/test_data/mapping_dict.xml (revision 97)
@@ -1,342 +1,344 @@
tlnhttp://www.nie.org/ontology/nietzsche#./tln-ontology_autogenerated.ttlhttp://www.nie.org/ontology/nietzsche#ArchivalManuscriptUnityhttp://www.nie.org/ontology/nietzsche#hasTitlehttp://www.nie.org/ontology/nietzsche#hasManuscriptTypehttp://www.nie.org/ontology/nietzsche#hasStyleshttp://www.nie.org/ontology/nietzsche#hasPageshttp://www.nie.org/ontology/nietzsche#hasDescriptionhttp://www.nie.org/ontology/nietzsche#hasEarlierDescriptionshttp://www.nie.org/ontology/nietzsche#EditorCommenthttp://www.nie.org/ontology/nietzsche#isUncertainhttp://www.nie.org/ontology/nietzsche#hasCommenthttp://www.nie.org/ontology/nietzsche#AtypicalWritinghttp://www.nie.org/ontology/nietzsche#isUncertainhttp://www.nie.org/ontology/nietzsche#hasCommenthttp://www.nie.org/ontology/nietzsche#atypicalWritingHasTexthttp://www.nie.org/ontology/nietzsche#Pathhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#Boxhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#hasEarlierTexthttp://www.nie.org/ontology/nietzsche#Clarificationhttp://www.nie.org/ontology/nietzsche#isUncertainhttp://www.nie.org/ontology/nietzsche#hasCommenthttp://www.nie.org/ontology/nietzsche#clarificationHasTexthttp://www.nie.org/ontology/nietzsche#Colorhttp://www.nie.org/ontology/nietzsche#colorHasNamehttp://www.nie.org/ontology/nietzsche#hasHexadecimalValuehttp://www.nie.org/ontology/nietzsche#Texthttp://www.nie.org/ontology/nietzsche#textHasContenthttp://www.nie.org/ontology/nietzsche#textHasMarkuphttp://www.nie.org/ontology/nietzsche#Descriptionhttp://www.nie.org/ontology/nietzsche#textHasContenthttp://www.nie.org/ontology/nietzsche#textHasMarkuphttp://www.nie.org/ontology/nietzsche#EarlierDescriptionhttp://www.nie.org/ontology/nietzsche#textHasContenthttp://www.nie.org/ontology/nietzsche#hasAuthorhttp://www.nie.org/ontology/nietzsche#hasCitationhttp://www.nie.org/ontology/nietzsche#textHasMarkuphttp://www.nie.org/ontology/nietzsche#EditorCorrectionhtt
p://www.nie.org/ontology/nietzsche#isUncertainhttp://www.nie.org/ontology/nietzsche#hasCommenthttp://www.nie.org/ontology/nietzsche#hasCorrectionTexthttp://www.nie.org/ontology/nietzsche#Imagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#FaksimileImagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#hasUrlhttp://www.nie.org/ontology/nietzsche#hasTextFieldhttp://www.nie.org/ontology/nietzsche#PositionalObjecthttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#WordPositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#FaksimilePositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#isOnFaksimileImagehttp://www.nie.org/ontology/nietzsche#isOnTextFieldhttp://www.nie.org/ontology/nietzsche#Linehttp://www.nie.org/ontology/nietzsche#lineHasNumberhttp://www.nie.org/ontology/nietzsche#lineHasBottomValueOnTranskriptionhttp://www.nie.org/ontology/nietzsche#lineHasTopValueOnTranskriptionhttp://www.nie.org/ontology/nietzsche#lineH
asInnerBottomValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasInnerTopValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasOuterBottomValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasOuterTopValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#isMainLine
- http://www.nie.org/ontology/nietzsche#lineHasLineContinuation
+ http://www.nie.org/ontology/nietzsche#lineHasEditorCommenthttp://www.nie.org/ontology/nietzsche#LineContinuationhttp://www.nie.org/ontology/nietzsche#isUncertainhttp://www.nie.org/ontology/nietzsche#hasCommenthttp://www.nie.org/ontology/nietzsche#isLineAContinuationTohttp://www.nie.org/ontology/nietzsche#lineContinuationHasReferencehttp://www.nie.org/ontology/nietzsche#SimpleWordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#SpecialWordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#MarkForeignHandshttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#textOfForeignHandshttp://www.nie.org/ontology/nietzsche#penOfForeignHandshttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#Pagehttp://www.nie.org/ontology/nietzsche#hasNumberhttp://www.nie.org/ontology/nietzsche#hasOrientationhttp://www.nie.org/ontology/nietzsche#hasLines
+ http://www.nie.org/ontology/nietzsche#hasMarkForeignHandshttp://www.nie.org/ontology/nietzsche#hasWordshttp://www.nie.org/ontology/nietzsche#hasWordDeletionPathshttp://www.nie.org/ontology/nietzsche#hasWordInsertionMarkshttp://www.nie.org/ontology/nietzsche#hasFaksimileImagehttp://www.nie.org/ontology/nietzsche#hasSvgImagehttp://www.nie.org/ontology/nietzsche#pageIsOnTextFieldhttp://www.nie.org/ontology/nietzsche#Referencehttp://www.nie.org/ontology/nietzsche#firstLineOfReferencehttp://www.nie.org/ontology/nietzsche#lastLineOfReference
+ http://www.nie.org/ontology/nietzsche#wordReferencehttp://www.nie.org/ontology/nietzsche#IsUncertainhttp://www.nie.org/ontology/nietzsche#hasTitlehttp://www.nie.org/ontology/nietzsche#hasPageNumberhttp://www.nie.org/ontology/nietzsche#SVGImagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#StandoffTaghttp://www.nie.org/ontology/nietzsche#standoffTagHasStartIndexhttp://www.nie.org/ontology/nietzsche#standoffTagHasEndIndexhttp://www.nie.org/ontology/nietzsche#standoffTagHasCSShttp://www.nie.org/ontology/nietzsche#TextConnectionMarkhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#textConnectionMarkHasTextSourcehttp://www.nie.org/ontology/nietzsche#TextFieldhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#TranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#isOnSvgImagehttp://www.nie.org/ontology/nietzsche#UncertainDeciphermenthttp://www.nie.org/ontology/nietzsche#isUncertainhttp://www.nie.org/ontology/nietzsche#hasCommenthttp://www.nie.org/ontology/nietzsche#Wordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#hasEditedTexthttp://www.nie.org/ontology/nietzs
che#wordHasWordPartshttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#wordHasStylehttp://www.nie.org/ontology/nietzsche#overwritesWordhttp://www.nie.org/ontology/nietzsche#isTransformationOfWordhttp://www.nie.org/ontology/nietzsche#isExtensionOfWordhttp://www.nie.org/ontology/nietzsche#isDeletionOfWordhttp://www.nie.org/ontology/nietzsche#isClarificationOfWordhttp://www.nie.org/ontology/nietzsche#wordHasEarlierVersionhttp://www.nie.org/ontology/nietzsche#wordHasCorrectionhttp://www.nie.org/ontology/nietzsche#wordIsDeletedByPathhttp://www.nie.org/ontology/nietzsche#wordHasEditorCommenthttp://www.nie.org/ontology/nietzsche#WordDeletionPathhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#WordInsertionMarkhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#hasMarkTypehttp://www.nie.org/ontology/nietzsche#hasSymbolIdhttp://www.nie.org/ontology/nietzsche#hasNextWordhttp://www.nie.org/ontology/nietzsche#hasPreviousWordhttp://www.nie.org/ontology/nietzsche#wordInsertionMarkBelongsToLinexml-dictionary
- 2020-06-19 14:07:47
+ 2020-07-15 19:50:04