Index: Friedrich-Nietzsche-late-work-ontology.ttl =================================================================== --- Friedrich-Nietzsche-late-work-ontology.ttl (revision 103) +++ Friedrich-Nietzsche-late-work-ontology.ttl (revision 104) @@ -1,57 +1,63 @@ @prefix dct: . @prefix document: . @prefix homotypic: . @prefix stoff: . @prefix text: . @prefix owl: . @prefix rdfs: . @prefix xsd: . @prefix tln: . a owl:Ontology; dct:license ; dct:title "An ontology about the collected late works of Friedrich Nietzsche"@en; dct:description """Formal description of specific concepts in the scientific study of Friedrich Nietzsches late work."""@en; dct:creator "Dominique Steinbach, tool coordinator/software developer, NIE-INE/digital edition of der späte Nietzsche, Basel University, Switzerland"@en; dct:contributor "Christian Steiner, software developer, digital edition of der späte Nietzsche, University of Basel, Switzerland"@en; dct:publisher "Basel University, Switzerland"@en. tln:Page a owl:Class ; rdfs:subClassOf document:Page . tln:hasImage a owl:ObjectProperty ; rdfs:label "relates a page to a image"@en ; rdfs:comment "relates a page to an image that has a textfield that specifies the area where the writing that constitutes the page can be found."@en ; rdfs:isDefinedBy ; rdfs:domain tln:Page ; rdfs:range tln:Image . +tln:hasUrl a owl:DatatypeProperty ; + rdfs:label "has Url"@en ; + rdfs:domain tln:Image ; + rdfs:isDefinedBy ; + rdfs:range xsd:anyURI . + tln:inheritOverwritesWord a owl:ObjectProperty ; rdfs:subPropertyOf tln:overwritesWord; rdfs:label "word overwrites word (inherited from tln:wordHasCorrection)"@en ; rdfs:comment "The author has used this word in order to overwrite that word."@en ; rdfs:isDefinedBy ; owl:propertyChainAxiom ( tln:wordHasCorrection tln:overwritesWord ). 
tln:lineContinuesOn a owl:ObjectProperty ; rdfs:label "writing from subject line continues on object line"@en ; rdfs:comment "the writing that ends on subject line continues on object line"@en ; rdfs:isDefinedBy ; rdfs:domain tln:Line ; rdfs:range tln:Line . tln:pageIsOnTextField a owl:ObjectProperty ; rdfs:label "page is on text field"@en ; rdfs:comment "the writing that is referred to as subject can be found on object"@en ; rdfs:isDefinedBy ; rdfs:domain tln:Page ; rdfs:range tln:TextField . tln:writingContinuesWithWord a owl:ObjectProperty ; rdfs:label "writing continues with next word"@en ; rdfs:isDefinedBy ; rdfs:domain tln:Word ; rdfs:range tln:Word . Index: svgscripts/datatypes/path.py =================================================================== --- svgscripts/datatypes/path.py (revision 103) +++ svgscripts/datatypes/path.py (revision 104) @@ -1,200 +1,200 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This super class can be used to represent all svg path types. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see <http://www.gnu.org/licenses/>. 1}}}
""" if target_tree.__class__.__name__ == '_ElementTree': target_tree = target_tree.getroot() obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \ if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \ else ET.SubElement(target_tree, self.tag) for key in self.floatKeys: if self.__dict__[key] is not None: obj_node.set(key.replace('_','-'), str(round(self.__dict__[key], 3))) for key in self.intKeys + self.stringKeys: if self.__dict__[key] is not None: obj_node.set(key.replace('_','-'), str(self.__dict__[key])) if self.path is not None: obj_node.set('d', self.path.d()) @classmethod def create_cls(cls, id=0, path=None, style_class='', page=None, tag=XML_TAG, stroke_width=0.0): """Create and return a cls. """ if path is not None\ and path.start.imag <= path.end.imag\ and page is not None\ and style_class != ''\ and len(path._segments) == 1\ and type(path._segments[0]) == Line\ and ((style_class in page.style_dict.keys()\ and 'stroke-width' in page.style_dict[style_class].keys())\ or stroke_width > 0.0): # If path is a Line and its style_class specifies a stroke-width, correct path stroke_width_correction = float(page.style_dict[style_class]['stroke-width'])/2\ if stroke_width == 0.0\ else stroke_width xmin = path.start.real xmax = path.end.real ymin = path.start.imag-stroke_width_correction ymax = path.end.imag+stroke_width_correction #path = parse_path(f'M {xmin}, {ymin} L {xmax}, {ymin} L {xmax}, {ymax} L {xmin}, {ymax} z') path = SVGPath(Line(start=(complex(f'{xmin}+{ymin}j')), end=(complex(f'{xmax}+{ymin}j'))),\ Line(start=(complex(f'{xmax}+{ymin}j')), end=(complex(f'{xmax}+{ymax}j'))),\ Line(start=(complex(f'{xmax}+{ymax}j')), end=(complex(f'{xmin}+{ymax}j'))),\ Line(start=(complex(f'{xmin}+{ymax}j')), end=(complex(f'{xmin}+{ymin}j')))) return cls(id=id, path=path, style_class=style_class, tag=tag) def contains_path(self, other_path): """Returns true if other_path is contained in this path. 
""" this_xmin, this_xmax, this_ymin, this_ymax = self.path.bbox() other_xmin, other_xmax, other_ymin, other_ymax = other_path.path.bbox() return other_xmin >= this_xmin and other_xmax <= this_xmax\ and other_ymin >= this_ymin and other_ymax <= this_ymax def contains_start_of_path(self, other_path): """Returns true if start of other_path is contained in this path. """ this_xmin, this_xmax, this_ymin, this_ymax = self.path.bbox() other_xmin, other_xmax, other_ymin, other_ymax = other_path.path.bbox() return other_xmin >= this_xmin and other_xmin < this_xmax\ and other_ymin >= this_ymin and other_ymax <= this_ymax def contains_end_of_path(self, other_path): """Returns true if end of other_path is contained in this path. """ this_xmin, this_xmax, this_ymin, this_ymax = self.path.bbox() other_xmin, other_xmax, other_ymin, other_ymax = other_path.path.bbox() return other_xmax >= this_xmin and other_xmax < this_xmax\ and other_ymin >= this_ymin and other_ymax <= this_ymax @classmethod - def create_path_from_transkription_position(cls, transkription_position, tr_xmin=0.0, tr_ymin=0.0): + def create_path_from_transkription_position(cls, transkription_position, tr_xmin=0.0, tr_ymin=0.0, include_pwps=True): """Create a .path.Path from a .transkription_position.TranskriptionPosition. 
""" - if len(transkription_position.positional_word_parts) > 0: + if include_pwps and len(transkription_position.positional_word_parts) > 0: first_pwp = transkription_position.positional_word_parts[0] last_pwp = transkription_position.positional_word_parts[len(transkription_position.positional_word_parts)-1] xmin = tr_xmin + first_pwp.left xmax = tr_xmin + last_pwp.left + last_pwp.width ymin = tr_ymin + sorted(pwp.top for pwp in transkription_position.positional_word_parts)[0] ymax = tr_ymin + sorted([pwp.bottom for pwp in transkription_position.positional_word_parts], reverse=True)[0] else: xmin = tr_xmin + transkription_position.left xmax = xmin + transkription_position.width ymin = tr_ymin + transkription_position.top ymax = ymin + transkription_position.height word_path = parse_path('M {}, {} L {}, {} L {}, {} L {}, {} z'.format(xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax)) return cls(path=word_path) def do_paths_intersect(self, other_path): """Returns true if paths intersect, false if not or if there was an exception. """ try: return self.path.intersect(other_path.path, justonemode=True) except AssertionError: return False def get_median_y(self, tr_ymin=0.0): """Return the median of ymin + ymax. """ return (self.path.bbox()[2] + self.path.bbox()[3])/2 - tr_ymin def get_x(self, tr_xmin=0.0): """Return xmin. """ return self.path.bbox()[0] - tr_xmin @classmethod def get_semantic_dictionary(cls): """ Creates and returns a semantic dictionary as specified by SemanticClass. 
""" dictionary = {} class_dict = cls.get_class_dictionary() properties = {'d_attribute': { 'class': str, 'cardinality': 0,\ 'name': 'hasDAttribute', 'label': 'svg path has d attribute',\ 'comment': 'The d attribute defines a path to be drawn.'}} #properties.update(cls.create_semantic_property_dictionary('style_class', str)) dictionary.update({cls.CLASS_KEY: class_dict}) dictionary.update({cls.PROPERTIES_KEY: properties}) return cls.return_dictionary_after_updating_super_classes(dictionary) def is_partially_contained_by(self, other_path): """Returns true if other_path containes this path partially. """ return other_path.contains_start_of_path(self) or other_path.contains_end_of_path(self) Index: svgscripts/datatypes/word.py =================================================================== --- svgscripts/datatypes/word.py (revision 103) +++ svgscripts/datatypes/word.py (revision 104) @@ -1,871 +1,873 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a word. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see <http://www.gnu.org/licenses/>. 1}}}
""" COPY_PROPERTY_KEY = [ 'line_number', 'deleted', 'writing_process_id' ] APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS = { 'style': 'styles' } DATA = 'debug-data' RDFS_SUBCLASSOF_LIST = ['http://www.e-editiones.ch/ontology/text#HandwrittenText'] XML_TAG = 'word' XML_EARLIER_VERSION = 'earlier-version' XML_OVERWRITES = 'overwrites' XML_CORRECTION_DICT = { 'isClarificationOfWord': 'clarifiesWord',\ 'isDeletionOfWord': 'deletesEarlierPart',\ 'isExtensionOfWord': 'extendsEarlierVersion',\ 'isTransformationOfWord': 'transformsEarlierPart' } def __init__(self, id=0, text='', line_number=-1, deleted=False, transkription_positions=None, faksimile_positions=None, word_part_objs=None, word_parts=None, writing_process_id=-1, earlier_version=None, box_paths=None, styles=None): super(Word,self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions) self.corrections = [] self.deleted = deleted self.deletion_paths = [] self.debug_container = {} self.debug_msg = None self.earlier_version = earlier_version self.edited_text = None self.editor_comment = None self.isClarificationOfWord = None self.isDeletionOfWord = None self.isExtensionOfWord = None self.isTransformationOfWord = None if len(self.text) == 0 and len(''.join([ tp.get_text() for tp in self.transkription_positions if type(tp) == TranskriptionPosition ])) > 0: self.text = ''.join([ tp.get_text() for tp in self.transkription_positions ]) self.overwrites_word = None self.styles = styles\ if styles is not None\ else [] self.verified = None self.writing_process_id = writing_process_id self.writing_processes = [] self.word_insertion_mark = None self.word_box = None self.word_parts = word_parts if word_parts is not None else [] self.word_part_objs = word_part_objs if word_part_objs is not None else [] def add_deletion_paths(self, deletion_paths, tr_xmin=0.0, tr_ymin=0.0): """Add a word deletion path to word. 
""" if len(self.word_parts) > 0: for part in self.word_parts: part.add_deletion_paths(deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) elif self.deleted and len(self.transkription_positions) > 0: + include_pwps = (len(self.transkription_positions[0].positional_word_parts) > 0 + and abs(self.transkription_positions[0].left-self.transkription_positions[0].positional_word_parts[0].left) < 10) word_path = Path.create_path_from_transkription_position(self.transkription_positions[0],\ - tr_xmin=tr_xmin, tr_ymin=tr_ymin) + tr_xmin=tr_xmin, tr_ymin=tr_ymin, include_pwps=include_pwps) self.deletion_paths = [ deletion_path for deletion_path in deletion_paths\ if do_paths_intersect_saveMode(deletion_path, word_path) ] def attach_word_to_tree(self, target_tree): """Attaches word to tree target_tree. """ word_node = super(Word,self).attach_word_to_tree(target_tree) if self.deleted is not None: word_node.set('deleted', str(self.deleted).lower()) if self.verified is not None: word_node.set('verified', str(self.verified).lower()) if self.edited_text is not None: word_node.set('edited-text', self.edited_text) if self.editor_comment is not None: self.editor_comment.attach_object_to_tree(word_node) if self.writing_process_id > -1: word_node.set('writing-process-id', str(self.writing_process_id)) for index, word_part in enumerate(self.word_parts): word_part.id = index word_part.attach_word_to_tree(word_node) if self.earlier_version is not None: earlier_node = ET.SubElement(word_node, self.XML_EARLIER_VERSION) self.earlier_version.attach_word_to_tree(earlier_node) if self.overwrites_word is not None\ and len(self.overwrites_word.transkription_positions) > 0: overwrite_node = ET.SubElement(word_node, self.XML_OVERWRITES) self.overwrites_word.attach_word_to_tree(overwrite_node) if self.word_box is not None: self.word_box.attach_object_to_tree(word_node) if len(self.corrections) > 0: word_node.set('corrections', ' '.join(set([ str(word.id) for word in self.corrections ]))) for key in 
self.XML_CORRECTION_DICT.keys(): if self.__dict__[key] is not None: word_node.set(self.XML_CORRECTION_DICT[key], 'true') return word_node def belongs_to_multiple_writing_processes(self, include_parts=False): """Returns true if transkription_positions belong to different WritingProcesses. """ if len(self.word_parts) > 0 and include_parts: return len(set(word.writing_process_id for word in self.word_parts)) > 1 return len(set(tp.writing_process_id for tp in self.transkription_positions )) > 1 def set_parent_word_writing_process_id(self): """Set writing_process_id for parent word. """ ids = set(word.transkription_positions[0].style for word in self.word_parts\ if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None) if len(ids) > 1: self.writing_process_id = max([style.writing_process_id for style in ids]) if len(set(word.transkription_positions[0].style.create_a_copy_wo_writing_process_id()\ for word in self.word_parts\ if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None))\ > 1: self.writing_process_id += 1 @classmethod def create_cls(cls, word_node): """Creates a word from a (lxml.Element) node. 
[:return:] Word """ cls = super(Word,cls).create_cls(word_node) cls.writing_process_id = int(word_node.get('writing-process-id')) if bool(word_node.get('writing-process-id')) else -1 cls.split_strings = None cls.join_string = word_node.get('join') if bool(word_node.get('split')): cls.split_strings = word_node.get('split').split(' ') if ''.join(cls.split_strings) != cls.text: error_msg = 'Error in file {0}: word with id="{1}" has split attributes that do not correspond to its text attribute!\n'.\ format(word_node.getroottree().docinfo.URL, str(cls.id))\ + 'Split attributes: "{0}".\n'.format(' '.join(cls.split_strings))\ + 'Text attribute: "{0}".\n'.format(cls.text) raise Exception(error_msg) cls.verified = word_node.get('verified') == 'true'\ if bool(word_node.get('verified')) else None cls.deleted = word_node.get('deleted') == 'true'\ if bool(word_node.get('deleted')) else None cls.edited_text = word_node.get('edited-text') cls.editor_comment = [ EditorComment.create_cls_from_node(node) for node in word_node.xpath('./' + EditorComment.XML_TAG) ][0]\ if len([ node for node in word_node.xpath('./' + EditorComment.XML_TAG) ]) > 0 else None cls.word_parts = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_TAG) ] if bool(word_node.get('corrections')): for index in [ int(i) for i in word_node.get('corrections').split(' ') ]: if index < len(cls.word_parts): cls.corrections.append(cls.word_parts[index]) cls.earlier_version = None if len(word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG)) > 0: cls.earlier_version = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG) ][0] for key_value in cls.XML_CORRECTION_DICT.values(): if word_node.get(key_value) == 'true': cls.__dict__[key_value] = True if cls.earlier_version is not None: for word_part in cls.word_parts: for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Part') ]: if cls.XML_CORRECTION_DICT[key] in 
word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]\ and len(cls.word_parts) <= len(cls.earlier_version.word_parts): try: word_part.__dict__[key] = cls.earlier_version.word_parts[word_part.id] except Exception: msg = f'{cls.id} {cls.text}: {word_part.id}' raise Exception(msg) for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('EarlierVersion') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]: word_part.__dict__[key] = cls.earlier_version for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Word') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]: word_part.__dict__[key] = cls cls.overwrites_word = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)][0]\ if len(word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)) > 0\ else None cls.word_box = [ Box(node=node) for node in word_node.xpath('./' + Box.XML_TAG) ][0]\ if len(word_node.xpath('./' + Box.XML_TAG)) > 0\ else None return cls @classmethod def join_words(cls, list_of_words, add_white_space_between_words=False): """Creates a word from a list of words. 
[:return:] Word """ if len(list_of_words) > 1: deleted = True in [ word.deleted for word in list_of_words ]\ and len(set([ word.deleted for word in list_of_words ])) == 1 line_number = list_of_words[0].line_number\ if len(set([ word.line_number for word in list_of_words ])) == 1\ else -1 for word in list_of_words: if len(word.word_parts) > 0: index = list_of_words.index(word) list_of_words.remove(word) for part_word in reversed(word.word_parts): list_of_words.insert(index, part_word) new_word_text = ''.join([word.text for word in list_of_words])\ if not add_white_space_between_words\ else ' '.join([word.text for word in list_of_words]) new_word = cls(id=list_of_words[0].id, text=new_word_text,\ line_number=line_number, deleted=deleted, word_parts=list_of_words) if True in [ word.text.endswith('-') or word.text.endswith('=') for word in new_word.word_parts[:-1]]: change_text = [ word.text for word in new_word.word_parts[:-1] if word.text.endswith('-') or word.text.endswith('=') ][0] new_word.edited_text = new_word.text.replace(change_text, change_text[:-1]) for id, word in enumerate(new_word.word_parts): word.id = id return new_word if len(list_of_words) > 0: return list_of_words[0] else: return None def create_earlier_version(self, root_word=None, id=0): """Create an earlier version of word. 
""" if root_word is None: root_word = self root_word.set_parent_word_writing_process_id() word_parts = [] non_single_punctuation_word_parts = [ word_part for word_part in self.word_parts\ if not re.match(SINGLE_PUNCTUATION_PATTERN, word_part.text) ] non_single_punctuation_word_parts_length = len(non_single_punctuation_word_parts) if non_single_punctuation_word_parts_length > 0\ and len([ word_part for word_part in non_single_punctuation_word_parts\ if word_part.deleted ])\ == non_single_punctuation_word_parts_length: self.deleted = True for word_part in non_single_punctuation_word_parts: word_part.deleted = False for id, word_part in enumerate(self.word_parts): earlierWordPart = word_part.create_earlier_version(root_word=root_word, id=id) if word_part.deleted: word_part.isDeletionOfWord = earlierWordPart word_parts.append(earlierWordPart) if word_part not in self.corrections: self.corrections.append(word_part) elif word_part.overwrites_word is not None\ and ((len(word_part.transkription_positions) > 0\ and word_part.overwrites_word.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style\ != word_part.overwrites_word.transkription_positions[0].style) or word_part.word_box.earlier_version): word_part.overwrites_word.id = word_part.id word_parts.append(word_part.overwrites_word) word_part.isTransformationOfWord = word_part.overwrites_word #print(f'transform: {self.text}') if word_part not in self.corrections: self.corrections.append(word_part) elif root_word.writing_process_id > -1\ and (len(word_part.transkription_positions) > 0\ and word_part.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style.writing_process_id\ == root_word.writing_process_id): word_part.extendsEarlierVersion = True #print('extends') if word_part not in self.corrections: self.corrections.append(word_part) else: if word_part.deleted: word_part.isDeletionOfWord = 
earlierWordPart word_parts.append(earlierWordPart) if word_part not in self.corrections: self.corrections.append(word_part) else: #print(f'default: {self.text}') word_parts.append(earlierWordPart) text = ''.join([ word.text for word in word_parts ])\ if len(word_parts) > 0\ else self.text if len(word_parts) == 1: self.transkription_positions += word_parts[0].transkription_positions self.faksimile_positions += word_parts[0].faksimile_positions word_parts = [] new_transkription_positions = copy.deepcopy(self.transkription_positions) if len(self.transkription_positions) > 0\ and self.transkription_positions[0].style is not None: writing_process_id = self.transkription_positions[0].style.writing_process_id for new_tp in new_transkription_positions: new_tp.style.writing_process_id = writing_process_id return Word(id=id, text=text, transkription_positions=new_transkription_positions,\ faksimile_positions=self.faksimile_positions, line_number=self.line_number,\ word_parts=word_parts) def create_correction_history(self, page=None, box_style=None): """Create correction history. 
""" if self.word_box is not None: manuscript = self.transkription_positions[0].style.manuscript\ if len(self.transkription_positions) > 0\ and self.transkription_positions[0].style is not None\ else None style = Style() if box_style is not None: style = box_style if page is not None: style = Style.create_cls(page, self.word_box.text_style_class, manuscript=manuscript) for font_key in [ font_key for font_key in self.word_box.text_style_class.split(' ') if font_key in page.fontsizekey2stage_mapping.keys() ]: style.writing_process_id = page.fontsizekey2stage_mapping.get(font_key) transkription_positions = TranskriptionPosition.copy_list_of_cls(self.transkription_positions) for transkription_position in transkription_positions: transkription_position.style = style self.overwrites_word = Word(text=self.word_box.earlier_text, transkription_positions=transkription_positions,\ line_number=self.line_number) for word_part in self.word_parts: word_part.create_correction_history(page=page, box_style=box_style) if len(self.word_parts) > 0: earlier_version = self.create_earlier_version() extending_words = self._get_parts_with_property_key('extendsEarlierVersion') if len(extending_words) > 0: for word in extending_words: word.isExtensionOfWord = earlier_version if self.has_mixed_status('deleted', include_parts=True): self.edited_text = ''.join([ word.text for word in self.word_parts if not word.deleted ]) if len(self.corrections) > 0: self.earlier_version = earlier_version @staticmethod def CREATE_WORD(word_node=None, page=None, word_part_objs=[], id=0, height=0, endX=0, endSign=None, matrix=None, line_number=-1, debug_msg=None): """Creates a word from a (lxml.Element) node or word_part_objs. 
[:return:] Word """ if word_node is not None: # init word from xml node id = int(word_node.get('id')) line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else line_number text = word_node.get('text') deleted = bool(word_node.get('deleted')) and word_node.get('deleted') == 'true' transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('.//' + WordPosition.TRANSKRIPTION) ] faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('.//' + WordPosition.FAKSIMILE) ] word_part_objs = [ item.attrib for item in word_node.findall('.//' + Word.DATA + '/part')]\ if len(word_node.findall('.//' + Word.DATA)) > 0\ else [ item.attrib for item in word_node.findall('.//part')] return Word(id=id, text=text, deleted=deleted, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions, word_part_objs=word_part_objs) elif len(word_part_objs) > 0: # init word from word_part_obj that has been extracted from svg file WIDTH = 5 TOPCORRECTION = 2.0 FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize height = height x = round(float(word_part_objs[0]['x']), 3) if(page is not None and bool(page.style_dict)): HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height style_set = set(' '.join(set( dict['class'] for dict in word_part_objs)).split(' ')) biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set) height = round(biggest_font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / biggest_font_size, 3) TOPCORRECTION = 1 + HEIGHT_FACTOR / biggest_font_size if endSign is not None and '%' in endSign: lastCharFontSizeList = [ float(page.style_dict[key]['font-size'].replace('px',''))\ for key in word_part_objs[len(word_part_objs)-1]['class'].split(' ')\ if bool(page.style_dict[key].get('font-size'))] lastCharFontSize = lastCharFontSizeList[0] if len(lastCharFontSizeList) > 0 else 1 endX = float(endX) + lastCharFontSize * 
FONTWIDTHFACTOR elif endSign is not None and '%' in endSign: endX = float(endX) + WIDTH bottom = round(float(word_part_objs[0]['y']), 3) y = round(bottom - height + TOPCORRECTION, 3) width = round(float(endX) - x, 3) transkription_positions = [ WordPosition(height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) ] text = ''.join([ dict['text'] for dict in word_part_objs]) line_number = page.get_line_number( (y + bottom)/2) if page is not None else line_number word = Word(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions, word_part_objs=word_part_objs) word.debug_msg = debug_msg return word else: error_msg = 'word_node has not been defined' if (word_node is None) else 'word_part_objs is empty' raise Exception('Error: {}'.format(error_msg)) @classmethod def get_semantic_dictionary(cls): """ Creates and returns a semantic dictionary as specified by SemanticClass. """ dictionary = super(Word,cls).get_semantic_dictionary() dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('styles', Style,\ cardinality=1, cardinality_restriction='minCardinality',\ name='wordHasStyle', label='word has style', comment='Word has an appearance that is characterized by this style.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('corrections', Word,\ name='wordHasCorrection', label='word has corrections', comment='Word has a correction made by the author.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deletion_paths', WordDeletionPath,\ name='wordIsDeletedByPath', label='word has been deleted with a deletion path',\ comment='Word has been deleted by the author using a deletion path.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('editor_comment', EditorComment,\ name='wordHasEditorComment', label='word has a comment by the editors', comment='Word has been commented by the editors.')) 
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_version', Word,\ name='wordHasEarlierVersion', label='word has an earlier version', comment='There is a earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('edited_text', str,\ name='hasEditedText', label='word has an edited text', comment='Word has a text that is edited automatically by removing deleted parts or hyphens.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isClarificationOfWord', Word,\ name='isClarificationOfWord', label='word is a clarification of word',\ comment='The author has used this part of the word in order to clarify the appearance of that word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isDeletionOfWord', Word,\ name='isDeletionOfWord', label='word is a deletion of word',\ comment='The author has used this part of a word in order to delete the corresponding part of an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isExtensionOfWord', Word,\ name='isExtensionOfWord', label='word is a extension of word',\ comment='The author has used this part of a word in order to extend an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isTransformationOfWord', Word,\ name='isTransformationOfWord', label='word is a transformation of word',\ comment='The author has used this part of a word in order to transform the corresponding part of an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('overwrites_word', Word,\ name='overwritesWord', label='word overwrites word',\ comment='The author has used this word in order to overwrite that word.')) # This makes wordHasWordParts a subproperty of cls.HAS_HOMOTYPIC_PARTS_URL_STRING, # 
cls.return_dictionary_after_updating_super_classes will subclass Word under the corresponding super class. dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('word_parts', list,\ name='wordHasWordParts', label='word has word parts', comment='Word consists of a list of words.',\ subPropertyOf=cls.HAS_HOMOTYPIC_PARTS_URL_STRING)) super_property_dictionary = cls.create_semantic_property_dictionary(cls.SUPER_PROPERTY, Word,\ name='isCorrectionOfWord', label='word is a correction of word',\ comment='The author has used this word in order to correct that word.') for key in cls.XML_CORRECTION_DICT.keys(): correction_dict = dictionary[cls.PROPERTIES_KEY].get(key) correction_dict.update(super_property_dictionary) dictionary[cls.PROPERTIES_KEY].update({key: correction_dict}) return cls.return_dictionary_after_updating_super_classes(dictionary) def has_mixed_status(self, property_key, include_parts=False, concerns_word=True): """Returns true if transkription_positions have mixed status concerning the property_key in their __dict__. """ if False in set(property_key in tp.__dict__.keys() for tp in self.transkription_positions): return False if len(self.word_parts) > 0 and include_parts: if concerns_word: if False in set(property_key in word.__dict__.keys() for word in self.word_parts): return False return len(set(word.__dict__[property_key] for word in self.word_parts)) > 1 else: return len(set(word.transkription_positions[0].__dict__[property_key] for word in self.word_parts\ if len(word.transkription_positions) > 0 and property_key in word.transkription_positions[0].__dict__.keys())) > 1 return len(set(tp.__dict__[property_key] for tp in self.transkription_positions )) > 1 def init_word(self, page): """Initialize word with objects from page. 
""" super(Word,self).init_word(page) if self.writing_process_id > -1: self.writing_processes += [ wp for wp in page.writing_processes if wp.id == self.writing_process_id ] writing_processes = self.writing_processes for word_part in self.word_parts: word_part.init_word(page) self.lines += word_part.lines self.writing_processes += word_part.writing_processes self.lines = [ line for line in set(self.lines) ] self.writing_processes = [ wp for wp in set(self.writing_processes)] if self.overwrites_word is not None: self.overwrites_word.init_word(page) if self.earlier_version is not None: if self.earlier_version.writing_process_id == -1: self.earlier_version.writing_process_id = self.writing_process_id-1 if self.earlier_version.line_number == -1: self.earlier_version.line_number = self.line_number self.earlier_version.init_word(page) def join(self, other_word, append_at_end_of_new_word=True, add_white_space_between_words=False): """Joins other_word with this word by changing the text of current word and adding other_word.transkription_positions. """ if append_at_end_of_new_word: self.text = self.text + other_word.text\ if not add_white_space_between_words\ else self.text + ' ' + other_word.text for position in other_word.transkription_positions: position.id = str(len(self.transkription_positions)) self.transkription_positions.append(position) else: self.text = other_word.text + self.text index = 0 for position in other_word.transkription_positions: self.transkription_positions.insert(index, position) index += 1 while index < len(self.transkription_positions): self.transkription_positions[index].id = str(index) index += 1 self.simplify_transkription_positions() def partition_according_to_deletion(self): """Partition a word according to its transkription_positions' deletion status ->split word and add partial words as its parts. 
""" if self.has_mixed_status('deleted'): transkription_positions = [] last_status = None for transkription_position in self.transkription_positions: if transkription_position.deleted != last_status\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id) self.word_parts.append(newWord) transkription_positions = [] transkription_positions.append(transkription_position) last_status = transkription_position.deleted if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id) self.word_parts.append(newWord) self.transkription_positions = [] self.line_number = -1 self.deleted = False elif len(self.word_parts) > 0: self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_deletion') elif not self.deleted\ and len(self.transkription_positions) > 0\ and self.transkription_positions[0].deleted: self.deleted = True def partition_according_to_writing_process_id(self): """Partition a word according to its transkription_positions' writing_process_ids ->split word and add partial words as its parts. 
""" if self.belongs_to_multiple_writing_processes(): last_writing_process_id = -1 transkription_positions = [] for transkription_position in self.transkription_positions: if transkription_position.writing_process_id != last_writing_process_id\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, writing_process_id=last_writing_process_id) self.word_parts.append(newWord) transkription_positions = [] transkription_positions.append(transkription_position) last_writing_process_id = transkription_position.writing_process_id if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, writing_process_id=last_writing_process_id) self.word_parts.append(newWord) self.transkription_positions = [] elif len(self.word_parts) > 0: self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_writing_process_id') if self.belongs_to_multiple_writing_processes(include_parts=True): self.writing_process_id = sorted(set([ word.writing_process_id for word in self.word_parts ]), reverse=True)[0] elif len(self.transkription_positions) > 0: self.writing_process_id = self.transkription_positions[0].writing_process_id def process_boxes(self, box_paths, tr_xmin=0.0, tr_ymin=0.0, previous_word_has_box=False): """Determines whether word is over a word box. 
""" word_over_box = None if len(self.word_parts) > 0: for word in self.word_parts: current_word = word.process_boxes(box_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin, previous_word_has_box=(word_over_box is not None)) if current_word is not None and current_word.word_box is not None: word_over_box = current_word else: new_tp_dict = {} for index, transkription_position in enumerate(self.transkription_positions): if previous_word_has_box and index == 0: if len(transkription_position.positional_word_parts) > 0: transkription_position.positional_word_parts[0].left += transkription_position.positional_word_parts[0].width/2 #print(f'{self.text}: {transkription_position.positional_word_parts[0].left}') else: transkription_position.left += 1 word_path = Path.create_path_from_transkription_position(transkription_position,\ tr_xmin=tr_xmin, tr_ymin=tr_ymin) containing_boxes = [ box_path for box_path in box_paths\ if word_path.is_partially_contained_by(box_path)\ or box_path.do_paths_intersect(word_path) ] if len(containing_boxes) > 0: if previous_word_has_box: print(f'{self.text}: {word_path.path.bbox()} {containing_boxes[0].path.bbox()}') self._set_box_to_transkription_position(containing_boxes[0], word_path,\ transkription_position, new_tp_dict, tr_xmin) box_paths.remove(containing_boxes[0]) for replace_tp in new_tp_dict.keys(): for tp in new_tp_dict.get(replace_tp): self.transkription_positions.insert(self.transkription_positions.index(replace_tp), tp) self.transkription_positions.remove(replace_tp) word_over_box = self._get_partial_word_over_box() update_transkription_position_ids(self) return word_over_box def set_word_insertion_mark(self, word_insertion_mark): """Sets word_insertion_mark """ self.word_insertion_mark = word_insertion_mark def set_writing_process_id_to_transkription_positions(self, page): """Determines the writing process id of the transkription_positions. 
""" for transkription_position in self.transkription_positions: if len(transkription_position.positional_word_parts) > 0: for font_key in transkription_position.positional_word_parts[0].style_class.split(' '): if font_key in page.fontsizekey2stage_mapping.keys(): transkription_position.writing_process_id = page.fontsizekey2stage_mapping.get(font_key) def simplify_transkription_positions(self): """Merge transkription_positions if possible. """ index = len(self.transkription_positions)-1 while index > 0\ and False not in [ 'positional_word_parts' in tp.__dict__.keys() for tp in self.transkription_positions ]: current_tp = self.transkription_positions[index] index -= 1 previous_tp = self.transkription_positions[index] if previous_tp.is_mergebale_with(current_tp): positional_word_parts = previous_tp.positional_word_parts positional_word_parts += current_tp.positional_word_parts transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\ positional_word_parts, debug_msg_string='simplifying transkription positions', transkription_position_id=previous_tp.id) if len(transkription_positions) == 1: transkription_positions[0].writing_process_id = previous_tp.writing_process_id\ if previous_tp.writing_process_id != -1\ else current_tp.writing_process_id self.transkription_positions.pop(index+1) self.transkription_positions[index] = transkription_positions[0] #print(self.text, len(self.transkription_positions)) def split(self, split_string, start_id=0): """Splits the word and returns an 3-tuple of new words. 
""" previousString, currentString, nextString = self.text.partition(split_string) currentWord = None previousWord = None nextWord = None previousIndex = 0 current_id = start_id all_positional_word_parts = [] for position in self.transkription_positions: all_positional_word_parts += position.positional_word_parts if len(all_positional_word_parts) == 0: warnings.warn('ATTENTION: Word: {} {} with Strings "{}, {}, {}": there are no parts!'.format(self.id, self.text, previousString, currentString, nextString)) if len(previousString) > 0: previous_pwps = [] while previousIndex < len(all_positional_word_parts) and previousString != ''.join([ pwp.text for pwp in previous_pwps ]): previous_pwps.append(all_positional_word_parts[previousIndex]) previousIndex += 1 if previousString != ''.join([ pwp.text for pwp in previous_pwps ]): warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(previousString)) else: previous_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(previous_pwps, debug_msg_string='word.split') previous_text = ''.join([ pwp.text for pwp in previous_pwps ]) previousWord = Word(text=previous_text, id=current_id, line_number=self.line_number, transkription_positions=previous_transkription_positions) previousWord.faksimile_positions = self.faksimile_positions current_id += 1 all_positional_word_parts = all_positional_word_parts[previousIndex:] if len(nextString) > 0: tmp_pwps = [] index = 0 while index < len(all_positional_word_parts) and currentString != ''.join([ pwp.text for pwp in tmp_pwps ]): tmp_pwps.append(all_positional_word_parts[index]) index += 1 if currentString != ''.join([ pwp.text for pwp in tmp_pwps ]): warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(currentString)) else: next_pwps = all_positional_word_parts[index:] next_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(next_pwps, debug_msg_string='word.split') next_text = 
''.join([ pwp.text for pwp in next_pwps ]) nextWord = Word(text=next_text, id=current_id+1, line_number=self.line_number, transkription_positions=next_transkription_positions) nextWord.faksimile_positions = self.faksimile_positions all_positional_word_parts = all_positional_word_parts[:index] current_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(all_positional_word_parts, debug_msg_string='word.split') current_text = ''.join([ pwp.text for pwp in all_positional_word_parts ]) currentWord = Word(text=current_text, id=current_id, line_number=self.line_number, transkription_positions=current_transkription_positions) currentWord.faksimile_positions = self.faksimile_positions return previousWord, currentWord, nextWord def split_according_to_status(self, status, splits_are_parts=False): """Split a word according to its transkription_positions' text. :return: a list of new word.Word """ new_words = [] if self.has_mixed_status(status): last_status = None transkription_positions = [] for transkription_position in self.transkription_positions: if transkription_position.__dict__[status] != last_status\ and len(transkription_positions) > 0: new_words.append(\ self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words))) transkription_positions = [] transkription_positions.append(transkription_position) last_status = transkription_position.__dict__[status] if len(transkription_positions) > 0: new_words.append(\ self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words))) if splits_are_parts: self.word_parts += new_words if len(self.word_parts) > 0: self.transkription_positions = [] return new_words def undo_partitioning(self): """Undo partitioning. 
""" if len(self.word_parts) > 0: for word_part in self.word_parts: word_part.undo_partitioning() if self.text != ''.join([ tp.get_text() for tp in self.transkription_positions ]): self.transkription_positions += word_part.transkription_positions self.earlier_version = None self.edited_text = None self.word_box = None self.word_parts = [] self.corrections = [] self.earlier_versions = [] self.box_paths = [] def _create_new_word(self, transkription_positions, status, new_id=0): """Create a new word from self and transkription_positions. """ newWord = Word(id=new_id, transkription_positions=transkription_positions) for key in self.COPY_PROPERTY_KEY: if key != status and key in self.__dict__.keys(): newWord.__dict__[key] = self.__dict__[key] if status in self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS.keys(): newWord.__dict__[self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS[status]].append(transkription_positions[0].__dict__[status]) else: newWord.__dict__[status] = transkription_positions[0].__dict__[status] return newWord def _get_parts_with_property_key(self, property_key): """Return a list of word_parts with property == property_key. """ word_parts = [] for word_part in self.word_parts: if property_key in word_part.__dict__.keys(): word_parts.append(word_part) else: word_parts += word_part._get_parts_with_property_key(property_key) return word_parts def _get_partial_word_over_box(self): """Partition a word according to its transkription_positions' has_box ->split word and add partial words as its parts. 
:return: word over box or self """ word_over_box = None if self.has_mixed_status('has_box'): transkription_positions = [] last_word_box = None for transkription_position in self.transkription_positions: if transkription_position.has_box != last_word_box\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id) self.word_parts.append(newWord) if last_word_box is not None: word_over_box = newWord word_over_box.word_box = last_word_box transkription_positions = [] transkription_positions.append(transkription_position) last_word_box = transkription_position.has_box if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id) self.word_parts.append(newWord) if last_word_box is not None: word_over_box = newWord word_over_box.word_box = last_word_box self.transkription_positions = [] elif len(self.word_parts) > 0: #self.word_parts, word_over_box = execute_function_on_parts(self.word_parts, inspect.currentframe().f_code.co_name) #'get_partial_word_over_box') for word_part in self.word_parts: if word_over_box is None: word_over_box = word_part._get_partial_word_over_box() else: break elif len([ tp for tp in self.transkription_positions if tp.has_box is not None]) == 1: word_over_box = self word_over_box.word_box = [ tp for tp in self.transkription_positions if tp.has_box is not None][0].has_box return word_over_box def _set_box_to_transkription_position(self, box_path, word_path, transkription_position, new_transkription_positions_dictionary, tr_xmin): """Set box_path to transkription_position that is contained by box_path. Create new transkription_positions by splitting old ones if necessaryand add them to new_transkription_positions_dictionary. 
""" if box_path.contains_path(word_path): transkription_position.has_box = box_path elif box_path.contains_start_of_path(word_path): split_position = box_path.path.bbox()[1] - tr_xmin new_tps = transkription_position.split(split_position) if len(new_tps) == 2: new_tps[0].has_box = box_path new_transkription_positions_dictionary.update({ transkription_position: new_tps }) else: transkription_position.has_box = box_path elif box_path.contains_end_of_path(word_path): split_position = box_path.path.bbox()[0] - tr_xmin new_tps = transkription_position.split(split_position) if len(new_tps) == 2: new_tps[1].has_box = box_path new_transkription_positions_dictionary.update({ transkription_position: new_tps }) else: transkription_position.has_box = box_path else: # box_path in the middle of word_pathz split_position1 = box_path.path.bbox()[0] - tr_xmin split_position2 = box_path.path.bbox()[1] - tr_xmin new_tps = transkription_position.split(split_position1, split_position2) if len(new_tps) >= 2: new_tps[1].has_box = box_path new_transkription_positions_dictionary.update({ transkription_position: new_tps }) else: transkription_position.has_box = box_path def do_paths_intersect_saveMode(mypath1, mypath2): """Returns true if paths intersect, false if not or if there was an exception. """ try: return mypath1.path.intersect(mypath2.path, justonemode=True)\ or mypath1.is_partially_contained_by(mypath2) except AssertionError: return False Index: svgscripts/datatypes/image.py =================================================================== --- svgscripts/datatypes/image.py (revision 103) +++ svgscripts/datatypes/image.py (revision 104) @@ -1,138 +1,149 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This super class can be used to represent all image types. 
""" # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET from os.path import isfile import sys from .attachable_object import AttachableObject from .matrix import Matrix from .text_field import TextField sys.path.append('py2ttl') from class_spec import SemanticClass class Image(AttachableObject,SemanticClass): """ This super class represents all types of images. Args: file_name (str): name of the image file. node (lxml.etree.Element) node, containing information URL (str): URL of image file. 
height (float): height of image width (float): width of image text_field (.text_field.TextField) text_field on image representation """ stringKeys = [ 'file_name', 'URL', 'local_path' ] floatKeys = [ 'height', 'width' ] XML_TAG = 'image' + SECONDARY_URL = 'http://localhost:8000/' + FAKSIMILE_DIR = 'faksimiles/' def __init__(self, node=None, file_name=None, local_path=None, URL=None, height=0.0, width=0.0, matrix=None, text_field=None, tag=XML_TAG): self.text_field = text_field self.tag = tag if node is not None: self.file_name = node.get('file-name') self.local_path = node.get('local-path') self.URL = node.get('URL') self.height = float(node.get('height')) self.width = float(node.get('width')) self.transform = Matrix(node.get('transform')) if bool(node.get('transform')) and 'matrix(' in node.get('transform') else None if len(node.findall(TextField.XML_TAG)) > 0: self.text_field = TextField(node=node.find(TextField.XML_TAG)) else: self.file_name = file_name self.local_path = local_path self.URL = URL self.height = height self.width = width self.transform = matrix + self.primaryURL = self.URL + self.secondaryURL = None + if self.file_name is not None: + self.secondaryURL = self.SECONDARY_URL + self.file_name.replace('./','')\ + if self.file_name is not None and self.file_name.endswith('svg')\ + else self.SECONDARY_URL + self.FAKSIMILE_DIR + self.file_name def attach_object_to_tree(self, target_tree): """Attach object to tree. 
""" obj_node = target_tree.getroot().find('.//' + self.tag) \ if(len(target_tree.getroot().findall('.//' + self.tag)) > 0) \ else ET.SubElement(target_tree.getroot(), self.tag) for key in self.floatKeys: if self.__dict__[key] is not None: obj_node.set(key.replace('_','-'), str(round(self.__dict__[key], 3))) for key in self.stringKeys: if self.__dict__[key] is not None: obj_node.set(key.replace('_','-'), self.__dict__[key]) if self.transform is not None and self.transform.isRotationMatrix(): obj_node.set('transform', self.transform.toString()) if self.text_field is not None: self.text_field.attach_object_to_tree(obj_node) @classmethod def get_semantic_dictionary(cls): """ Creates and returns a semantic dictionary as specified by SemanticClass. """ dictionary = {} class_dict = cls.get_class_dictionary() properties = {} for floatKey in Image.floatKeys: properties.update(cls.create_semantic_property_dictionary(floatKey, float, cardinality=1)) properties.update(cls.create_semantic_property_dictionary('file_name', str, cardinality=1)) properties.update(cls.create_semantic_property_dictionary('text_field', TextField)) properties.update(cls.create_semantic_property_dictionary('transform', str)) - properties.update(cls.create_semantic_property_dictionary('URL', str, cardinality=1)) + properties.update(cls.create_semantic_property_dictionary('primaryURL', str, cardinality=1, subPropertyOf=cls.HAS_URL)) + properties.update(cls.create_semantic_property_dictionary('secondaryURL', str, cardinality=1, subPropertyOf=cls.HAS_URL)) dictionary.update({'class': class_dict}) dictionary.update({'properties': properties}) return dictionary class SVGImage(Image): """This class represents a svg image. 
""" XML_TAG = 'svg-image' + URL_PREFIX = 'http://existdb-test.dasch.swiss/exist/rest/db/storage/nietzsche/' def __init__(self, node=None, file_name=None, URL=None, height=0.0, width=0.0, text_field=None, tag=XML_TAG): if node is not None and node.tag != self.XML_TAG: file_name = node.get('file') height = float(node.get('height')) if bool(node.get('height')) else 0.0 width = float(node.get('width')) if bool(node.get('width')) else 0.0 node = None super(SVGImage, self).__init__(node=node, file_name=file_name, URL=URL,\ height=height, width=width, text_field=text_field, tag=self.XML_TAG) + self.primaryURL = self.URL_PREFIX + self.file_name.replace('./', '') def decontextualize_file_name(self, update_url=None): """Decontextualize file name. """ self.file_name = self.file_name.replace('./', '') if update_url is not None: self.URL = update_url + self.file_name # @classmethod # def get_semantic_dictionary(cls): # """ Creates and returns a semantic dictionary as specified by SemanticClass. # """ # dictionary = super(SVGImage,cls).get_semantic_dictionary() # return cls.return_dictionary_after_updating_super_classes(dictionary) Index: svgscripts/datatypes/faksimile_image.py =================================================================== --- svgscripts/datatypes/faksimile_image.py (revision 103) +++ svgscripts/datatypes/faksimile_image.py (revision 104) @@ -1,108 +1,122 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent faksimile images. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import fnmatch from lxml import etree as ET import os from os.path import basename, dirname, isfile, realpath, sep import sys from .image import Image from .matrix import Matrix from .text_field import TextField sys.path.append('svgscripts') from local_config import FAKSIMILE_LOCATION class FaksimileImage(Image): """ This class represents a faksimile image. Args: file_name (str): name of the image file. node (lxml.etree.Element) node, containing information URL (str): URL of image file. height (float): height of image width (float): width of image x (float): x y (float): y """ XML_TAG = 'faksimile-image' - #OLD_NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/' - NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/' + NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/' + NIETZSCHE_SOURCES_API_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/' + NIETZSCHE_SOURCES_IMAGE_API_URL = 'http://www.nietzschesource.org/DFGAapi/images/DFGA/' def __init__(self, node=None, file_name=None, local_path=None, URL=None, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None, text_field=None): super(FaksimileImage, self).__init__(node=node, file_name=file_name, URL=URL, local_path=local_path,\ height=height, width=width, matrix=matrix, text_field=text_field, tag=self.XML_TAG) self.x = x self.y = y + self.apiURL = None + self.thumbURL = None + self.mediumURL = None + if self.file_name is not None: + nsource_page_name = self.file_name.replace('.jpg','') + nsource_manuscript_name = nsource_page_name.split(',')[0] + self.apiURL 
= self.NIETZSCHE_SOURCES_API_URL + nsource_page_name + self.thumbURL = self.NIETZSCHE_SOURCES_IMAGE_API_URL + nsource_manuscript_name + '/mini/' + self.file_name + self.mediumURL = self.NIETZSCHE_SOURCES_IMAGE_API_URL + nsource_manuscript_name + '/medium/' + self.file_name + if self.primaryURL is not None and self.primaryURL.startswith(self.NIETZSCHE_SOURCES_API_URL): + self.apiURL = self.primaryURL + self.primaryURL = self.NIETZSCHE_SOURCES_URL + basename(self.primaryURL) def get_image_joined_with_text_field(self, text_field): """Returns a new instance of itself that has a text_field (text_field.TextField). """ return FaksimileImage(file_name=self.file_name, local_path=self.local_path, URL=self.URL, height=self.height,\ width=self.width, x=self.x, y=self.y, text_field=text_field) -# @classmethod -# def get_semantic_dictionary(cls): -# """ Creates and returns a semantic dictionary as specified by SemanticClass. -# """ -# dictionary = super(FaksimileImage,cls).get_semantic_dictionary() -# dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text_field', TextField)) -# #dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('URL', str, cardinality=1)) -# return cls.return_dictionary_after_updating_super_classes(dictionary) + @classmethod + def get_semantic_dictionary(cls): + """ Creates and returns a semantic dictionary as specified by SemanticClass. 
+ """ + dictionary = super(FaksimileImage,cls).get_semantic_dictionary() + dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('apiURL', str, subPropertyOf=cls.HAS_URL)) + dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('thumbURL', str, subPropertyOf=cls.HAS_URL)) + dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('mediumURL', str, subPropertyOf=cls.HAS_URL)) + return cls.return_dictionary_after_updating_super_classes(dictionary) @staticmethod def CREATE_IMAGE(image_node, source_file=None): """Instantiates a FaksimileImage from a (lxml.etree.Element) image_node. """ namespaces = image_node.nsmap if len(namespaces) == 0: namespaces = { 'xlink': '' } local_path = image_node.get('{%s}href' % namespaces['xlink']) file_name = basename(local_path) if file_name != local_path and source_file is not None: local_path = realpath(dirname(source_file)) + sep + local_path local_path = realpath(local_path) if not isfile(local_path): local_path = None for path, dirs, files in os.walk(os.path.abspath(FAKSIMILE_LOCATION)): for filename in fnmatch.filter(files, file_name): local_path = os.path.join(path, filename) break URL = FaksimileImage.NIETZSCHE_SOURCES_URL + file_name.replace('.jpg','') height = float(image_node.get('height')) if bool(image_node.get('height')) else 0.0 width = float(image_node.get('width')) if bool(image_node.get('width')) else 0.0 x = float(image_node.get('x')) if bool(image_node.get('x')) else 0.0 y = float(image_node.get('y')) if bool(image_node.get('y')) else 0.0 matrix = Matrix(transform_matrix_string=image_node.get('transform'))\ if bool(image_node.get('transform'))\ else None return FaksimileImage(file_name=file_name, local_path=local_path, URL=URL, height=height, width=width, x=x, y=y, matrix=matrix) Index: tests_svgscripts/test_word.py =================================================================== --- tests_svgscripts/test_word.py (revision 103) +++ 
# NOTE(review): reconstructed from a whitespace-mangled diff; statement order
# and runtime strings preserved, formatting restored.
import unittest
from os import sep, path
import lxml.etree as ET
import sys

sys.path.append('svgscripts')
from process_words_post_merging import reset_page, update_writing_process_ids
from datatypes.box import Box
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.matrix import Matrix
import datatypes.page
from datatypes.path import Path
from datatypes.positional_word_part import PositionalWordPart
from datatypes.style import Style
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.word import Word, execute_function_on_parts, update_transkription_position_ids
from datatypes.word_deletion_path import WordDeletionPath
from datatypes.word_position import WordPosition

sys.path.append('py2ttl')
from class_spec import SemanticClass


class Page:
    """Minimal stand-in for datatypes.page.Page used by position-creation tests."""

    def __init__(self):
        self.svg_file = None

    def get_line_number(self, input=0):
        return -1

    def get_biggest_fontSize4styles(self, style_set={}):
        return 7


class TestWord(unittest.TestCase):
    # Set to an int to run only the matching correction-history case.
    TESTCASE = None

    def setUp(self):
        DATADIR = path.dirname(__file__) + sep + 'test_data'
        self.test_file = DATADIR + sep + 'N_VII_1_page009.xml'
        self.word_deletion_path_file = DATADIR + sep + 'N_VII_1_page138.xml'
        self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
        self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.word_part_objs = [{'text': 'a' }, {'text': 'b' }, {'text': 'c' }]
        x = 0
        for dict in self.word_part_objs:
            dict['class'] = 'st22'
            dict['x'] = x
            dict['y'] = 11
            x += 1
        mylist = {'text': 'abc', 'id': '0', 'line-number': '2', 'deleted': 'true' }
        word_position = TranskriptionPosition(x=0, y=1, height=10, width=10, matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)'))
        self.transkription_positions = [ word_position ]
        self.word_node = ET.Element('word', attrib=mylist)
        word_position.attach_object_to_tree(self.word_node)
        x = 0
        for char in mylist['text']:
            ET.SubElement(self.word_node, 'part', attrib={'text': char, 'x': str(x), 'y': '11', 'class': 'st22' })
            x += 1

    def test_add_deletion_paths(self):
        page = datatypes.page.Page(self.word_deletion_path_file, add_deletion_paths_to_words=False)
        word = [ word for word in page.words if word.text == 'AufBau'][0]
        #self.assertTrue(word.deleted)
        self.assertTrue(len(word.word_parts) > 0)
        self.assertTrue(word.word_parts[0].deleted)
        word.add_deletion_paths(page.word_deletion_paths, tr_xmin=28.347656, tr_ymin=49.921875)
        self.assertTrue(len(word.word_parts[0].deletion_paths) > 0)
        #print(word.deletion_paths)
        """
        page = datatypes.page.Page('xml/Mp_XIV_page420.xml')
        words = [ word for word in page.words if word.deleted or True in [ part.deleted for part in word.word_parts ]]
        words[0].add_deletion_paths(page.word_deletion_paths)
        print(words[0].deletion_paths)
        """

    def test_join_words(self):
        words = [ Word(id=4, text='asdf-', line_number=1, deleted=True), Word(id=5, text='bsdf', line_number=2, deleted=False) ]
        new_word = Word.join_words(words)
        self.assertEqual(new_word.id, 4)
        self.assertEqual(new_word.text, 'asdf-bsdf')
        self.assertEqual(new_word.edited_text, 'asdfbsdf')
        self.assertEqual(new_word.deleted, False)
        self.assertEqual(new_word.line_number, -1)
        words = [ Word(id=1, word_parts=[Word(id=4, text='asdf-', line_number=1, deleted=True), Word(id=5, text='bsdf', line_number=2, deleted=False)]),\
                Word(id=4, text='.', line_number=2, deleted=True), Word(id=5, text='.', line_number=2, deleted=False) ]
        new_word = Word.join_words(words)
        self.assertEqual(new_word.text, 'asdf-bsdf..')
        new_word = Word.join_words(words, add_white_space_between_words=True)
        self.assertEqual(new_word.text, 'asdf- bsdf . .')

    def test_Word_with_word_part_objs(self):
        word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        self.assertEqual(word.id, 0)
        self.assertEqual(word.transkription_positions[0].bottom, 13)
        self.assertEqual(word.transkription_positions[0].height, 10)
        self.assertEqual(word.transkription_positions[0].top, 3)
        self.assertEqual(word.transkription_positions[0].left, 0)
        self.assertEqual(word.transkription_positions[0].width, 10)
        self.assertEqual(word.text, 'abc')

    def test_Word_with_word_node(self):
        word = Word.create_cls(self.word_node)
        self.assertEqual(word.id, 0)
        self.assertEqual(word.deleted, True)
        self.assertEqual(word.transkription_positions[0].bottom, 11)
        self.assertEqual(word.transkription_positions[0].height, 10)
        self.assertEqual(word.transkription_positions[0].top, 1)
        self.assertEqual(word.transkription_positions[0].left, 0)
        self.assertEqual(word.transkription_positions[0].width, 10)
        self.assertEqual(word.text, 'abc')
        self.assertEqual(word.line_number, 2)
        self.assertEqual(word.transkription_positions[0].transform.isRotationMatrix(), True)

    def test_attach_word_to_tree(self):
        newWord = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        empty_tree = ET.ElementTree(ET.Element('page'))
        newWord.attach_word_to_tree(empty_tree)
        for word_node in empty_tree.getroot().xpath('//word'):
            word = Word.CREATE_WORD(word_node=word_node)
            self.assertEqual(word.id, 0)
            self.assertEqual(word.deleted, False)
            self.assertEqual(word.transkription_positions[0].bottom, 13)
            self.assertEqual(word.transkription_positions[0].height, 10)
            self.assertEqual(word.transkription_positions[0].top, 3)
            self.assertEqual(word.transkription_positions[0].left, 0)
            self.assertEqual(word.transkription_positions[0].width, 10)
            self.assertEqual(word.text, 'abc')

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 0, 'Not testing this case')
    def test_create_correction_history_case0(self):
        # Case 1: whole word over box
        box = Box(earlier_text='XYX')
        word = Word(text='ASDF', transkription_positions=[TranskriptionPosition()])
        word.word_box = box
        word.create_correction_history()
        self.assertEqual(word.earlier_version is None, True)
        self.assertEqual(word.overwrites_word is not None, True)

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 1, 'Not testing this case')
    def test_create_correction_history_case1(self):
        # Case 2: part of word over box
        box = Box(earlier_text='XYX')
        partA = Word(text='A', transkription_positions=[TranskriptionPosition()])
        partA.word_box = box
        partB = Word(text='SDF', transkription_positions=[TranskriptionPosition()])
        word = Word(text='ASDF', word_parts=[ partA, partB])
        word.create_correction_history()
        self.assertEqual(word.earlier_version is None, True)
        self.assertEqual(word.word_parts[0].overwrites_word is not None, True)

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 2, 'Not testing this case')
    def test_create_correction_history_case3(self):
        # Case 3: part of word over box, word under box is part of earlier version
        box = Box(earlier_text='XYX')
        tp0 = TranskriptionPosition()
        tp0.style = Style(writing_process_id=0)
        tp1 = TranskriptionPosition()
        tp1.style = Style(writing_process_id=1)
        partA = Word(id=0, text='Test', transkription_positions=[ tp0])
        partB = Word(id=1, text='er', transkription_positions=[ tp1])
        partB.word_box = box
        word = Word(text='Tester', writing_process_id=1, word_parts=[ partA, partB ] )
        word.create_correction_history(box_style=tp0.style)
        self.assertEqual(word.text, 'Tester')
        self.assertEqual(word.earlier_version is not None, True)
        self.assertEqual(word.earlier_version.text, 'TestXYX')
        self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 3, 'Not testing this case')
    def test_create_correction_history_case4(self):
        # Case 4: part of word is deleted
        partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
        partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
        word = Word(text='ASDF', word_parts=[ partA, partB])
        word.create_correction_history()
        self.assertEqual(word.earlier_version is not None, True)
        self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
        self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
        self.assertEqual(word.edited_text, 'SDF')

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 4, 'Not testing this case')
    def test_create_correction_history_case5(self):
        tp0 = TranskriptionPosition()
        tp0.style = Style(writing_process_id=0)
        tp1 = TranskriptionPosition()
        tp1.style = Style(writing_process_id=1)
        partA = Word(id=0, text='Test', transkription_positions=[ tp0])
        partB = Word(id=1, text='er', transkription_positions=[ tp1])
        word = Word(text='Tester', word_parts=[ partA, partB ] )
        word.create_correction_history()
        self.assertEqual(word.earlier_version is not None, True)
        self.assertEqual(word.word_parts[1].extendsEarlierVersion, True)
        self.assertEqual(word.word_parts[1].isExtensionOfWord, word.earlier_version)

    #@unittest.skipUnless(TESTCASE is None or TESTCASE == 5, 'Not testing this case')
    #@unittest.skip('case tested, relies on a local xml file')
    def test_create_correction_history_case_full(self):
        page = datatypes.page.Page('xml/N_VII_1_page138.xml')
        manuscript = ArchivalManuscriptUnity()
        reset_page(page)
        update_writing_process_ids(page)
        word = [ word for word in page.words if word.text == 'Verschiedenes' and word.line_number == 4 ][0]
        wordAufBau = [ word for word in page.words if word.text == 'AufBau' ][0]
        #page.words = [ word ]
        page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
        word.word_parts[0].transkription_positions[0].has_box = Box(earlier_text='v')
        self.assertEqual(len(word.word_parts), 2)
        word_over_box = word._get_partial_word_over_box()
        update_transkription_position_ids(word)
        word.create_correction_history(page)
        self.assertEqual(word.writing_process_id, 1)
        self.assertEqual(word.earlier_version is not None, True)
        self.assertEqual(word.earlier_version.text, 'verschiedenes')
        #print(word.earlier_version.id, [ (w.id, w.text) for w in word.earlier_version.word_parts ])
        empty_tree = ET.ElementTree(ET.Element('page'))
        word_node = word.attach_word_to_tree(empty_tree)
        #print(ET.dump(word_node))
        """
        self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
        self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
        self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
        """
        word = wordAufBau
        page.words = [ word ]
        page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
        word.word_parts[0].deleted = True
        word.word_parts[1].transkription_positions[0].has_box = Box(earlier_text='b')
        self.assertEqual(len(word.word_parts), 3)
        word_over_box = word._get_partial_word_over_box()
        self.assertEqual(len(word.word_parts), 3)
        update_transkription_position_ids(word)
        word.create_correction_history(page)
        self.assertEqual(word.writing_process_id, 2)
        self.assertEqual(word.earlier_version is not None, True)
        self.assertEqual(word.text, 'AufBau')
        self.assertEqual(word.edited_text, 'Bau')
        self.assertEqual(word.earlier_version.text, 'Aufbau')
        self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
        self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
        self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
        empty_tree = ET.ElementTree(ET.Element('page'))
        word_node = word.attach_word_to_tree(empty_tree)
        #print(ET.dump(word_node))
        newWord = Word.create_cls(word_node)

    #@unittest.skip('')
    def test_earlier_version(self):
        partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
        partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
        word = Word(text='ASDF', word_parts=[ partA, partB])
        earlier_version = word.create_earlier_version()
        self.assertEqual(earlier_version is not None, True)
        self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
        self.assertEqual(word.word_parts[0].isDeletionOfWord, earlier_version.word_parts[0])

    def test_undo_partitioning(self):
        tps = []
        for i, xy in enumerate([ 3, 4, 5 ]):
            tps.append(TranskriptionPosition(id=i, x=xy, y=xy, height=10, width=10))
        partA = Word(id=0, text='Auf', writing_process_id=1, deleted=True, transkription_positions=[ tps[0]])
        partB = Word(id=1, text='B', writing_process_id=2, transkription_positions=[tps[1]])
        partC = Word(id=2, text='au', writing_process_id=1,transkription_positions=[tps[2]])
        word = Word(text='Aufbau', writing_process_id=2, word_parts=[ partA, partB, partC ] )
        word.undo_partitioning()
        self.assertEqual(len(word.transkription_positions), len(tps))
        self.assertEqual(len(word.word_parts), 0)
        """
        page = datatypes.page.Page('xml/N_VII_1_page138.xml')
        word = page.words[77]
        word.undo_partitioning()
        self.assertEqual(len(word.word_parts), 0)
        self.assertEqual(len(word.transkription_positions), 3)
        update_transkription_position_ids(word)
        empty_tree = ET.ElementTree(ET.Element('page'))
        word_node = word.attach_word_to_tree(empty_tree)
        print(ET.dump(word_node))
        """

    def test_split(self):
        page = Page()
        pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, self.word_part_objs)
        transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
        word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
        previousWord, currentWord, nextWord = word.split('b')
        self.assertEqual(previousWord.id, 0)
        self.assertEqual(previousWord.text, 'a')
        self.assertEqual(currentWord.id, 1)
        self.assertEqual(nextWord.id, 2)
        word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
        previousWord, currentWord, nextWord = word.split('bc')
        self.assertEqual(previousWord.id, 0)
        self.assertEqual(previousWord.text, 'a')
        self.assertEqual(currentWord.id, 1)
        word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
        previousWord, currentWord, nextWord = word.split('ab', start_id=10)
        self.assertEqual(currentWord.id, 10)
        self.assertEqual(currentWord.text, 'ab')
        self.assertEqual(currentWord.transkription_positions[0].width, 2.1)
        self.assertEqual(nextWord.id, 11)
        self.assertEqual(nextWord.transkription_positions[0].width, 5.2)
        word_part_objs=[{'text': 'x', 'class':'st22', 'x': 0, 'y': 0},\
                {'text': 'Insofern', 'class':'st22', 'x': 1, 'y': 0},\
                {'text': 'x', 'class':'st22', 'x': 10, 'y': 0}]
        pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
        word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
        with self.assertWarns(Warning):
            previousWord, currentWord, nextWord = word.split('Insofer')
        word_part_objs=[{'text': 'xInsofern', 'class':'st22', 'x': 0, 'y': 0}]
        pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
        word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
        with self.assertWarns(Warning):
            previousWord, currentWord, nextWord = word.split('Insofern')

    def test_join(self):
        word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
        word.join(other_word, add_white_space_between_words=True)
        self.assertEqual(word.text, 'abc .')
        word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
        word.join(other_word)
        self.assertEqual(word.text, 'abc.')
        other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
        word.join(other_word, append_at_end_of_new_word=False)
        self.assertEqual(word.text, '.abc.')
        """
        tree = ET.ElementTree(ET.Element('page'))
        word.attach_word_to_tree(tree)
        print(ET.dump(tree.getroot()))
        """

    def test_get_semanticAndDataDict(self):
        dictionary = Word.get_semantic_dictionary()
        #print(dictionary)
        info_dict = dictionary['properties'].get('isDeletionOfWord')
        self.assertEqual(SemanticClass.SUPER_PROPERTY in info_dict.keys(), True)
        super_info_dict = info_dict[SemanticClass.SUPER_PROPERTY]
        #print(info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME))

    def test_simplify_transkription_positions(self):
        # NOTE(review): the XML payloads of both node strings were lost when
        # this file was mangled (markup stripped) — restore the original
        # <transkription-position> fixtures from revision history.
        node_string = """ """
        nodeA = ET.fromstring(node_string)
        node_string = """ """
        nodeB = ET.fromstring(node_string)
        word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
        self.assertEqual(len(word.transkription_positions), 2)
        word.simplify_transkription_positions()
        self.assertEqual(len(word.transkription_positions), 1)
        word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
        word.transkription_positions[1].writing_process_id = -1
        word.simplify_transkription_positions()
        self.assertEqual(len(word.transkription_positions), 1)
        self.assertEqual(word.transkription_positions[0].writing_process_id, 0)
        """
        tree = ET.ElementTree(ET.Element('page'))
        word.attach_word_to_tree(tree)
        print(ET.dump(tree.getroot()))
        """

    def test_partition(self):
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        self.assertEqual(word.belongs_to_multiple_writing_processes(), True)
        word.partition_according_to_writing_process_id()
        self.assertEqual(len(word.word_parts), 3)
        self.assertEqual(word.belongs_to_multiple_writing_processes(), False)
        self.assertEqual(word.belongs_to_multiple_writing_processes(include_parts=True), True)
        empty_tree = ET.ElementTree(ET.Element('page'))
        word_node = word.attach_word_to_tree(empty_tree)
        newWord = Word.create_cls(word_node)
        self.assertEqual(len(newWord.word_parts), 3)
        #print(ET.dump(empty_tree.getroot()))

    def test_partition_deletion(self):
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        for transkription_position in word.transkription_positions:
            transkription_position.deleted = transkription_position.writing_process_id == 1
        self.assertEqual(word.has_mixed_status('deleted'), True)
        word.partition_according_to_deletion()
        self.assertEqual(len(word.word_parts), 3)
        self.assertEqual(word.has_mixed_status('deleted'), False)
        self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        word.partition_according_to_writing_process_id()
        #print([(word.text, word.deleted) for word in word.word_parts])
        word.word_parts[1].transkription_positions[1].deleted = True
        word.partition_according_to_deletion()
        self.assertEqual(len(word.word_parts), 4)
        #print([(word.text, word.deleted) for word in word.word_parts])
        partA = Word(text='A', deleted=True)
        partB = Word(text='SDF', deleted=False)
        word = Word(text='ASDF', word_parts=[ partA, partB])
        self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)

    def test_execute_function_on_parts(self):
        page = datatypes.page.Page(self.test_file)
        word_parts = [ page.words[67], page.words[68] ]
        word_parts, none = execute_function_on_parts(word_parts, 'partition_according_to_writing_process_id')
        self.assertEqual(len(word_parts) == 4, True)

    def test_process_word_boxes(self):
        page = datatypes.page.Page(self.pdf_xml)
        page.source = self.pdf_xml_source
        page.update_styles(partition_according_to_styles=True)
        tr = TranskriptionField(page.source)
        box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
                'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
                'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
                'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
                'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
        box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
        indices = [30, 277, 288, 297, 321]
        for word_id, index in enumerate(indices):
            word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
            self.assertEqual(word_over_box is not None, True)
            self.assertEqual(word_over_box == page.words[index] or word_over_box in page.words[index].word_parts, True)
            #self.assertEqual(word_over_box in page.words[index].word_parts, True)

    def test_process_word_several_boxesOn1LIne(self):
        page = datatypes.page.Page(self.pdf_xml)
        page.source = self.pdf_xml_source
        for word in page.words:
            word.set_writing_process_id_to_transkription_positions(page)
            word.partition_according_to_writing_process_id()
        tr = TranskriptionField(page.source)
        box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
                'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
                'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
                'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
                'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
        box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
        indices = [30, 277, 288, 297, 321]
        empty_tree = ET.ElementTree(ET.Element('page'))
        for word_id, index in enumerate(indices):
            word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
            self.assertEqual(word_over_box is not None, True)

    def test_split_according_to_status(self):
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        for transkription_position in word.transkription_positions:
            transkription_position.text = 'asdf'\
                    if transkription_position.writing_process_id == 1\
                    else word.text
        self.assertEqual(word.has_mixed_status('text'), True)
        new_words = word.split_according_to_status('text')
        #print([word.text for word in new_words ])
        self.assertEqual(len(new_words) > 1, True)
        self.assertEqual(new_words[0].id, word.id)
        self.assertEqual(new_words[0].deleted, word.deleted)
        self.assertEqual(new_words[1].id, word.id+1)
        manuscript = ArchivalManuscriptUnity()
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        page.words = [ word ]
        page.update_styles(manuscript=manuscript)
        new_words = word.split_according_to_status('style', splits_are_parts=True)
        self.assertEqual(len(word.word_parts), 3)

    def test__create_new_word(self):
        manuscript = ArchivalManuscriptUnity()
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        page.words = [ word ]
        page.update_styles(manuscript=manuscript)
        newWord = word._create_new_word([ word.transkription_positions[0] ], 'style')
        for key in Word.COPY_PROPERTY_KEY:
            self.assertEqual(newWord.__dict__[key], word.__dict__[key])
        self.assertEqual(len(newWord.styles), 1)

    def test__get_partial_word_over_box(self):
        word = Word(text='test', transkription_positions=[ TranskriptionPosition(id=0), TranskriptionPosition(id=1) ])
        word.transkription_positions[0].has_box = Box(earlier_text='asdf')
        word._get_partial_word_over_box()
        self.assertEqual(len(word.word_parts), 2)
        partA = Word(id=0, text='A', transkription_positions=[TranskriptionPosition()])
        partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition(), TranskriptionPosition(id=1)])
        partB.transkription_positions[0].has_box = Box(earlier_text='asdf')
        word = Word(text='ASDF', word_parts=[ partA, partB])
        word._get_partial_word_over_box()
        self.assertEqual(len(word.word_parts), 2)


if __name__ == "__main__":
    unittest.main()
# NOTE(review): reconstructed from a whitespace-mangled diff; statement order
# and runtime strings preserved, formatting restored.
# Fix: the original had a duplicated `import sys` statement — removed.
import unittest
from os import sep, path
from os.path import isdir, isfile, dirname, basename
import lxml.etree as ET
import sys

sys.path.append('svgscripts')
dir_changed = False
if not isdir('datatypes'):
    # running from inside tests_svgscripts -> make package dir importable
    sys.path.append(dirname(sys.path[0]))
    dir_changed = True
from datatypes.lineNumber import LineNumber
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.style import Style
from datatypes.writing_process import WritingProcess
from datatypes.word import Word


class TestPage(unittest.TestCase):
    """Tests for datatypes.page.Page against the XML fixtures in test_data."""

    def setUp(self):
        DATADIR = dirname(__file__) + sep + 'test_data'
        if not isdir(DATADIR):
            DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
        self.test_file = DATADIR + sep + 'test.xml'
        self.test_svg_file = DATADIR + sep + 'test421.svg'
        self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
        self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
        self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
        self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.test_tcm_xml = DATADIR + sep + 'N_VII_1_page001.xml'
        self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
        self.test_styles_color = DATADIR + sep + 'N_VII_1_page013.xml'

    def test_Page(self):
        page = Page(self.test_file)
        self.assertEqual(page.title, 'Mp XIV 1')
        self.assertEqual(page.number, '421')
        self.assertEqual(len(page.sonderzeichen_list), 2)
        self.assertEqual('st21' in page.sonderzeichen_list, True)
        self.assertEqual('st23' in page.sonderzeichen_list, True)
        self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
        stage0 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 0 ]
        stage1 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 1 ]
        stage2 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 2 ]
        fontStage0 = float(page.style_dict.get(stage0[0]).get('font-size').replace('px',''))
        fontStage1 = float(page.style_dict.get(stage1[0]).get('font-size').replace('px',''))
        fontStage2 = float(page.style_dict.get(stage2[0]).get('font-size').replace('px',''))
        self.assertEqual(fontStage0 > fontStage1, True)
        self.assertEqual(fontStage1 > fontStage2, True)
        page = Page.create_cls(self.test_tcm_xml, create_dummy_page=True)
        self.assertEqual(page.number, '1')

    def test_get_biggest_fontSize4styles(self):
        page = Page(self.test_file)
        style_set = { 'st12', 'st2', 'st14', 'st13' }
        self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)

    def test_get_words(self):
        page = Page(self.test_file)
        words = page.words
        self.assertEqual(len(words), 440)
        self.assertEqual(words[0].text, '$')
        self.assertEqual(words[439].text, 'mußte!')

    def test_update_page_type(self):
        page = Page(self.pdf_xml)
        tf = TranskriptionField(self.pdf_xml_source)
        page.update_page_type(transkription_field=tf)
        self.assertEqual(page.page_type, Page.PAGE_VERSO)
        #page = Page(self.xml_fileB)
        #page.update_page_type()
        #self.assertEqual(page.page_type, Page.PAGE_RECTO)

    def test_update_line_number_area(self):
        page = Page(self.xml_file)
        transkription_field = TranskriptionField(page.source)
        page.update_line_number_area(transkription_field)
        self.assertEqual(transkription_field.line_number_area_width > 0, True)
        self.assertEqual(transkription_field.line_number_area_width < 15, True)
        page = Page(self.xml_fileB)
        transkription_field = TranskriptionField(page.source)
        page.update_line_number_area(transkription_field)
        self.assertEqual(transkription_field.line_number_area_width > 0, True)
        self.assertEqual(transkription_field.line_number_area_width < 15, True)

    def test_get_pages_from_xml_file(self):
        pages = Page.get_pages_from_xml_file(self.test_manuscript)
        self.assertEqual(len(pages), 4)
        self.assertEqual(pages[0].number, '5')
        self.assertEqual(pages[1].number, '6')
        pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK)
        self.assertEqual(len(pages), 2)
        self.assertEqual(pages[0].number, '5')
        pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK, status_not_contain=STATUS_POSTMERGED_OK)
        self.assertEqual(len(pages), 1)

    def test_get_semantic_dictionary(self):
        dictionary = Page.get_semantic_dictionary()
        #print(dictionary)

    def test_update_styles(self):
        page = Page(self.pdf_xml)
        page.words = [ word for word in page.words if word.text == 'Schopenhauer' ]
        page.update_styles(add_to_parents=True)
        self.assertEqual(len(page.words[0].styles), 1)
        self.assertEqual(page.words[0].styles[0].color.name, 'black')
        self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['latin'])
        self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('black',False)])
        page = Page(self.test_styles_color)
        page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' ]
        page.update_styles(add_to_parents=True)
        self.assertEqual(len(page.words[0].styles), 1)
        self.assertEqual(page.words[0].styles[0].color.name, 'green')
        self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['german'])
        self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('green',False)])
        self.assertEqual(page.words[0].styles[0].writing_process_id, WritingProcess.INSERTION_AND_ADDITION)
        page = Page(self.test_styles_color)
        page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' or word.text == 'gewisse' ]
        self.assertEqual(len(page.words), 2)
        word = page.words[0]
        word.transkription_positions += page.words[1].transkription_positions
        page.words = [ word ]
        page.update_styles(add_to_parents=True, partition_according_to_styles=True)
        self.assertEqual(len(page.words[0].word_parts), 2)
        page = Page(self.test_styles_color)
        page.update_styles(add_to_parents=True, create_css=True)
        for word in page.words:
            self.assertTrue(len(word.styles) > 0)
            for style in word.styles:
                self.assertTrue(len(style.css_styles) > 0)

    def test_lock(self):
        page = Page(self.test_tcm_xml)
        self.assertEqual(page.is_locked(), False)
        page.lock('asdf.txt')
        self.assertEqual(page.is_locked(), True)
        self.assertEqual(page.page_tree.xpath('//lock/reference-file/text()')[0], 'asdf.txt')
        page.unlock()
        self.assertEqual(page.is_locked(), False)


if __name__ == "__main__":
    unittest.main()
#    If not, see <https://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

import abc
import inspect
import warnings


class UnSemanticClass:
    """
    Subclasses of this class are not semantically relevant,
    even if their superclasses are.
    """
    pass


class SemanticClass(metaclass=abc.ABCMeta):
    """
    This is an abstract class for all classes that are semantically relevant.

    Subclasses must implement :meth:`get_semantic_dictionary`; the other class
    methods are helpers for building the semantic dictionaries that drive the
    py2ttl ontology/data generation.
    """
    HAS_PART = 'has_part'
    HAS_SEQNUM = 'has_seqnum'
    SINGLE_VALUE = 1
    LIST = -99
    CLASS_KEY = 'class'
    CARDINALITY = "cardinality"
    CARDINALITY_RESTRICTION = "cardinality_restriction"
    HAS_HOMOTYPIC_PARTS_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasHomotypicParts'
    HAS_IMAGE = 'http://www.nie.org/ontology/nietzsche#hasImage'
    HAS_URL = 'http://www.nie.org/ontology/nietzsche#hasUrl'
    HOMOTYPIC_HAS_TEXT_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasText'
    STOFF_STYLE_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#styleHasCSS'
    PAGE_IS_ON_TEXTFIELD = 'http://www.nie.org/ontology/nietzsche#pageIsOnTextField'
    PROPERTY_NAME = "name"
    PROPERTY_LABEL = "label"
    PROPERTY_COMMENT = "comment"
    PROPERTIES_KEY = "properties"
    SUBCLASS_OF = "rdfs:subClassOf"
    SUBPROPERTYOF = "subPropertyOf"
    SUPER_CLASSES_DICT = { 'http://www.nie.org/ontology/homotypic': 'HomotypicEntity',
                           'http://www.nie.org/ontology/standoff': 'Style' }
    SUPER_PROPERTY = "super_property"
    THIS = "this"
    TYPE = "type"

    @classmethod
    def create_semantic_property_dictionary(cls, property_key, class_type, cardinality=0, cardinality_restriction='cardinality', name='', label='', comment='', subPropertyOf='') -> dict:
        """Create a semantic property dictionary.

        Here is how to make a subproperty: Pass the IRI of the super property as
        subPropertyOf=IRI, be sure that base_uri of IRI (as key) and Class identifier
        of super class (as value) are in cls.SUPER_CLASSES_DICT, then call
        cls.return_dictionary_after_updating_super_classes -> it will subclass the
        class that owns the subproperty to the super class.

        Optional entries (cardinality, name, label, comment, subPropertyOf) are only
        added when the caller passes a non-default value.

        :return: semantic property dictionary (dict), mapping property_key to its content
        """
        property_content = { SemanticClass.CLASS_KEY: class_type }
        # cardinality_restriction is only meaningful together with a positive cardinality
        if cardinality > 0:
            property_content.update({ SemanticClass.CARDINALITY: cardinality})
            property_content.update({ SemanticClass.CARDINALITY_RESTRICTION: cardinality_restriction})
        if name != '':
            property_content.update({ SemanticClass.PROPERTY_NAME: name})
        if label != '':
            property_content.update({ SemanticClass.PROPERTY_LABEL: label})
        if comment != '':
            property_content.update({ SemanticClass.PROPERTY_COMMENT: comment})
        if subPropertyOf != '':
            property_content.update({ SemanticClass.SUBPROPERTYOF: subPropertyOf})
        return { property_key: property_content }

    @classmethod
    def get_class_dictionary(cls):
        """Creates and returns a class_dictionary with the keys cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].

        Optional keys are added from the class attributes OWL_EQUIVALENTCLASSES and
        RDFS_SUBCLASSOF_LIST if the subclass declares them (looked up via cls.__dict__
        so inherited values are deliberately ignored).
        """
        class_dict = {cls.THIS: cls }
        if cls.__dict__.get('OWL_EQUIVALENTCLASSES') and len(cls.OWL_EQUIVALENTCLASSES) > 0:
            class_dict.update({'owl:equivalentClass': cls.OWL_EQUIVALENTCLASSES })
        if cls.__dict__.get('RDFS_SUBCLASSOF_LIST') and len(cls.RDFS_SUBCLASSOF_LIST) > 0:
            class_dict.update({cls.SUBCLASS_OF: cls.RDFS_SUBCLASSOF_LIST })
        # record the direct superclass as cls.TYPE when it is itself semantically relevant
        direct_super_class = inspect.getclasstree([cls],unique=True)[0][0]
        if issubclass(direct_super_class, SemanticClass) and direct_super_class != SemanticClass:
            class_dict.update({cls.TYPE: direct_super_class})
        return class_dict

    def get_name_and_id(self):
        """Return an identification for object as 2-tuple.

        The id defaults to 0 and is taken from the first of the instance
        attributes 'id', 'number' or 'title' that exists (spaces in a title
        are replaced by underscores).
        """
        # NOTE(review): local name 'id' shadows the builtin id()
        id = 0
        if 'id' in self.__dict__.keys():
            id = self.id
        elif 'number' in self.__dict__.keys():
            id = self.number
        elif 'title' in self.__dict__.keys():
            id = self.title.replace(' ', '_')
        return type(self).__name__, id

    def _get_list_of_type(self, list_type):
        """Return list of type == list_type if list is not empty.

        Scans the instance's list-valued attributes and returns the first
        non-empty one whose first element has exactly type list_type;
        returns an empty list when none matches.
        """
        list_of_type = []
        for object_list in [ list_obj for list_obj in self.__dict__.values()\
                if type(list_obj) == list ]:
            if len(object_list) > 0 and type(object_list[0]) == list_type:
                return object_list
        return list_of_type

    def get_object_from_list_with_id(self, object_type, object_id):
        """Return object from list if object has id == object_id,
        None if not found.
        """
        list_with_object = [ item for item in self._get_list_of_type(object_type)\
                if item.id == object_id ]
        if len(list_with_object) > 0:
            return list_with_object[0]
        return None

    @classmethod
    def get_cls_hasPart_objectCls_dictionaries(cls, object_cls, xpath, object_seqnum_xpath=None, cardinality=0, cardinality_restriction='minCardinality'):
        """Return a dictionary containing the information for creating a class that can act
        as an intermediary between cls and a number of object_cls if object_cls
        has a position in a sequence of object_classes that belong to cls.

        When object_seqnum_xpath is None it defaults to xpath + '/@id'.
        """
        part_name = object_cls.__name__ + 'Part'
        has_part_name = object_cls.__name__.lower() + 'PartHas' + object_cls.__name__
        has_seqnum_name = object_cls.__name__.lower() + 'HasSeqNum'
        if object_seqnum_xpath is None:
            object_seqnum_xpath = xpath + '/@id'
        object_part_dictionary = { 'class': object_cls, 'cardinality': 1, 'xpath': xpath,\
                'name': has_part_name, 'label': '{0} has a {1}'.format(part_name, object_cls.__name__),\
                'comment': '{0} has a part, that is a {1}'.format(part_name, object_cls.__name__)}
        # NOTE(review): the format string below has one placeholder but two arguments;
        # the second argument is silently ignored by str.format.
        object_seqnum_dictionary = { 'class': int, 'cardinality': 1, 'xpath': object_seqnum_xpath,\
                'name': has_seqnum_name, 'label': '{0} has a sequence number'.format(part_name),\
                'comment': '{0} has a part, that stands in a sequence with this number'.format(part_name, object_cls.__name__)}
        # NOTE(review): "servers as a intermediary" is a typo in the generated ontology
        # comment ("serves as an intermediary"); left unchanged here to preserve output.
        object_dictionary = { 'class_name': part_name, SemanticClass.HAS_PART: object_part_dictionary, SemanticClass.HAS_SEQNUM: object_seqnum_dictionary,\
                'label': '{0} part'.format(object_cls.__name__.lower()),\
                'comment': 'This class servers as a intermediary between {0} and {1}. {0} has some {1} in a specific sequence.'.format(cls.__name__, object_cls.__name__)}
        dictionary = { 'flag': 'ordered_list' , 'class': object_dictionary, 'cardinality': cardinality, 'cardinality_restriction': cardinality_restriction, 'xpath': xpath,\
                'name': cls.__name__.lower() + 'Has' + part_name, 'label': '{0} has a part that connects it with a {1}'.format(cls.__name__, object_cls.__name__),\
                'comment': '{0} has a part that connects it with a {1}, that has a position in a sequence of {1}'.format(cls.__name__, object_cls.__name__)}
        return dictionary

    @classmethod
    @abc.abstractmethod
    def get_semantic_dictionary(cls):
        """Creates a semantic dictionary with cls.CLASS_KEY and cls.PROPERTIES_KEY as its keys.

        The class-key points to a class_dictionary with the keys:
        cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].

        Create initial dictionary using cls.get_class_dictionary():

            dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: {} }

        The properties_key points to a properties_dictionary with semantically relevant
        keys of self.__dict__ as keys. Use cls.create_semantic_property_dictionary(...)
        in order to add a property dictionary for each property as follows:

            dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary(property_key, ...))

        Return dictionary by using:

            cls.return_dictionary_after_updating_super_classes(dictionary)
        """
        pass

    def get_xml_conform_key_value_dictionary(self) -> dict:
        """Return a xml conform key value dictionary.

        Buckets each semantically relevant property of self into one of four
        sub-dictionaries ('attachable', 'attachable-list', 'builtins',
        'builtin-list') depending on whether its declared semantic type is a
        builtin and whether the value is a list; keys have '_' replaced by '-'.
        None values and empty lists are skipped.
        """
        property_d = self.get_semantic_dictionary()[self.PROPERTIES_KEY]
        attachable, attachable_list, builtins, builtin_list = 'attachable', 'attachable-list', 'builtins', 'builtin-list'
        xml_d = { attachable: {}, attachable_list: {}, builtins: {}, builtin_list: {}}
        for key in property_d.keys():
            value = self.__dict__.get(key)
            if value is not None and (type(value) != list or len(value) > 0):
                # the property entry is either a full property dict or a tuple/list
                # whose first element is the semantic type
                semantic_type = property_d[key][self.CLASS_KEY]\
                        if type(property_d[key]) is dict\
                        else property_d[key][0]
                if type(value) != list and semantic_type.__module__ == builtins:
                    # booleans are serialized lowercase ('true'/'false') for XML
                    if semantic_type == bool:
                        xml_d[builtins].update({key.replace('_','-'): str(value).lower()})
                    else:
                        xml_d[builtins].update({key.replace('_','-'): str(value)})
                elif semantic_type.__module__ != builtins:
                    attachable_key = attachable if type(value) != list else attachable_list
                    xml_d[attachable_key].update({key.replace('_','-'): value})
                else:
                    xml_d[builtin_list].update({key.replace('_','-'): value})
        return xml_d

    @classmethod
    def return_dictionary_after_updating_super_classes(cls, dictionary):
        """Return semantic dictionary after updating super classes if necessary.

        For every property declaring a subPropertyOf whose base URI is known in
        cls.SUPER_CLASSES_DICT, the corresponding super class IRI is appended to
        the class dictionary's rdfs:subClassOf list (unless a subclass entry for
        that base URI is already present).
        """
        if cls.PROPERTIES_KEY not in dictionary.keys():
            return dictionary
        subproperty_base_uri_set = set( value.get(cls.SUBPROPERTYOF).split('#')[0]\
                for value in dictionary[cls.PROPERTIES_KEY].values()\
                if bool(value.get(cls.SUBPROPERTYOF)) )
        for sub_property_base in subproperty_base_uri_set:
            if bool(cls.SUPER_CLASSES_DICT.get(sub_property_base))\
               and (
                    cls.SUBCLASS_OF not in dictionary[cls.CLASS_KEY].keys()
                    or len(dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]) == 0
                    or len([ url for url in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF] if sub_property_base in url]) == 0
                    # above instead of beneath, there might be more than one Class that share a sub_property_base.
                    #or sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base) not in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]
               ):
                subclass_list = dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]\
                        if cls.SUBCLASS_OF in dictionary[cls.CLASS_KEY].keys()\
                        and len(dictionary[cls.CLASS_KEY].get(cls.SUBCLASS_OF)) > 0\
                        else []
                subclass_list.append(sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base))
                dictionary[cls.CLASS_KEY].update({cls.SUBCLASS_OF: subclass_list})
        return dictionary

    def __repr__(self) -> str:
        """Return a representation of all semantically relevant properties.
        """
        data_string = self.__str__()
        return f'<{data_string}>'

    def __str__(self) -> str:
        """Return a str of all semantically relevant properties.
        """
        name = type(self).__name__
        data = []
        for key in self.get_semantic_dictionary()[self.PROPERTIES_KEY].keys():
            # NOTE(review): the 'or'-clause is dead code — a list is never None, so
            # empty lists are included here; get_xml_conform_key_value_dictionary uses
            # the presumably intended 'is not None and (... or len > 0)' form — confirm.
            if key in self.__dict__.keys() and\
               (self.__dict__[key] != None or (type(self.__dict__[key]) == list and len(self.__dict__[key]) > 0)):
                data.append(f'{key}: {self.__dict__[key]}')
        data_string = ', '.join(data)
        return f'{name} {data_string}'
Index: tests_py2ttl/test_data/mapping_dict.xml =================================================================== --- tests_py2ttl/test_data/mapping_dict.xml (revision 103) +++ tests_py2ttl/test_data/mapping_dict.xml (revision 104) @@ -1,352 +1,355 @@ tln http://www.nie.org/ontology/nietzsche# ./tln-ontology_autogenerated.ttl http://www.nie.org/ontology/nietzsche#ArchivalManuscriptUnity http://www.nie.org/ontology/nietzsche#hasTitle http://www.nie.org/ontology/nietzsche#hasManuscriptType http://www.nie.org/ontology/nietzsche#hasStyles http://www.nie.org/ontology/nietzsche#hasPages http://www.nie.org/ontology/nietzsche#hasDescription http://www.nie.org/ontology/nietzsche#hasEarlierDescriptions http://www.nie.org/ontology/nietzsche#EditorComment http://www.nie.org/ontology/nietzsche#isUncertain http://www.nie.org/ontology/nietzsche#hasComment http://www.nie.org/ontology/nietzsche#AtypicalWriting http://www.nie.org/ontology/nietzsche#isUncertain http://www.nie.org/ontology/nietzsche#hasComment http://www.nie.org/ontology/nietzsche#atypicalWritingHasText http://www.nie.org/ontology/nietzsche#Path http://www.nie.org/ontology/nietzsche#hasDAttribute http://www.nie.org/ontology/nietzsche#Box http://www.nie.org/ontology/nietzsche#hasDAttribute http://www.nie.org/ontology/nietzsche#hasEarlierText http://www.nie.org/ontology/nietzsche#Clarification http://www.nie.org/ontology/nietzsche#isUncertain http://www.nie.org/ontology/nietzsche#hasComment http://www.nie.org/ontology/nietzsche#clarificationHasText http://www.nie.org/ontology/nietzsche#Color http://www.nie.org/ontology/nietzsche#colorHasName
http://www.nie.org/ontology/nietzsche#hasHexadecimalValue http://www.nie.org/ontology/nietzsche#Text http://www.nie.org/ontology/nietzsche#textHasContent http://www.nie.org/ontology/nietzsche#textHasMarkup http://www.nie.org/ontology/nietzsche#Description http://www.nie.org/ontology/nietzsche#textHasContent http://www.nie.org/ontology/nietzsche#textHasMarkup http://www.nie.org/ontology/nietzsche#EarlierDescription http://www.nie.org/ontology/nietzsche#textHasContent http://www.nie.org/ontology/nietzsche#hasAuthor http://www.nie.org/ontology/nietzsche#hasCitation http://www.nie.org/ontology/nietzsche#textHasMarkup http://www.nie.org/ontology/nietzsche#EditorCorrection http://www.nie.org/ontology/nietzsche#isUncertain http://www.nie.org/ontology/nietzsche#hasComment http://www.nie.org/ontology/nietzsche#hasCorrectionText http://www.nie.org/ontology/nietzsche#Image http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasFileName http://www.nie.org/ontology/nietzsche#hasTransform - http://www.nie.org/ontology/nietzsche#hasUrl + http://www.nie.org/ontology/nietzsche#hasPrimaryurl + http://www.nie.org/ontology/nietzsche#hasSecondaryurl http://www.nie.org/ontology/nietzsche#hasTextField http://www.nie.org/ontology/nietzsche#FaksimileImage http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasFileName http://www.nie.org/ontology/nietzsche#hasTransform - http://www.nie.org/ontology/nietzsche#hasUrl + http://www.nie.org/ontology/nietzsche#hasPrimaryurl + http://www.nie.org/ontology/nietzsche#hasSecondaryurl + http://www.nie.org/ontology/nietzsche#hasApiurl + http://www.nie.org/ontology/nietzsche#hasThumburl + http://www.nie.org/ontology/nietzsche#hasMediumurl http://www.nie.org/ontology/nietzsche#hasTextField http://www.nie.org/ontology/nietzsche#PositionalObject http://www.nie.org/ontology/nietzsche#hasHeight 
http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasLeft http://www.nie.org/ontology/nietzsche#hasTop http://www.nie.org/ontology/nietzsche#hasBottom - http://www.nie.org/ontology/nietzsche#hasTransform + http://www.nie.org/ontology/nietzsche#hasTransform http://www.nie.org/ontology/nietzsche#WordPosition http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasLeft http://www.nie.org/ontology/nietzsche#hasTop http://www.nie.org/ontology/nietzsche#hasBottom - http://www.nie.org/ontology/nietzsche#hasTransform + http://www.nie.org/ontology/nietzsche#hasTransform http://www.nie.org/ontology/nietzsche#FaksimilePosition http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasLeft http://www.nie.org/ontology/nietzsche#hasTop http://www.nie.org/ontology/nietzsche#hasBottom - http://www.nie.org/ontology/nietzsche#hasTransform - http://www.nie.org/ontology/nietzsche#isOnFaksimileImage - http://www.nie.org/ontology/nietzsche#isOnTextField + http://www.nie.org/ontology/nietzsche#hasTransform http://www.nie.org/ontology/nietzsche#Line http://www.nie.org/ontology/nietzsche#lineHasNumber http://www.nie.org/ontology/nietzsche#lineHasBottomValueOnTranskription http://www.nie.org/ontology/nietzsche#lineHasTopValueOnTranskription http://www.nie.org/ontology/nietzsche#lineHasInnerBottomValueOnFaksimile http://www.nie.org/ontology/nietzsche#lineHasInnerTopValueOnFaksimile http://www.nie.org/ontology/nietzsche#lineHasOuterBottomValueOnFaksimile http://www.nie.org/ontology/nietzsche#lineHasOuterTopValueOnFaksimile http://www.nie.org/ontology/nietzsche#isMainLine http://www.nie.org/ontology/nietzsche#lineHasEditorComment http://www.nie.org/ontology/nietzsche#LineContinuation http://www.nie.org/ontology/nietzsche#isUncertain http://www.nie.org/ontology/nietzsche#hasComment 
http://www.nie.org/ontology/nietzsche#isLineAContinuationTo http://www.nie.org/ontology/nietzsche#lineContinuationHasReference http://www.nie.org/ontology/nietzsche#SimpleWord http://www.nie.org/ontology/nietzsche#hasText http://www.nie.org/ontology/nietzsche#wordBelongsToLine http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition http://www.nie.org/ontology/nietzsche#hasFaksimilePosition http://www.nie.org/ontology/nietzsche#SpecialWord http://www.nie.org/ontology/nietzsche#hasText http://www.nie.org/ontology/nietzsche#wordBelongsToLine http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition http://www.nie.org/ontology/nietzsche#hasFaksimilePosition http://www.nie.org/ontology/nietzsche#MarkForeignHands http://www.nie.org/ontology/nietzsche#hasText http://www.nie.org/ontology/nietzsche#textOfForeignHands http://www.nie.org/ontology/nietzsche#penOfForeignHands http://www.nie.org/ontology/nietzsche#wordBelongsToLine http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition http://www.nie.org/ontology/nietzsche#hasFaksimilePosition http://www.nie.org/ontology/nietzsche#Page http://www.nie.org/ontology/nietzsche#hasNumber http://www.nie.org/ontology/nietzsche#hasOrientation http://www.nie.org/ontology/nietzsche#hasLines http://www.nie.org/ontology/nietzsche#hasMarkForeignHands http://www.nie.org/ontology/nietzsche#hasWords http://www.nie.org/ontology/nietzsche#hasWordDeletionPaths http://www.nie.org/ontology/nietzsche#hasWordInsertionMarks http://www.nie.org/ontology/nietzsche#hasFaksimileImage http://www.nie.org/ontology/nietzsche#pageIsOnSVGTextField http://www.nie.org/ontology/nietzsche#pageIsOnFaksimileTextField http://www.nie.org/ontology/nietzsche#hasSvgImage http://www.nie.org/ontology/nietzsche#Reference http://www.nie.org/ontology/nietzsche#firstLineOfReference http://www.nie.org/ontology/nietzsche#lastLineOfReference http://www.nie.org/ontology/nietzsche#wordReference http://www.nie.org/ontology/nietzsche#IsUncertain 
http://www.nie.org/ontology/nietzsche#hasTitle http://www.nie.org/ontology/nietzsche#hasPageNumber http://www.nie.org/ontology/nietzsche#SVGImage http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasFileName http://www.nie.org/ontology/nietzsche#hasTransform - http://www.nie.org/ontology/nietzsche#hasUrl + http://www.nie.org/ontology/nietzsche#hasPrimaryurl + http://www.nie.org/ontology/nietzsche#hasSecondaryurl http://www.nie.org/ontology/nietzsche#hasTextField http://www.nie.org/ontology/nietzsche#StandoffTag http://www.nie.org/ontology/nietzsche#standoffTagHasStartIndex http://www.nie.org/ontology/nietzsche#standoffTagHasEndIndex http://www.nie.org/ontology/nietzsche#standoffTagHasCSS http://www.nie.org/ontology/nietzsche#TextConnectionMark http://www.nie.org/ontology/nietzsche#hasText http://www.nie.org/ontology/nietzsche#wordBelongsToLine http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition http://www.nie.org/ontology/nietzsche#hasFaksimilePosition http://www.nie.org/ontology/nietzsche#textConnectionMarkHasTextSource http://www.nie.org/ontology/nietzsche#TextField http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasLeft http://www.nie.org/ontology/nietzsche#hasTop http://www.nie.org/ontology/nietzsche#hasBottom - http://www.nie.org/ontology/nietzsche#hasTransform + http://www.nie.org/ontology/nietzsche#hasTransform http://www.nie.org/ontology/nietzsche#TranskriptionPosition http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasLeft http://www.nie.org/ontology/nietzsche#hasTop http://www.nie.org/ontology/nietzsche#hasBottom - http://www.nie.org/ontology/nietzsche#hasTransform - http://www.nie.org/ontology/nietzsche#isOnSvgImage + http://www.nie.org/ontology/nietzsche#hasTransform 
http://www.nie.org/ontology/nietzsche#UncertainDecipherment http://www.nie.org/ontology/nietzsche#isUncertain http://www.nie.org/ontology/nietzsche#hasComment http://www.nie.org/ontology/nietzsche#Word http://www.nie.org/ontology/nietzsche#hasText http://www.nie.org/ontology/nietzsche#hasEditedText http://www.nie.org/ontology/nietzsche#wordHasWordParts http://www.nie.org/ontology/nietzsche#wordBelongsToLine http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition http://www.nie.org/ontology/nietzsche#hasFaksimilePosition http://www.nie.org/ontology/nietzsche#wordHasStyle http://www.nie.org/ontology/nietzsche#overwritesWord http://www.nie.org/ontology/nietzsche#isTransformationOfWord http://www.nie.org/ontology/nietzsche#isExtensionOfWord http://www.nie.org/ontology/nietzsche#isDeletionOfWord http://www.nie.org/ontology/nietzsche#isClarificationOfWord http://www.nie.org/ontology/nietzsche#wordHasEarlierVersion http://www.nie.org/ontology/nietzsche#wordHasCorrection http://www.nie.org/ontology/nietzsche#wordIsDeletedByPath http://www.nie.org/ontology/nietzsche#wordHasEditorComment http://www.nie.org/ontology/nietzsche#WordDeletionPath http://www.nie.org/ontology/nietzsche#hasDAttribute http://www.nie.org/ontology/nietzsche#WordInsertionMark http://www.nie.org/ontology/nietzsche#hasHeight http://www.nie.org/ontology/nietzsche#hasWidth http://www.nie.org/ontology/nietzsche#hasLeft http://www.nie.org/ontology/nietzsche#hasTop http://www.nie.org/ontology/nietzsche#hasBottom - http://www.nie.org/ontology/nietzsche#hasTransform + http://www.nie.org/ontology/nietzsche#hasTransform http://www.nie.org/ontology/nietzsche#hasMarkType http://www.nie.org/ontology/nietzsche#hasSymbolId http://www.nie.org/ontology/nietzsche#hasNextWord http://www.nie.org/ontology/nietzsche#hasPreviousWord http://www.nie.org/ontology/nietzsche#wordInsertionMarkBelongsToLine xml-dictionary - 2020-11-11 15:04:42 + 2020-12-07 10:55:12