Index: Friedrich-Nietzsche-late-work-ontology.ttl
===================================================================
--- Friedrich-Nietzsche-late-work-ontology.ttl (revision 103)
+++ Friedrich-Nietzsche-late-work-ontology.ttl (revision 104)
@@ -1,57 +1,63 @@
@prefix dct: .
@prefix document: .
@prefix homotypic: .
@prefix stoff: .
@prefix text: .
@prefix owl: .
@prefix rdfs: .
@prefix xsd: .
@prefix tln: .
a owl:Ontology;
dct:license ;
dct:title "An ontology about the collected late works of Friedrich Nietzsche"@en;
dct:description """Formal description of specific concepts in the scientific study of Friedrich Nietzsches late work."""@en;
dct:creator "Dominique Steinbach, tool coordinator/software developer, NIE-INE/digital edition of der späte Nietzsche, Basel University, Switzerland"@en;
dct:contributor "Christian Steiner, software developer, digital edition of der späte Nietzsche, University of Basel, Switzerland"@en;
dct:publisher "Basel University, Switzerland"@en.
tln:Page a owl:Class ;
rdfs:subClassOf document:Page .
tln:hasImage a owl:ObjectProperty ;
rdfs:label "relates a page to a image"@en ;
rdfs:comment "relates a page to an image that has a textfield that specifies the area where the writing that constitutes the page can be found."@en ;
rdfs:isDefinedBy ;
rdfs:domain tln:Page ;
rdfs:range tln:Image .
+tln:hasUrl a owl:DatatypeProperty ;
+ rdfs:label "has Url"@en ;
+ rdfs:domain tln:Image ;
+ rdfs:isDefinedBy ;
+ rdfs:range xsd:anyURI .
+
tln:inheritOverwritesWord a owl:ObjectProperty ;
rdfs:subPropertyOf tln:overwritesWord;
rdfs:label "word overwrites word (inherited from tln:wordHasCorrection)"@en ;
rdfs:comment "The author has used this word in order to overwrite that word."@en ;
rdfs:isDefinedBy ;
owl:propertyChainAxiom ( tln:wordHasCorrection tln:overwritesWord ).
tln:lineContinuesOn a owl:ObjectProperty ;
rdfs:label "writing from subject line continues on object line"@en ;
rdfs:comment "the writing that ends on subject line continues on object line"@en ;
rdfs:isDefinedBy ;
rdfs:domain tln:Line ;
rdfs:range tln:Line .
tln:pageIsOnTextField a owl:ObjectProperty ;
rdfs:label "page is on text field"@en ;
rdfs:comment "the writing that is referred to as subject can be found on object"@en ;
rdfs:isDefinedBy ;
rdfs:domain tln:Page ;
rdfs:range tln:TextField .
tln:writingContinuesWithWord a owl:ObjectProperty ;
rdfs:label "writing continues with next word"@en ;
rdfs:isDefinedBy ;
rdfs:domain tln:Word ;
rdfs:range tln:Word .
Index: svgscripts/datatypes/path.py
===================================================================
--- svgscripts/datatypes/path.py (revision 103)
+++ svgscripts/datatypes/path.py (revision 104)
@@ -1,200 +1,200 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This super class can be used to represent all svg path types.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
from svgpathtools.parser import parse_path
from svgpathtools.path import Line
from svgpathtools.path import Path as SVGPath
import sys
from .attachable_object import AttachableObject
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Path(AttachableObject,SemanticClass):
    """This super class represents all types of svg paths.

    Args:
        node (lxml.etree.Element): node containing the path information.
        path (svgpathtools.path.Path): svg path representation.
    """
    XML_TAG = 'path'
    WORD_DELETION_PATH_TAG = 'word-deletion-path'
    BOX_TAG = 'box-path'

    def __init__(self, id=0, node=None, path=None, parent_path=None, d_string=None, style_class='', tag=XML_TAG):
        self.intKeys = [ 'id' ]
        self.stringKeys = [ 'style_class' ]
        self.floatKeys = []
        self.start_line_number = -1
        self.parent_path = parent_path
        if node is not None:
            # Initialize from an XML node: id, d attribute and style class.
            self.id = int(node.get('id')) if bool(node.get('id')) else 0
            self.path = parse_path(node.get('d')) if bool(node.get('d')) else None
            self.d_attribute = node.get('d')
            self.style_class = node.get('style-class')
            self.tag = node.tag
        else:
            # Initialize from keyword arguments; fall back to parsing d_string
            # when no ready-made svgpathtools path was handed in.
            self.tag = tag
            self.id = id
            self.path = path
            if self.path is None and d_string is not None and d_string != '':
                self.path = parse_path(d_string)
            self.d_attribute = self.path.d() if self.path is not None else ''
            self.style_class = style_class
def attach_object_to_tree(self, target_tree):
"""Attach object to tree.
"""
if target_tree.__class__.__name__ == '_ElementTree':
target_tree = target_tree.getroot()
obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \
if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \
else ET.SubElement(target_tree, self.tag)
for key in self.floatKeys:
if self.__dict__[key] is not None:
obj_node.set(key.replace('_','-'), str(round(self.__dict__[key], 3)))
for key in self.intKeys + self.stringKeys:
if self.__dict__[key] is not None:
obj_node.set(key.replace('_','-'), str(self.__dict__[key]))
if self.path is not None:
obj_node.set('d', self.path.d())
@classmethod
def create_cls(cls, id=0, path=None, style_class='', page=None, tag=XML_TAG, stroke_width=0.0):
"""Create and return a cls.
"""
if path is not None\
and path.start.imag <= path.end.imag\
and page is not None\
and style_class != ''\
and len(path._segments) == 1\
and type(path._segments[0]) == Line\
and ((style_class in page.style_dict.keys()\
and 'stroke-width' in page.style_dict[style_class].keys())\
or stroke_width > 0.0):
# If path is a Line and its style_class specifies a stroke-width, correct path
stroke_width_correction = float(page.style_dict[style_class]['stroke-width'])/2\
if stroke_width == 0.0\
else stroke_width
xmin = path.start.real
xmax = path.end.real
ymin = path.start.imag-stroke_width_correction
ymax = path.end.imag+stroke_width_correction
#path = parse_path(f'M {xmin}, {ymin} L {xmax}, {ymin} L {xmax}, {ymax} L {xmin}, {ymax} z')
path = SVGPath(Line(start=(complex(f'{xmin}+{ymin}j')), end=(complex(f'{xmax}+{ymin}j'))),\
Line(start=(complex(f'{xmax}+{ymin}j')), end=(complex(f'{xmax}+{ymax}j'))),\
Line(start=(complex(f'{xmax}+{ymax}j')), end=(complex(f'{xmin}+{ymax}j'))),\
Line(start=(complex(f'{xmin}+{ymax}j')), end=(complex(f'{xmin}+{ymin}j'))))
return cls(id=id, path=path, style_class=style_class, tag=tag)
def contains_path(self, other_path):
"""Returns true if other_path is contained in this path.
"""
this_xmin, this_xmax, this_ymin, this_ymax = self.path.bbox()
other_xmin, other_xmax, other_ymin, other_ymax = other_path.path.bbox()
return other_xmin >= this_xmin and other_xmax <= this_xmax\
and other_ymin >= this_ymin and other_ymax <= this_ymax
def contains_start_of_path(self, other_path):
"""Returns true if start of other_path is contained in this path.
"""
this_xmin, this_xmax, this_ymin, this_ymax = self.path.bbox()
other_xmin, other_xmax, other_ymin, other_ymax = other_path.path.bbox()
return other_xmin >= this_xmin and other_xmin < this_xmax\
and other_ymin >= this_ymin and other_ymax <= this_ymax
def contains_end_of_path(self, other_path):
"""Returns true if end of other_path is contained in this path.
"""
this_xmin, this_xmax, this_ymin, this_ymax = self.path.bbox()
other_xmin, other_xmax, other_ymin, other_ymax = other_path.path.bbox()
return other_xmax >= this_xmin and other_xmax < this_xmax\
and other_ymin >= this_ymin and other_ymax <= this_ymax
@classmethod
- def create_path_from_transkription_position(cls, transkription_position, tr_xmin=0.0, tr_ymin=0.0):
+ def create_path_from_transkription_position(cls, transkription_position, tr_xmin=0.0, tr_ymin=0.0, include_pwps=True):
"""Create a .path.Path from a .transkription_position.TranskriptionPosition.
"""
- if len(transkription_position.positional_word_parts) > 0:
+ if include_pwps and len(transkription_position.positional_word_parts) > 0:
first_pwp = transkription_position.positional_word_parts[0]
last_pwp = transkription_position.positional_word_parts[len(transkription_position.positional_word_parts)-1]
xmin = tr_xmin + first_pwp.left
xmax = tr_xmin + last_pwp.left + last_pwp.width
ymin = tr_ymin + sorted(pwp.top for pwp in transkription_position.positional_word_parts)[0]
ymax = tr_ymin + sorted([pwp.bottom for pwp in transkription_position.positional_word_parts], reverse=True)[0]
else:
xmin = tr_xmin + transkription_position.left
xmax = xmin + transkription_position.width
ymin = tr_ymin + transkription_position.top
ymax = ymin + transkription_position.height
word_path = parse_path('M {}, {} L {}, {} L {}, {} L {}, {} z'.format(xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax))
return cls(path=word_path)
def do_paths_intersect(self, other_path):
"""Returns true if paths intersect, false if not or if there was an exception.
"""
try:
return self.path.intersect(other_path.path, justonemode=True)
except AssertionError:
return False
def get_median_y(self, tr_ymin=0.0):
"""Return the median of ymin + ymax.
"""
return (self.path.bbox()[2] + self.path.bbox()[3])/2 - tr_ymin
def get_x(self, tr_xmin=0.0):
"""Return xmin.
"""
return self.path.bbox()[0] - tr_xmin
@classmethod
def get_semantic_dictionary(cls):
""" Creates and returns a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {'d_attribute': { 'class': str, 'cardinality': 0,\
'name': 'hasDAttribute', 'label': 'svg path has d attribute',\
'comment': 'The d attribute defines a path to be drawn.'}}
#properties.update(cls.create_semantic_property_dictionary('style_class', str))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def is_partially_contained_by(self, other_path):
"""Returns true if other_path containes this path partially.
"""
return other_path.contains_start_of_path(self) or other_path.contains_end_of_path(self)
Index: svgscripts/datatypes/word.py
===================================================================
--- svgscripts/datatypes/word.py (revision 103)
+++ svgscripts/datatypes/word.py (revision 104)
@@ -1,871 +1,873 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import copy
import inspect
from lxml import etree as ET
from operator import attrgetter
import re
import string
import sys
import warnings
from .box import Box
from .editor_comment import EditorComment
from .matrix import Matrix
from .path import Path
from .simple_word import SimpleWord
from .style import Style
from .word_deletion_path import WordDeletionPath
from .word_position import WordPosition
from .transkription_position import TranskriptionPosition
from .writing_process import WritingProcess
SINGLE_PUNCTUATION_PATTERN = r"^[{}–]$".format(string.punctuation)
def execute_function_on_parts(word_parts, func_name):
    """Execute the method named func_name on each word and expand word parts.

    Every word that has word_parts after the call is replaced in the returned
    list by those parts (in order) and its own word_parts list is cleared.

    :return: new word_parts, output of the last call (None for empty input)
    """
    copy_parts = word_parts[:]
    output = None  # defined even when word_parts is empty (avoids NameError)
    for word in word_parts:
        # getattr instead of eval('word.{0}()'): no arbitrary-code execution
        # if func_name ever comes from untrusted input, and it is faster.
        output = getattr(word, func_name)()
        if len(word.word_parts) > 0:
            insert_at = copy_parts.index(word)
            for part_word in word.word_parts:
                copy_parts.insert(insert_at, part_word)
                insert_at += 1
            copy_parts.remove(word)
            word.word_parts = []
    return copy_parts, output
def update_transkription_position_ids(word):
    """Update transkription_position' ids according to index.
    """
    # Renumber the word parts only when their ids are not already unique.
    part_ids = [ part.id for part in word.word_parts ]
    if len(set(part_ids)) != len(part_ids):
        for index, part in enumerate(word.word_parts):
            part.id = index
    # Number transkription positions from left to right and reset their state.
    for index, transkription_position in enumerate(sorted(word.transkription_positions, key=attrgetter('left'))):
        transkription_position.id = index
        transkription_position.has_box = None
        transkription_position.deleted = False
class Word(SimpleWord):
    """
    This class represents a word.
    """
    COPY_PROPERTY_KEY = [ 'line_number', 'deleted', 'writing_process_id' ]
    APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS = { 'style': 'styles' }
    DATA = 'debug-data'
    RDFS_SUBCLASSOF_LIST = ['http://www.e-editiones.ch/ontology/text#HandwrittenText']
    XML_TAG = 'word'
    XML_EARLIER_VERSION = 'earlier-version'
    XML_OVERWRITES = 'overwrites'
    XML_CORRECTION_DICT = { 'isClarificationOfWord': 'clarifiesWord',\
            'isDeletionOfWord': 'deletesEarlierPart',\
            'isExtensionOfWord': 'extendsEarlierVersion',\
            'isTransformationOfWord': 'transformsEarlierPart' }

    def __init__(self, id=0, text='', line_number=-1, deleted=False, transkription_positions=None, faksimile_positions=None, word_part_objs=None, word_parts=None, writing_process_id=-1, earlier_version=None, box_paths=None, styles=None):
        super(Word,self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\
                faksimile_positions=faksimile_positions)
        self.corrections = []
        self.deleted = deleted
        self.deletion_paths = []
        self.debug_container = {}
        self.debug_msg = None
        self.earlier_version = earlier_version
        self.edited_text = None
        self.editor_comment = None
        self.isClarificationOfWord = None
        self.isDeletionOfWord = None
        self.isExtensionOfWord = None
        self.isTransformationOfWord = None
        # Derive the text from the transkription positions when none was given.
        if len(self.text) == 0 and len(''.join([ tp.get_text() for tp in self.transkription_positions if type(tp) == TranskriptionPosition ])) > 0:
            self.text = ''.join([ tp.get_text() for tp in self.transkription_positions ])
        self.overwrites_word = None
        self.styles = styles if styles is not None else []
        self.verified = None
        self.writing_process_id = writing_process_id
        self.writing_processes = []
        self.word_insertion_mark = None
        self.word_box = None
        self.word_parts = word_parts if word_parts is not None else []
        self.word_part_objs = word_part_objs if word_part_objs is not None else []
def add_deletion_paths(self, deletion_paths, tr_xmin=0.0, tr_ymin=0.0):
"""Add a word deletion path to word.
"""
if len(self.word_parts) > 0:
for part in self.word_parts: part.add_deletion_paths(deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin)
elif self.deleted and len(self.transkription_positions) > 0:
+ include_pwps = (len(self.transkription_positions[0].positional_word_parts) > 0
+ and abs(self.transkription_positions[0].left-self.transkription_positions[0].positional_word_parts[0].left) < 10)
word_path = Path.create_path_from_transkription_position(self.transkription_positions[0],\
- tr_xmin=tr_xmin, tr_ymin=tr_ymin)
+ tr_xmin=tr_xmin, tr_ymin=tr_ymin, include_pwps=include_pwps)
self.deletion_paths = [ deletion_path for deletion_path in deletion_paths\
if do_paths_intersect_saveMode(deletion_path, word_path) ]
def attach_word_to_tree(self, target_tree):
"""Attaches word to tree target_tree.
"""
word_node = super(Word,self).attach_word_to_tree(target_tree)
if self.deleted is not None:
word_node.set('deleted', str(self.deleted).lower())
if self.verified is not None:
word_node.set('verified', str(self.verified).lower())
if self.edited_text is not None:
word_node.set('edited-text', self.edited_text)
if self.editor_comment is not None:
self.editor_comment.attach_object_to_tree(word_node)
if self.writing_process_id > -1:
word_node.set('writing-process-id', str(self.writing_process_id))
for index, word_part in enumerate(self.word_parts):
word_part.id = index
word_part.attach_word_to_tree(word_node)
if self.earlier_version is not None:
earlier_node = ET.SubElement(word_node, self.XML_EARLIER_VERSION)
self.earlier_version.attach_word_to_tree(earlier_node)
if self.overwrites_word is not None\
and len(self.overwrites_word.transkription_positions) > 0:
overwrite_node = ET.SubElement(word_node, self.XML_OVERWRITES)
self.overwrites_word.attach_word_to_tree(overwrite_node)
if self.word_box is not None:
self.word_box.attach_object_to_tree(word_node)
if len(self.corrections) > 0:
word_node.set('corrections', ' '.join(set([ str(word.id) for word in self.corrections ])))
for key in self.XML_CORRECTION_DICT.keys():
if self.__dict__[key] is not None:
word_node.set(self.XML_CORRECTION_DICT[key], 'true')
return word_node
def belongs_to_multiple_writing_processes(self, include_parts=False):
"""Returns true if transkription_positions belong to different WritingProcesses.
"""
if len(self.word_parts) > 0 and include_parts:
return len(set(word.writing_process_id for word in self.word_parts)) > 1
return len(set(tp.writing_process_id for tp in self.transkription_positions )) > 1
def set_parent_word_writing_process_id(self):
"""Set writing_process_id for parent word.
"""
ids = set(word.transkription_positions[0].style for word in self.word_parts\
if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None)
if len(ids) > 1:
self.writing_process_id = max([style.writing_process_id for style in ids])
if len(set(word.transkription_positions[0].style.create_a_copy_wo_writing_process_id()\
for word in self.word_parts\
if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None))\
> 1:
self.writing_process_id += 1
@classmethod
def create_cls(cls, word_node):
"""Creates a word from a (lxml.Element) node.
[:return:] Word
"""
# NOTE(review): 'cls' is rebound to the instance returned by the super
# class factory; all following assignments populate that instance.
cls = super(Word,cls).create_cls(word_node)
cls.writing_process_id = int(word_node.get('writing-process-id')) if bool(word_node.get('writing-process-id')) else -1
cls.split_strings = None
cls.join_string = word_node.get('join')
if bool(word_node.get('split')):
cls.split_strings = word_node.get('split').split(' ')
# The concatenation of the split attribute must reproduce the text attribute.
if ''.join(cls.split_strings) != cls.text:
error_msg = 'Error in file {0}: word with id="{1}" has split attributes that do not correspond to its text attribute!\n'.\
format(word_node.getroottree().docinfo.URL, str(cls.id))\
+ 'Split attributes: "{0}".\n'.format(' '.join(cls.split_strings))\
+ 'Text attribute: "{0}".\n'.format(cls.text)
raise Exception(error_msg)
cls.verified = word_node.get('verified') == 'true'\
if bool(word_node.get('verified')) else None
cls.deleted = word_node.get('deleted') == 'true'\
if bool(word_node.get('deleted')) else None
cls.edited_text = word_node.get('edited-text')
cls.editor_comment = [ EditorComment.create_cls_from_node(node) for node in word_node.xpath('./' + EditorComment.XML_TAG) ][0]\
if len([ node for node in word_node.xpath('./' + EditorComment.XML_TAG) ]) > 0 else None
# Nested word parts are created recursively from child <word> nodes.
cls.word_parts = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_TAG) ]
# The 'corrections' attribute holds space-separated indices into word_parts.
if bool(word_node.get('corrections')):
for index in [ int(i) for i in word_node.get('corrections').split(' ') ]:
if index < len(cls.word_parts):
cls.corrections.append(cls.word_parts[index])
cls.earlier_version = None
if len(word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG)) > 0:
cls.earlier_version = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG) ][0]
# Boolean correction markers are stored under the XML_CORRECTION_DICT values.
for key_value in cls.XML_CORRECTION_DICT.values():
if word_node.get(key_value) == 'true':
cls.__dict__[key_value] = True
# Rewire the marker flags on each word part to the actual objects:
# '*Part' markers point at the matching earlier-version part,
# '*EarlierVersion' markers at the earlier version, '*Word' at this word.
if cls.earlier_version is not None:
for word_part in cls.word_parts:
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Part') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]\
and len(cls.word_parts) <= len(cls.earlier_version.word_parts):
try:
word_part.__dict__[key] = cls.earlier_version.word_parts[word_part.id]
except Exception:
msg = f'{cls.id} {cls.text}: {word_part.id}'
raise Exception(msg)
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('EarlierVersion') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]:
word_part.__dict__[key] = cls.earlier_version
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Word') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]:
word_part.__dict__[key] = cls
cls.overwrites_word = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)][0]\
if len(word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)) > 0\
else None
cls.word_box = [ Box(node=node) for node in word_node.xpath('./' + Box.XML_TAG) ][0]\
if len(word_node.xpath('./' + Box.XML_TAG)) > 0\
else None
return cls
@classmethod
def join_words(cls, list_of_words, add_white_space_between_words=False):
"""Creates a word from a list of words.
[:return:] Word
"""
if len(list_of_words) > 1:
deleted = True in [ word.deleted for word in list_of_words ]\
and len(set([ word.deleted for word in list_of_words ])) == 1
line_number = list_of_words[0].line_number\
if len(set([ word.line_number for word in list_of_words ])) == 1\
else -1
for word in list_of_words:
if len(word.word_parts) > 0:
index = list_of_words.index(word)
list_of_words.remove(word)
for part_word in reversed(word.word_parts):
list_of_words.insert(index, part_word)
new_word_text = ''.join([word.text for word in list_of_words])\
if not add_white_space_between_words\
else ' '.join([word.text for word in list_of_words])
new_word = cls(id=list_of_words[0].id, text=new_word_text,\
line_number=line_number, deleted=deleted, word_parts=list_of_words)
if True in [ word.text.endswith('-') or word.text.endswith('=') for word in new_word.word_parts[:-1]]:
change_text = [ word.text for word in new_word.word_parts[:-1] if word.text.endswith('-') or word.text.endswith('=') ][0]
new_word.edited_text = new_word.text.replace(change_text, change_text[:-1])
for id, word in enumerate(new_word.word_parts): word.id = id
return new_word
if len(list_of_words) > 0:
return list_of_words[0]
else:
return None
def create_earlier_version(self, root_word=None, id=0):
"""Create an earlier version of word.
"""
if root_word is None:
root_word = self
root_word.set_parent_word_writing_process_id()
word_parts = []
# Word parts that are only punctuation are ignored for the deletion check.
non_single_punctuation_word_parts = [ word_part for word_part in self.word_parts\
if not re.match(SINGLE_PUNCTUATION_PATTERN, word_part.text) ]
non_single_punctuation_word_parts_length = len(non_single_punctuation_word_parts)
# If every non-punctuation part is deleted, the whole word counts as
# deleted and the parts themselves are reset to not-deleted.
if non_single_punctuation_word_parts_length > 0\
and len([ word_part for word_part in non_single_punctuation_word_parts\
if word_part.deleted ])\
== non_single_punctuation_word_parts_length:
self.deleted = True
for word_part in non_single_punctuation_word_parts: word_part.deleted = False
for id, word_part in enumerate(self.word_parts):
earlierWordPart = word_part.create_earlier_version(root_word=root_word, id=id)
if word_part.deleted:
# Deleted part: the earlier version keeps the part; mark as deletion.
word_part.isDeletionOfWord = earlierWordPart
word_parts.append(earlierWordPart)
if word_part not in self.corrections:
self.corrections.append(word_part)
elif word_part.overwrites_word is not None\
and ((len(word_part.transkription_positions) > 0\
and word_part.overwrites_word.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style\
!= word_part.overwrites_word.transkription_positions[0].style)
or word_part.word_box.earlier_version):
# Overwriting part with a differing style (or a box flagged as an
# earlier version): the overwritten word joins the earlier version.
word_part.overwrites_word.id = word_part.id
word_parts.append(word_part.overwrites_word)
word_part.isTransformationOfWord = word_part.overwrites_word
#print(f'transform: {self.text}')
if word_part not in self.corrections:
self.corrections.append(word_part)
elif root_word.writing_process_id > -1\
and (len(word_part.transkription_positions) > 0\
and word_part.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style.writing_process_id\
== root_word.writing_process_id):
# Part written in the root word's writing process extends the earlier version.
word_part.extendsEarlierVersion = True
#print('extends')
if word_part not in self.corrections:
self.corrections.append(word_part)
else:
if word_part.deleted:
word_part.isDeletionOfWord = earlierWordPart
word_parts.append(earlierWordPart)
if word_part not in self.corrections:
self.corrections.append(word_part)
else:
#print(f'default: {self.text}')
word_parts.append(earlierWordPart)
text = ''.join([ word.text for word in word_parts ])\
if len(word_parts) > 0\
else self.text
# A single collected part is merged into this word instead of kept separate.
if len(word_parts) == 1:
self.transkription_positions += word_parts[0].transkription_positions
self.faksimile_positions += word_parts[0].faksimile_positions
word_parts = []
new_transkription_positions = copy.deepcopy(self.transkription_positions)
if len(self.transkription_positions) > 0\
and self.transkription_positions[0].style is not None:
writing_process_id = self.transkription_positions[0].style.writing_process_id
for new_tp in new_transkription_positions:
new_tp.style.writing_process_id = writing_process_id
return Word(id=id, text=text, transkription_positions=new_transkription_positions,\
faksimile_positions=self.faksimile_positions, line_number=self.line_number,\
word_parts=word_parts)
def create_correction_history(self, page=None, box_style=None):
"""Create correction history.
"""
# A word box means this word overwrites an earlier text: reconstruct the
# overwritten word from the box with a style taken from box_style or page.
if self.word_box is not None:
manuscript = self.transkription_positions[0].style.manuscript\
if len(self.transkription_positions) > 0\
and self.transkription_positions[0].style is not None\
else None
style = Style()
if box_style is not None:
style = box_style
if page is not None:
style = Style.create_cls(page, self.word_box.text_style_class, manuscript=manuscript)
# Map font size keys of the box's style class to a writing stage.
for font_key in [ font_key for font_key in self.word_box.text_style_class.split(' ') if font_key in page.fontsizekey2stage_mapping.keys() ]:
style.writing_process_id = page.fontsizekey2stage_mapping.get(font_key)
transkription_positions = TranskriptionPosition.copy_list_of_cls(self.transkription_positions)
for transkription_position in transkription_positions:
transkription_position.style = style
self.overwrites_word = Word(text=self.word_box.earlier_text, transkription_positions=transkription_positions,\
line_number=self.line_number)
# Recurse into the parts, then derive the earlier version of this word.
for word_part in self.word_parts:
word_part.create_correction_history(page=page, box_style=box_style)
if len(self.word_parts) > 0:
earlier_version = self.create_earlier_version()
extending_words = self._get_parts_with_property_key('extendsEarlierVersion')
if len(extending_words) > 0:
for word in extending_words:
word.isExtensionOfWord = earlier_version
# Mixed deleted/undeleted parts: edited text keeps only undeleted parts.
if self.has_mixed_status('deleted', include_parts=True):
self.edited_text = ''.join([ word.text for word in self.word_parts if not word.deleted ])
if len(self.corrections) > 0:
self.earlier_version = earlier_version
@staticmethod
def CREATE_WORD(word_node=None, page=None, word_part_objs=[], id=0, height=0, endX=0, endSign=None, matrix=None, line_number=-1, debug_msg=None):
"""Creates a word from a (lxml.Element) node or word_part_objs.
[:return:] Word
"""
if word_node is not None: # init word from xml node
id = int(word_node.get('id'))
line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else line_number
text = word_node.get('text')
deleted = bool(word_node.get('deleted')) and word_node.get('deleted') == 'true'
transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('.//' + WordPosition.TRANSKRIPTION) ]
faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('.//' + WordPosition.FAKSIMILE) ]
word_part_objs = [ item.attrib for item in word_node.findall('.//' + Word.DATA + '/part')]\
if len(word_node.findall('.//' + Word.DATA)) > 0\
else [ item.attrib for item in word_node.findall('.//part')]
return Word(id=id, text=text, deleted=deleted, line_number=line_number, transkription_positions=transkription_positions,\
faksimile_positions=faksimile_positions, word_part_objs=word_part_objs)
elif len(word_part_objs) > 0: # init word from word_part_obj that has been extracted from svg file
WIDTH = 5
TOPCORRECTION = 2.0
FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize
height = height
x = round(float(word_part_objs[0]['x']), 3)
# With page style information available, derive height/top correction
# from the biggest font size among the parts' style classes.
if(page is not None and bool(page.style_dict)):
HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height
style_set = set(' '.join(set( dict['class'] for dict in word_part_objs)).split(' '))
biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set)
height = round(biggest_font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / biggest_font_size, 3)
TOPCORRECTION = 1 + HEIGHT_FACTOR / biggest_font_size
# A '%' end sign widens endX by the last character's font width.
if endSign is not None and '%' in endSign:
lastCharFontSizeList = [ float(page.style_dict[key]['font-size'].replace('px',''))\
for key in word_part_objs[len(word_part_objs)-1]['class'].split(' ')\
if bool(page.style_dict[key].get('font-size'))]
lastCharFontSize = lastCharFontSizeList[0] if len(lastCharFontSizeList) > 0 else 1
endX = float(endX) + lastCharFontSize * FONTWIDTHFACTOR
# NOTE(review): indentation was lost in this patch; this 'elif' repeats
# the '%' condition above and is therefore only reachable if it belongs
# to the OUTER 'if page is not None' check (fixed-WIDTH fallback when no
# page style information is available) — confirm against the repository.
elif endSign is not None and '%' in endSign:
endX = float(endX) + WIDTH
bottom = round(float(word_part_objs[0]['y']), 3)
y = round(bottom - height + TOPCORRECTION, 3)
width = round(float(endX) - x, 3)
transkription_positions = [ WordPosition(height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) ]
text = ''.join([ dict['text'] for dict in word_part_objs])
line_number = page.get_line_number( (y + bottom)/2) if page is not None else line_number
word = Word(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions, word_part_objs=word_part_objs)
word.debug_msg = debug_msg
return word
else:
error_msg = 'word_node has not been defined' if (word_node is None) else 'word_part_objs is empty'
raise Exception('Error: {}'.format(error_msg))
@classmethod
def get_semantic_dictionary(cls):
    """ Creates and returns a semantic dictionary as specified by SemanticClass.

    Extends the superclass dictionary with the word-specific semantic
    properties (style, corrections, deletion paths, editor comments,
    earlier/edited versions, the is*OfWord relations, overwrites_word and
    word_parts) and marks every XML correction property as a subproperty
    of 'isCorrectionOfWord'.
    """
    dictionary = super(Word,cls).get_semantic_dictionary()
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('styles', Style,\
            cardinality=1, cardinality_restriction='minCardinality',\
            name='wordHasStyle', label='word has style', comment='Word has an appearance that is characterized by this style.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('corrections', Word,\
            name='wordHasCorrection', label='word has corrections', comment='Word has a correction made by the author.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deletion_paths', WordDeletionPath,\
            name='wordIsDeletedByPath', label='word has been deleted with a deletion path',\
            comment='Word has been deleted by the author using a deletion path.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('editor_comment', EditorComment,\
            name='wordHasEditorComment', label='word has a comment by the editors', comment='Word has been commented by the editors.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_version', Word,\
            name='wordHasEarlierVersion', label='word has an earlier version', comment='There is a earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('edited_text', str,\
            name='hasEditedText', label='word has an edited text', comment='Word has a text that is edited automatically by removing deleted parts or hyphens.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isClarificationOfWord', Word,\
            name='isClarificationOfWord', label='word is a clarification of word',\
            comment='The author has used this part of the word in order to clarify the appearance of that word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isDeletionOfWord', Word,\
            name='isDeletionOfWord', label='word is a deletion of word',\
            comment='The author has used this part of a word in order to delete the corresponding part of an earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isExtensionOfWord', Word,\
            name='isExtensionOfWord', label='word is a extension of word',\
            comment='The author has used this part of a word in order to extend an earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isTransformationOfWord', Word,\
            name='isTransformationOfWord', label='word is a transformation of word',\
            comment='The author has used this part of a word in order to transform the corresponding part of an earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('overwrites_word', Word,\
            name='overwritesWord', label='word overwrites word',\
            comment='The author has used this word in order to overwrite that word.'))
    # This makes wordHasWordParts a subproperty of cls.HAS_HOMOTYPIC_PARTS_URL_STRING,
    # cls.return_dictionary_after_updating_super_classes will subclass Word under the corresponding super class.
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('word_parts', list,\
            name='wordHasWordParts', label='word has word parts', comment='Word consists of a list of words.',\
            subPropertyOf=cls.HAS_HOMOTYPIC_PARTS_URL_STRING))
    super_property_dictionary = cls.create_semantic_property_dictionary(cls.SUPER_PROPERTY, Word,\
            name='isCorrectionOfWord', label='word is a correction of word',\
            comment='The author has used this word in order to correct that word.')
    # every XML correction property additionally carries the
    # 'isCorrectionOfWord' super property information
    for key in cls.XML_CORRECTION_DICT.keys():
        correction_dict = dictionary[cls.PROPERTIES_KEY].get(key)
        correction_dict.update(super_property_dictionary)
        dictionary[cls.PROPERTIES_KEY].update({key: correction_dict})
    return cls.return_dictionary_after_updating_super_classes(dictionary)
def has_mixed_status(self, property_key, include_parts=False, concerns_word=True):
    """Returns true if transkription_positions have mixed status concerning the property_key in their __dict__.

    :param property_key: key looked up in the __dict__ of positions/parts
    :param include_parts: also inspect self.word_parts when present
    :param concerns_word: if True compare the parts themselves, otherwise
        compare their first transkription_position
    """
    # every transkription position must carry the key at all
    if not all(property_key in tp.__dict__ for tp in self.transkription_positions):
        return False
    if include_parts and len(self.word_parts) > 0:
        if concerns_word:
            if not all(property_key in part.__dict__ for part in self.word_parts):
                return False
            values = { part.__dict__[property_key] for part in self.word_parts }
        else:
            values = { part.transkription_positions[0].__dict__[property_key]\
                       for part in self.word_parts\
                       if len(part.transkription_positions) > 0\
                       and property_key in part.transkription_positions[0].__dict__ }
        return len(values) > 1
    return len({ tp.__dict__[property_key] for tp in self.transkription_positions }) > 1
def init_word(self, page):
    """Initialize word with objects from page.

    Collects the writing processes matching self.writing_process_id,
    initializes all word_parts and merges in their lines and writing
    processes (deduplicated), and recursively initializes
    overwrites_word and earlier_version.

    :param page: the page providing writing_processes and other objects
    """
    super(Word,self).init_word(page)
    if self.writing_process_id > -1:
        self.writing_processes += [ wp for wp in page.writing_processes if wp.id == self.writing_process_id ]
    # (removed a dead local alias of self.writing_processes that was never used)
    for word_part in self.word_parts:
        word_part.init_word(page)
        self.lines += word_part.lines
        self.writing_processes += word_part.writing_processes
    # deduplicate; note that set() does not preserve order (same as before)
    self.lines = list(set(self.lines))
    self.writing_processes = list(set(self.writing_processes))
    if self.overwrites_word is not None:
        self.overwrites_word.init_word(page)
    if self.earlier_version is not None:
        # an earlier version without ids inherits plausible defaults from this word
        if self.earlier_version.writing_process_id == -1:
            self.earlier_version.writing_process_id = self.writing_process_id-1
        if self.earlier_version.line_number == -1:
            self.earlier_version.line_number = self.line_number
        self.earlier_version.init_word(page)
def join(self, other_word, append_at_end_of_new_word=True, add_white_space_between_words=False):
    """Joins other_word with this word by changing the text of current word and adding other_word.transkription_positions.

    :param other_word: the word to merge into this one
    :param append_at_end_of_new_word: if True other_word is appended,
        otherwise it is prepended
    :param add_white_space_between_words: if True a single space separates
        the two texts (BUGFIX: previously ignored when prepending)
    """
    separator = ' ' if add_white_space_between_words else ''
    if append_at_end_of_new_word:
        self.text = self.text + separator + other_word.text
        for position in other_word.transkription_positions:
            # appended positions are renumbered to follow the existing ones
            position.id = str(len(self.transkription_positions))
            self.transkription_positions.append(position)
    else:
        self.text = other_word.text + separator + self.text
        index = 0
        for position in other_word.transkription_positions:
            self.transkription_positions.insert(index, position)
            index += 1
        # renumber the original positions that were shifted to the right
        while index < len(self.transkription_positions):
            self.transkription_positions[index].id = str(index)
            index += 1
    self.simplify_transkription_positions()
def partition_according_to_deletion(self):
    """Partition a word according to its transkription_positions' deletion status
    ->split word and add partial words as its parts.
    """
    if self.has_mixed_status('deleted'):
        transkription_positions = []
        last_status = None
        for transkription_position in self.transkription_positions:
            # close the current group whenever the deletion status changes
            if transkription_position.deleted != last_status\
               and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id)
                self.word_parts.append(newWord)
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_status = transkription_position.deleted
        # close the last group
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                    transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id)
            self.word_parts.append(newWord)
        # the parts now carry the positions, line number and deletion status
        self.transkription_positions = []
        self.line_number = -1
        self.deleted = False
    elif len(self.word_parts) > 0:
        # no mixed status on this level: partition the existing parts instead
        self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_deletion')
    elif not self.deleted\
         and len(self.transkription_positions) > 0\
         and self.transkription_positions[0].deleted:
        # uniform deletion status: propagate it to the word itself
        self.deleted = True
def partition_according_to_writing_process_id(self):
    """Partition a word according to its transkription_positions' writing_process_ids
    ->split word and add partial words as its parts.
    """
    if self.belongs_to_multiple_writing_processes():
        last_writing_process_id = -1
        transkription_positions = []
        for transkription_position in self.transkription_positions:
            # close the current group whenever the writing_process_id changes
            if transkription_position.writing_process_id != last_writing_process_id\
               and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, writing_process_id=last_writing_process_id)
                self.word_parts.append(newWord)
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_writing_process_id = transkription_position.writing_process_id
        # close the last group
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                    transkription_positions=transkription_positions, writing_process_id=last_writing_process_id)
            self.word_parts.append(newWord)
        # the parts now carry the positions
        self.transkription_positions = []
    elif len(self.word_parts) > 0:
        # no mixed ids on this level: partition the existing parts instead
        self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_writing_process_id')
    if self.belongs_to_multiple_writing_processes(include_parts=True):
        # the word as a whole gets the highest writing_process_id of its parts
        self.writing_process_id = sorted(set([ word.writing_process_id for word in self.word_parts ]), reverse=True)[0]
    elif len(self.transkription_positions) > 0:
        self.writing_process_id = self.transkription_positions[0].writing_process_id
def process_boxes(self, box_paths, tr_xmin=0.0, tr_ymin=0.0, previous_word_has_box=False):
    """Determines whether word is over a word box.

    :param box_paths: box paths still available for matching; a matched
        box is removed from this list.
    :param tr_xmin: x offset of the transkription field
    :param tr_ymin: y offset of the transkription field
    :param previous_word_has_box: True if the preceding word already matched a box
    :return: the (partial) word that lies over a box, or None
    """
    word_over_box = None
    if len(self.word_parts) > 0:
        # delegate to the parts; keep the last part that was found over a box
        for word in self.word_parts:
            current_word = word.process_boxes(box_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin, previous_word_has_box=(word_over_box is not None))
            if current_word is not None and current_word.word_box is not None:
                word_over_box = current_word
    else:
        new_tp_dict = {}
        for index, transkription_position in enumerate(self.transkription_positions):
            # nudge the first position to the right so it cannot re-match
            # the box that already belongs to the previous word
            if previous_word_has_box and index == 0:
                if len(transkription_position.positional_word_parts) > 0:
                    transkription_position.positional_word_parts[0].left += transkription_position.positional_word_parts[0].width/2
                    #print(f'{self.text}: {transkription_position.positional_word_parts[0].left}')
                else:
                    transkription_position.left += 1
            word_path = Path.create_path_from_transkription_position(transkription_position,\
                    tr_xmin=tr_xmin, tr_ymin=tr_ymin)
            containing_boxes = [ box_path for box_path in box_paths\
                    if word_path.is_partially_contained_by(box_path)\
                    or box_path.do_paths_intersect(word_path) ]
            if len(containing_boxes) > 0:
                if previous_word_has_box:
                    print(f'{self.text}: {word_path.path.bbox()} {containing_boxes[0].path.bbox()}')
                # may split the transkription position; replacements are
                # collected in new_tp_dict keyed by the position they replace
                self._set_box_to_transkription_position(containing_boxes[0], word_path,\
                        transkription_position, new_tp_dict, tr_xmin)
                box_paths.remove(containing_boxes[0])
        # swap the split transkription positions for their replacements in place
        for replace_tp in new_tp_dict.keys():
            for tp in new_tp_dict.get(replace_tp):
                self.transkription_positions.insert(self.transkription_positions.index(replace_tp), tp)
            self.transkription_positions.remove(replace_tp)
        word_over_box = self._get_partial_word_over_box()
        update_transkription_position_ids(self)
    return word_over_box
def set_word_insertion_mark(self, word_insertion_mark):
    """Sets word_insertion_mark

    :param word_insertion_mark: the insertion mark to attach to this word
    """
    self.word_insertion_mark = word_insertion_mark
def set_writing_process_id_to_transkription_positions(self, page):
    """Determines the writing process id of the transkription_positions.

    For each transkription position, looks up the style classes of its
    first positional word part in page.fontsizekey2stage_mapping and
    assigns the mapped stage as the position's writing_process_id.
    """
    mapping = page.fontsizekey2stage_mapping
    for transkription_position in self.transkription_positions:
        parts = transkription_position.positional_word_parts
        if len(parts) == 0:
            continue
        for font_key in parts[0].style_class.split(' '):
            if font_key in mapping.keys():
                transkription_position.writing_process_id = mapping.get(font_key)
def simplify_transkription_positions(self):
    """Merge transkription_positions if possible.

    Walks the positions from right to left and merges a position into its
    predecessor when is_mergebale_with succeeds; merging is only attempted
    while every position carries positional_word_parts.
    """
    index = len(self.transkription_positions)-1
    while index > 0\
        and False not in [ 'positional_word_parts' in tp.__dict__.keys() for tp in self.transkription_positions ]:
        current_tp = self.transkription_positions[index]
        index -= 1
        previous_tp = self.transkription_positions[index]
        if previous_tp.is_mergebale_with(current_tp):
            # concatenate the positional word parts of both positions
            positional_word_parts = previous_tp.positional_word_parts
            positional_word_parts += current_tp.positional_word_parts
            transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\
                    positional_word_parts, debug_msg_string='simplifying transkription positions', transkription_position_id=previous_tp.id)
            # only replace the pair if the merge produced a single position
            if len(transkription_positions) == 1:
                # prefer the previous writing_process_id when it is set
                transkription_positions[0].writing_process_id = previous_tp.writing_process_id\
                        if previous_tp.writing_process_id != -1\
                        else current_tp.writing_process_id
                self.transkription_positions.pop(index+1)
                self.transkription_positions[index] = transkription_positions[0]
    #print(self.text, len(self.transkription_positions))
def split(self, split_string, start_id=0):
    """Splits the word and returns an 3-tuple of new words.

    :param split_string: substring to split the word's text on
    :param start_id: id given to the first resulting word
    :return: (previousWord, currentWord, nextWord); previousWord/nextWord
        are None when split_string starts/ends the text or when the
        positional word parts do not match the expected substring.
    """
    previousString, currentString, nextString = self.text.partition(split_string)
    currentWord = None
    previousWord = None
    nextWord = None
    previousIndex = 0
    current_id = start_id
    # collect the positional word parts of all transkription positions
    all_positional_word_parts = []
    for position in self.transkription_positions:
        all_positional_word_parts += position.positional_word_parts
    if len(all_positional_word_parts) == 0:
        warnings.warn('ATTENTION: Word: {} {} with Strings "{}, {}, {}": there are no parts!'.format(self.id, self.text, previousString, currentString, nextString))
    if len(previousString) > 0:
        # consume pwps from the front until their texts spell previousString
        previous_pwps = []
        while previousIndex < len(all_positional_word_parts) and previousString != ''.join([ pwp.text for pwp in previous_pwps ]):
            previous_pwps.append(all_positional_word_parts[previousIndex])
            previousIndex += 1
        if previousString != ''.join([ pwp.text for pwp in previous_pwps ]):
            warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(previousString))
        else:
            previous_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(previous_pwps, debug_msg_string='word.split')
            previous_text = ''.join([ pwp.text for pwp in previous_pwps ])
            previousWord = Word(text=previous_text, id=current_id, line_number=self.line_number, transkription_positions=previous_transkription_positions)
            previousWord.faksimile_positions = self.faksimile_positions
            current_id += 1
            all_positional_word_parts = all_positional_word_parts[previousIndex:]
    if len(nextString) > 0:
        # consume pwps until they spell currentString; the remainder belongs to nextWord
        tmp_pwps = []
        index = 0
        while index < len(all_positional_word_parts) and currentString != ''.join([ pwp.text for pwp in tmp_pwps ]):
            tmp_pwps.append(all_positional_word_parts[index])
            index += 1
        if currentString != ''.join([ pwp.text for pwp in tmp_pwps ]):
            warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(currentString))
        else:
            next_pwps = all_positional_word_parts[index:]
            next_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(next_pwps, debug_msg_string='word.split')
            next_text = ''.join([ pwp.text for pwp in next_pwps ])
            nextWord = Word(text=next_text, id=current_id+1, line_number=self.line_number, transkription_positions=next_transkription_positions)
            nextWord.faksimile_positions = self.faksimile_positions
            all_positional_word_parts = all_positional_word_parts[:index]
    # whatever pwps remain make up the current word
    current_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(all_positional_word_parts, debug_msg_string='word.split')
    current_text = ''.join([ pwp.text for pwp in all_positional_word_parts ])
    currentWord = Word(text=current_text, id=current_id, line_number=self.line_number, transkription_positions=current_transkription_positions)
    currentWord.faksimile_positions = self.faksimile_positions
    return previousWord, currentWord, nextWord
def split_according_to_status(self, status, splits_are_parts=False):
    """Split a word according to its transkription_positions' text.

    :param status: property key in the transkription_positions' __dict__;
        a change of its value triggers a split
    :param splits_are_parts: if True the new words are added to self.word_parts
    :return: a list of new word.Word
    """
    new_words = []
    if self.has_mixed_status(status):
        last_status = None
        transkription_positions = []
        for transkription_position in self.transkription_positions:
            # close the current group whenever the status value changes
            if transkription_position.__dict__[status] != last_status\
               and len(transkription_positions) > 0:
                new_words.append(\
                        self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words)))
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_status = transkription_position.__dict__[status]
        # close the last group
        if len(transkription_positions) > 0:
            new_words.append(\
                    self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words)))
        if splits_are_parts:
            self.word_parts += new_words
        if len(self.word_parts) > 0:
            # the parts now carry the positions
            self.transkription_positions = []
    return new_words
def undo_partitioning(self):
    """Undo partitioning.

    Recursively undoes the partitioning of all word_parts, re-collects
    their transkription_positions onto this word and resets all
    partition-related state.
    """
    if len(self.word_parts) > 0:
        for word_part in self.word_parts:
            word_part.undo_partitioning()
            # only re-collect positions while the word's text is not yet
            # fully covered by its transkription_positions
            if self.text != ''.join([ tp.get_text() for tp in self.transkription_positions ]):
                self.transkription_positions += word_part.transkription_positions
        self.earlier_version = None
        self.edited_text = None
        self.word_box = None
        self.word_parts = []
        self.corrections = []
        self.earlier_versions = []
        self.box_paths = []
def _create_new_word(self, transkription_positions, status, new_id=0):
    """Create a new word from self and transkription_positions.

    :param transkription_positions: positions of the new word
    :param status: the property key the split was based on
    :param new_id: id of the new word
    :return: the new word.Word
    """
    newWord = Word(id=new_id, transkription_positions=transkription_positions)
    # copy all configured properties except the one the split was based on
    for key in self.COPY_PROPERTY_KEY:
        if key != status and key in self.__dict__.keys():
            newWord.__dict__[key] = self.__dict__[key]
    if status in self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS.keys():
        # status value is appended to the mapped list property of the new word
        newWord.__dict__[self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS[status]].append(transkription_positions[0].__dict__[status])
    else:
        newWord.__dict__[status] = transkription_positions[0].__dict__[status]
    return newWord
def _get_parts_with_property_key(self, property_key):
"""Return a list of word_parts with property == property_key.
"""
word_parts = []
for word_part in self.word_parts:
if property_key in word_part.__dict__.keys():
word_parts.append(word_part)
else:
word_parts += word_part._get_parts_with_property_key(property_key)
return word_parts
def _get_partial_word_over_box(self):
    """Partition a word according to its transkription_positions' has_box
    ->split word and add partial words as its parts.

    :return: word over box or self
    """
    word_over_box = None
    if self.has_mixed_status('has_box'):
        transkription_positions = []
        last_word_box = None
        for transkription_position in self.transkription_positions:
            # close the current group whenever the box changes
            if transkription_position.has_box != last_word_box\
               and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
                self.word_parts.append(newWord)
                if last_word_box is not None:
                    word_over_box = newWord
                    word_over_box.word_box = last_word_box
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_word_box = transkription_position.has_box
        # close the last group
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                    transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
            self.word_parts.append(newWord)
            if last_word_box is not None:
                word_over_box = newWord
                word_over_box.word_box = last_word_box
        # the parts now carry the positions
        self.transkription_positions = []
    elif len(self.word_parts) > 0:
        #self.word_parts, word_over_box = execute_function_on_parts(self.word_parts, inspect.currentframe().f_code.co_name) #'get_partial_word_over_box')
        # return the first part that turns out to be over a box
        for word_part in self.word_parts:
            if word_over_box is None:
                word_over_box = word_part._get_partial_word_over_box()
            else:
                break
    elif len([ tp for tp in self.transkription_positions if tp.has_box is not None]) == 1:
        # exactly one boxed position: the word itself is the word over box
        word_over_box = self
        word_over_box.word_box = [ tp for tp in self.transkription_positions if tp.has_box is not None][0].has_box
    return word_over_box
def _set_box_to_transkription_position(self, box_path, word_path, transkription_position, new_transkription_positions_dictionary, tr_xmin):
    """Set box_path to transkription_position that is contained by box_path.
    Create new transkription_positions by splitting old ones if necessary and add them to new_transkription_positions_dictionary.

    :param box_path: the box that matched word_path
    :param word_path: path created from transkription_position
    :param transkription_position: the position to mark/split
    :param new_transkription_positions_dictionary: maps a split position to
        its replacement positions
    :param tr_xmin: x offset of the transkription field
    """
    if box_path.contains_path(word_path):
        # the box covers the whole position
        transkription_position.has_box = box_path
    elif box_path.contains_start_of_path(word_path):
        # split at the right edge of the box; the left part is boxed
        split_position = box_path.path.bbox()[1] - tr_xmin
        new_tps = transkription_position.split(split_position)
        if len(new_tps) == 2:
            new_tps[0].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            # split failed: mark the whole position instead
            transkription_position.has_box = box_path
    elif box_path.contains_end_of_path(word_path):
        # split at the left edge of the box; the right part is boxed
        split_position = box_path.path.bbox()[0] - tr_xmin
        new_tps = transkription_position.split(split_position)
        if len(new_tps) == 2:
            new_tps[1].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
    else: # box_path in the middle of word_path
        split_position1 = box_path.path.bbox()[0] - tr_xmin
        split_position2 = box_path.path.bbox()[1] - tr_xmin
        new_tps = transkription_position.split(split_position1, split_position2)
        if len(new_tps) >= 2:
            new_tps[1].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
def do_paths_intersect_saveMode(mypath1, mypath2):
    """Returns true if paths intersect, false if not or if there was an exception.

    Wraps the underlying intersection test, which may raise AssertionError
    on degenerate paths; such failures are treated as "no intersection".
    """
    try:
        intersecting = mypath1.path.intersect(mypath2.path, justonemode=True)
        return intersecting or mypath1.is_partially_contained_by(mypath2)
    except AssertionError:
        return False
Index: svgscripts/datatypes/image.py
===================================================================
--- svgscripts/datatypes/image.py (revision 103)
+++ svgscripts/datatypes/image.py (revision 104)
@@ -1,138 +1,149 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This super class can be used to represent all image types.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
from .attachable_object import AttachableObject
from .matrix import Matrix
from .text_field import TextField
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Image(AttachableObject,SemanticClass):
"""
This super class represents all types of images.
Args:
file_name (str): name of the image file.
node (lxml.etree.Element) node, containing information
URL (str): URL of image file.
height (float): height of image
width (float): width of image
text_field (.text_field.TextField) text_field on image representation
"""
stringKeys = [ 'file_name', 'URL', 'local_path' ]
floatKeys = [ 'height', 'width' ]
XML_TAG = 'image'
+ SECONDARY_URL = 'http://localhost:8000/'
+ FAKSIMILE_DIR = 'faksimiles/'
def __init__(self, node=None, file_name=None, local_path=None, URL=None, height=0.0, width=0.0, matrix=None, text_field=None, tag=XML_TAG):
self.text_field = text_field
self.tag = tag
if node is not None:
self.file_name = node.get('file-name')
self.local_path = node.get('local-path')
self.URL = node.get('URL')
self.height = float(node.get('height'))
self.width = float(node.get('width'))
self.transform = Matrix(node.get('transform')) if bool(node.get('transform')) and 'matrix(' in node.get('transform') else None
if len(node.findall(TextField.XML_TAG)) > 0:
self.text_field = TextField(node=node.find(TextField.XML_TAG))
else:
self.file_name = file_name
self.local_path = local_path
self.URL = URL
self.height = height
self.width = width
self.transform = matrix
+ self.primaryURL = self.URL
+ self.secondaryURL = None
+ if self.file_name is not None:
+ self.secondaryURL = self.SECONDARY_URL + self.file_name.replace('./','')\
+ if self.file_name is not None and self.file_name.endswith('svg')\
+ else self.SECONDARY_URL + self.FAKSIMILE_DIR + self.file_name
def attach_object_to_tree(self, target_tree):
"""Attach object to tree.
"""
obj_node = target_tree.getroot().find('.//' + self.tag) \
if(len(target_tree.getroot().findall('.//' + self.tag)) > 0) \
else ET.SubElement(target_tree.getroot(), self.tag)
for key in self.floatKeys:
if self.__dict__[key] is not None:
obj_node.set(key.replace('_','-'), str(round(self.__dict__[key], 3)))
for key in self.stringKeys:
if self.__dict__[key] is not None:
obj_node.set(key.replace('_','-'), self.__dict__[key])
if self.transform is not None and self.transform.isRotationMatrix():
obj_node.set('transform', self.transform.toString())
if self.text_field is not None:
self.text_field.attach_object_to_tree(obj_node)
@classmethod
def get_semantic_dictionary(cls):
""" Creates and returns a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {}
for floatKey in Image.floatKeys:
properties.update(cls.create_semantic_property_dictionary(floatKey, float, cardinality=1))
properties.update(cls.create_semantic_property_dictionary('file_name', str, cardinality=1))
properties.update(cls.create_semantic_property_dictionary('text_field', TextField))
properties.update(cls.create_semantic_property_dictionary('transform', str))
- properties.update(cls.create_semantic_property_dictionary('URL', str, cardinality=1))
+ properties.update(cls.create_semantic_property_dictionary('primaryURL', str, cardinality=1, subPropertyOf=cls.HAS_URL))
+ properties.update(cls.create_semantic_property_dictionary('secondaryURL', str, cardinality=1, subPropertyOf=cls.HAS_URL))
dictionary.update({'class': class_dict})
dictionary.update({'properties': properties})
return dictionary
class SVGImage(Image):
"""This class represents a svg image.
"""
XML_TAG = 'svg-image'
+ URL_PREFIX = 'http://existdb-test.dasch.swiss/exist/rest/db/storage/nietzsche/'
def __init__(self, node=None, file_name=None, URL=None, height=0.0, width=0.0, text_field=None, tag=XML_TAG):
if node is not None and node.tag != self.XML_TAG:
file_name = node.get('file')
height = float(node.get('height')) if bool(node.get('height')) else 0.0
width = float(node.get('width')) if bool(node.get('width')) else 0.0
node = None
super(SVGImage, self).__init__(node=node, file_name=file_name, URL=URL,\
height=height, width=width, text_field=text_field, tag=self.XML_TAG)
+ self.primaryURL = self.URL_PREFIX + self.file_name.replace('./', '')
def decontextualize_file_name(self, update_url=None):
"""Decontextualize file name.
"""
self.file_name = self.file_name.replace('./', '')
if update_url is not None:
self.URL = update_url + self.file_name
# @classmethod
# def get_semantic_dictionary(cls):
# """ Creates and returns a semantic dictionary as specified by SemanticClass.
# """
# dictionary = super(SVGImage,cls).get_semantic_dictionary()
# return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/faksimile_image.py
===================================================================
--- svgscripts/datatypes/faksimile_image.py (revision 103)
+++ svgscripts/datatypes/faksimile_image.py (revision 104)
@@ -1,108 +1,122 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent faksimile images.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import fnmatch
from lxml import etree as ET
import os
from os.path import basename, dirname, isfile, realpath, sep
import sys
from .image import Image
from .matrix import Matrix
from .text_field import TextField
sys.path.append('svgscripts')
from local_config import FAKSIMILE_LOCATION
class FaksimileImage(Image):
"""
This class represents a faksimile image.
Args:
file_name (str): name of the image file.
node (lxml.etree.Element) node, containing information
URL (str): URL of image file.
height (float): height of image
width (float): width of image
x (float): x
y (float): y
"""
XML_TAG = 'faksimile-image'
- #OLD_NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/'
- NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/'
+ NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/download/'
+ NIETZSCHE_SOURCES_API_URL = 'http://www.nietzschesource.org/DFGAapi/api/page/'
+ NIETZSCHE_SOURCES_IMAGE_API_URL = 'http://www.nietzschesource.org/DFGAapi/images/DFGA/'
def __init__(self, node=None, file_name=None, local_path=None, URL=None, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None, text_field=None):
super(FaksimileImage, self).__init__(node=node, file_name=file_name, URL=URL, local_path=local_path,\
height=height, width=width, matrix=matrix, text_field=text_field, tag=self.XML_TAG)
self.x = x
self.y = y
+ self.apiURL = None
+ self.thumbURL = None
+ self.mediumURL = None
+ if self.file_name is not None:
+ nsource_page_name = self.file_name.replace('.jpg','')
+ nsource_manuscript_name = nsource_page_name.split(',')[0]
+ self.apiURL = self.NIETZSCHE_SOURCES_API_URL + nsource_page_name
+ self.thumbURL = self.NIETZSCHE_SOURCES_IMAGE_API_URL + nsource_manuscript_name + '/mini/' + self.file_name
+ self.mediumURL = self.NIETZSCHE_SOURCES_IMAGE_API_URL + nsource_manuscript_name + '/medium/' + self.file_name
+ if self.primaryURL is not None and self.primaryURL.startswith(self.NIETZSCHE_SOURCES_API_URL):
+ self.apiURL = self.primaryURL
+ self.primaryURL = self.NIETZSCHE_SOURCES_URL + basename(self.primaryURL)
def get_image_joined_with_text_field(self, text_field):
"""Returns a new instance of itself that has a text_field (text_field.TextField).
"""
return FaksimileImage(file_name=self.file_name, local_path=self.local_path, URL=self.URL, height=self.height,\
width=self.width, x=self.x, y=self.y, text_field=text_field)
-# @classmethod
-# def get_semantic_dictionary(cls):
-# """ Creates and returns a semantic dictionary as specified by SemanticClass.
-# """
-# dictionary = super(FaksimileImage,cls).get_semantic_dictionary()
-# dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text_field', TextField))
-# #dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('URL', str, cardinality=1))
-# return cls.return_dictionary_after_updating_super_classes(dictionary)
+ @classmethod
+ def get_semantic_dictionary(cls):
+ """ Creates and returns a semantic dictionary as specified by SemanticClass.
+ """
+ dictionary = super(FaksimileImage,cls).get_semantic_dictionary()
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('apiURL', str, subPropertyOf=cls.HAS_URL))
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('thumbURL', str, subPropertyOf=cls.HAS_URL))
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('mediumURL', str, subPropertyOf=cls.HAS_URL))
+ return cls.return_dictionary_after_updating_super_classes(dictionary)
@staticmethod
def CREATE_IMAGE(image_node, source_file=None):
"""Instantiates a FaksimileImage from a (lxml.etree.Element) image_node.
"""
namespaces = image_node.nsmap
if len(namespaces) == 0:
            namespaces = { 'xlink': 'http://www.w3.org/1999/xlink' }
local_path = image_node.get('{%s}href' % namespaces['xlink'])
file_name = basename(local_path)
if file_name != local_path and source_file is not None:
local_path = realpath(dirname(source_file)) + sep + local_path
local_path = realpath(local_path)
if not isfile(local_path):
local_path = None
for path, dirs, files in os.walk(os.path.abspath(FAKSIMILE_LOCATION)):
for filename in fnmatch.filter(files, file_name):
local_path = os.path.join(path, filename)
break
URL = FaksimileImage.NIETZSCHE_SOURCES_URL + file_name.replace('.jpg','')
height = float(image_node.get('height')) if bool(image_node.get('height')) else 0.0
width = float(image_node.get('width')) if bool(image_node.get('width')) else 0.0
x = float(image_node.get('x')) if bool(image_node.get('x')) else 0.0
y = float(image_node.get('y')) if bool(image_node.get('y')) else 0.0
matrix = Matrix(transform_matrix_string=image_node.get('transform'))\
if bool(image_node.get('transform'))\
else None
return FaksimileImage(file_name=file_name, local_path=local_path, URL=URL, height=height, width=width, x=x, y=y, matrix=matrix)
Index: tests_svgscripts/test_word.py
===================================================================
--- tests_svgscripts/test_word.py (revision 103)
+++ tests_svgscripts/test_word.py (revision 104)
@@ -1,487 +1,494 @@
import unittest
from os import sep, path
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from process_words_post_merging import reset_page, update_writing_process_ids
from datatypes.box import Box
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.matrix import Matrix
import datatypes.page
from datatypes.path import Path
from datatypes.positional_word_part import PositionalWordPart
from datatypes.style import Style
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.word import Word, execute_function_on_parts, update_transkription_position_ids
from datatypes.word_deletion_path import WordDeletionPath
from datatypes.word_position import WordPosition
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Page:
def __init__(self):
self.svg_file = None
def get_line_number(self, input=0):
return -1
def get_biggest_fontSize4styles(self, style_set={}):
return 7
class TestWord(unittest.TestCase):
TESTCASE = None
def setUp(self):
DATADIR = path.dirname(__file__) + sep + 'test_data'
self.test_file = DATADIR + sep + 'N_VII_1_page009.xml'
self.word_deletion_path_file = DATADIR + sep + 'N_VII_1_page138.xml'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.word_part_objs = [{'text': 'a' }, {'text': 'b' }, {'text': 'c' }]
x = 0
for dict in self.word_part_objs:
dict['class'] = 'st22'
dict['x'] = x
dict['y'] = 11
x += 1
mylist = {'text': 'abc', 'id': '0', 'line-number': '2', 'deleted': 'true' }
word_position = TranskriptionPosition(x=0, y=1, height=10, width=10, matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)'))
self.transkription_positions = [ word_position ]
self.word_node = ET.Element('word', attrib=mylist)
word_position.attach_object_to_tree(self.word_node)
x = 0
for char in mylist['text']:
ET.SubElement(self.word_node, 'part', attrib={'text': char, 'x': str(x), 'y': '11', 'class': 'st22' })
x += 1
def test_add_deletion_paths(self):
page = datatypes.page.Page(self.word_deletion_path_file, add_deletion_paths_to_words=False)
word = [ word for word in page.words if word.text == 'AufBau'][0]
#self.assertTrue(word.deleted)
self.assertTrue(len(word.word_parts) > 0)
self.assertTrue(word.word_parts[0].deleted)
word.add_deletion_paths(page.word_deletion_paths, tr_xmin=28.347656, tr_ymin=49.921875)
self.assertTrue(len(word.word_parts[0].deletion_paths) > 0)
#print(word.deletion_paths)
+ """
+ page = datatypes.page.Page('xml/Mp_XIV_page420.xml')
+ words = [ word for word in page.words if word.deleted or True in [ part.deleted for part in word.word_parts ]]
+ words[0].add_deletion_paths(page.word_deletion_paths)
+ print(words[0].deletion_paths)
+ """
+
def test_join_words(self):
words = [ Word(id=4, text='asdf-', line_number=1, deleted=True), Word(id=5, text='bsdf', line_number=2, deleted=False) ]
new_word = Word.join_words(words)
self.assertEqual(new_word.id, 4)
self.assertEqual(new_word.text, 'asdf-bsdf')
self.assertEqual(new_word.edited_text, 'asdfbsdf')
self.assertEqual(new_word.deleted, False)
self.assertEqual(new_word.line_number, -1)
words = [ Word(id=1, word_parts=[Word(id=4, text='asdf-', line_number=1, deleted=True), Word(id=5, text='bsdf', line_number=2, deleted=False)]),\
Word(id=4, text='.', line_number=2, deleted=True), Word(id=5, text='.', line_number=2, deleted=False) ]
new_word = Word.join_words(words)
self.assertEqual(new_word.text, 'asdf-bsdf..')
new_word = Word.join_words(words, add_white_space_between_words=True)
self.assertEqual(new_word.text, 'asdf- bsdf . .')
def test_Word_with_word_part_objs(self):
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
self.assertEqual(word.id, 0)
self.assertEqual(word.transkription_positions[0].bottom, 13)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 3)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
def test_Word_with_word_node(self):
word = Word.create_cls(self.word_node)
self.assertEqual(word.id, 0)
self.assertEqual(word.deleted, True)
self.assertEqual(word.transkription_positions[0].bottom, 11)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 1)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
self.assertEqual(word.line_number, 2)
self.assertEqual(word.transkription_positions[0].transform.isRotationMatrix(), True)
def test_attach_word_to_tree(self):
newWord = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
empty_tree = ET.ElementTree(ET.Element('page'))
newWord.attach_word_to_tree(empty_tree)
for word_node in empty_tree.getroot().xpath('//word'):
word = Word.CREATE_WORD(word_node=word_node)
self.assertEqual(word.id, 0)
self.assertEqual(word.deleted, False)
self.assertEqual(word.transkription_positions[0].bottom, 13)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 3)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
@unittest.skipUnless(TESTCASE is None or TESTCASE == 0, 'Not testing this case')
def test_create_correction_history_case0(self):
# Case 1: whole word over box
box = Box(earlier_text='XYX')
word = Word(text='ASDF', transkription_positions=[TranskriptionPosition()])
word.word_box = box
word.create_correction_history()
self.assertEqual(word.earlier_version is None, True)
self.assertEqual(word.overwrites_word is not None, True)
@unittest.skipUnless(TESTCASE is None or TESTCASE == 1, 'Not testing this case')
def test_create_correction_history_case1(self):
# Case 2: part of word over box
box = Box(earlier_text='XYX')
partA = Word(text='A', transkription_positions=[TranskriptionPosition()])
partA.word_box = box
partB = Word(text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
word.create_correction_history()
self.assertEqual(word.earlier_version is None, True)
self.assertEqual(word.word_parts[0].overwrites_word is not None, True)
@unittest.skipUnless(TESTCASE is None or TESTCASE == 2, 'Not testing this case')
def test_create_correction_history_case3(self):
# Case 3: part of word over box, word under box is part of earlier version
box = Box(earlier_text='XYX')
tp0 = TranskriptionPosition()
tp0.style = Style(writing_process_id=0)
tp1 = TranskriptionPosition()
tp1.style = Style(writing_process_id=1)
partA = Word(id=0, text='Test', transkription_positions=[ tp0])
partB = Word(id=1, text='er', transkription_positions=[ tp1])
partB.word_box = box
word = Word(text='Tester', writing_process_id=1, word_parts=[ partA, partB ] )
word.create_correction_history(box_style=tp0.style)
self.assertEqual(word.text, 'Tester')
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.earlier_version.text, 'TestXYX')
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
@unittest.skipUnless(TESTCASE is None or TESTCASE == 3, 'Not testing this case')
def test_create_correction_history_case4(self):
# Case 4: part of word is deleted
partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
word.create_correction_history()
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.edited_text, 'SDF')
@unittest.skipUnless(TESTCASE is None or TESTCASE == 4, 'Not testing this case')
def test_create_correction_history_case5(self):
tp0 = TranskriptionPosition()
tp0.style = Style(writing_process_id=0)
tp1 = TranskriptionPosition()
tp1.style = Style(writing_process_id=1)
partA = Word(id=0, text='Test', transkription_positions=[ tp0])
partB = Word(id=1, text='er', transkription_positions=[ tp1])
word = Word(text='Tester', word_parts=[ partA, partB ] )
word.create_correction_history()
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.word_parts[1].extendsEarlierVersion, True)
self.assertEqual(word.word_parts[1].isExtensionOfWord, word.earlier_version)
#@unittest.skipUnless(TESTCASE is None or TESTCASE == 5, 'Not testing this case')
#@unittest.skip('case tested, relies on a local xml file')
def test_create_correction_history_case_full(self):
page = datatypes.page.Page('xml/N_VII_1_page138.xml')
manuscript = ArchivalManuscriptUnity()
reset_page(page)
update_writing_process_ids(page)
word = [ word for word in page.words if word.text == 'Verschiedenes' and word.line_number == 4 ][0]
wordAufBau = [ word for word in page.words if word.text == 'AufBau' ][0]
#page.words = [ word ]
page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
word.word_parts[0].transkription_positions[0].has_box = Box(earlier_text='v')
self.assertEqual(len(word.word_parts), 2)
word_over_box = word._get_partial_word_over_box()
update_transkription_position_ids(word)
word.create_correction_history(page)
self.assertEqual(word.writing_process_id, 1)
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.earlier_version.text, 'verschiedenes')
#print(word.earlier_version.id, [ (w.id, w.text) for w in word.earlier_version.word_parts ])
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
#print(ET.dump(word_node))
"""
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
"""
word = wordAufBau
page.words = [ word ]
page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
word.word_parts[0].deleted = True
word.word_parts[1].transkription_positions[0].has_box = Box(earlier_text='b')
self.assertEqual(len(word.word_parts), 3)
word_over_box = word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 3)
update_transkription_position_ids(word)
word.create_correction_history(page)
self.assertEqual(word.writing_process_id, 2)
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.text, 'AufBau')
self.assertEqual(word.edited_text, 'Bau')
self.assertEqual(word.earlier_version.text, 'Aufbau')
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
#print(ET.dump(word_node))
newWord = Word.create_cls(word_node)
#@unittest.skip('')
def test_earlier_version(self):
partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
earlier_version = word.create_earlier_version()
self.assertEqual(earlier_version is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord, earlier_version.word_parts[0])
def test_undo_partitioning(self):
tps = []
for i, xy in enumerate([ 3, 4, 5 ]):
tps.append(TranskriptionPosition(id=i, x=xy, y=xy, height=10, width=10))
partA = Word(id=0, text='Auf', writing_process_id=1, deleted=True, transkription_positions=[ tps[0]])
partB = Word(id=1, text='B', writing_process_id=2, transkription_positions=[tps[1]])
partC = Word(id=2, text='au', writing_process_id=1,transkription_positions=[tps[2]])
word = Word(text='Aufbau', writing_process_id=2, word_parts=[ partA, partB, partC ] )
word.undo_partitioning()
self.assertEqual(len(word.transkription_positions), len(tps))
self.assertEqual(len(word.word_parts), 0)
"""
page = datatypes.page.Page('xml/N_VII_1_page138.xml')
word = page.words[77]
word.undo_partitioning()
self.assertEqual(len(word.word_parts), 0)
self.assertEqual(len(word.transkription_positions), 3)
update_transkription_position_ids(word)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
print(ET.dump(word_node))
"""
def test_split(self):
page = Page()
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, self.word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('b')
self.assertEqual(previousWord.id, 0)
self.assertEqual(previousWord.text, 'a')
self.assertEqual(currentWord.id, 1)
self.assertEqual(nextWord.id, 2)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('bc')
self.assertEqual(previousWord.id, 0)
self.assertEqual(previousWord.text, 'a')
self.assertEqual(currentWord.id, 1)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('ab', start_id=10)
self.assertEqual(currentWord.id, 10)
self.assertEqual(currentWord.text, 'ab')
self.assertEqual(currentWord.transkription_positions[0].width, 2.1)
self.assertEqual(nextWord.id, 11)
self.assertEqual(nextWord.transkription_positions[0].width, 5.2)
word_part_objs=[{'text': 'x', 'class':'st22', 'x': 0, 'y': 0},\
{'text': 'Insofern', 'class':'st22', 'x': 1, 'y': 0},\
{'text': 'x', 'class':'st22', 'x': 10, 'y': 0}]
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
with self.assertWarns(Warning):
previousWord, currentWord, nextWord = word.split('Insofer')
word_part_objs=[{'text': 'xInsofern', 'class':'st22', 'x': 0, 'y': 0}]
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
with self.assertWarns(Warning):
previousWord, currentWord, nextWord = word.split('Insofern')
def test_join(self):
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word, add_white_space_between_words=True)
self.assertEqual(word.text, 'abc .')
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word)
self.assertEqual(word.text, 'abc.')
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word, append_at_end_of_new_word=False)
self.assertEqual(word.text, '.abc.')
"""
tree = ET.ElementTree(ET.Element('page'))
word.attach_word_to_tree(tree)
print(ET.dump(tree.getroot()))
"""
def test_get_semanticAndDataDict(self):
dictionary = Word.get_semantic_dictionary()
#print(dictionary)
info_dict = dictionary['properties'].get('isDeletionOfWord')
self.assertEqual(SemanticClass.SUPER_PROPERTY in info_dict.keys(), True)
super_info_dict = info_dict[SemanticClass.SUPER_PROPERTY]
#print(info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME))
def test_simplify_transkription_positions(self):
node_string = """
"""
nodeA = ET.fromstring(node_string)
node_string = """
"""
nodeB = ET.fromstring(node_string)
word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
self.assertEqual(len(word.transkription_positions), 2)
word.simplify_transkription_positions()
self.assertEqual(len(word.transkription_positions), 1)
word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
word.transkription_positions[1].writing_process_id = -1
word.simplify_transkription_positions()
self.assertEqual(len(word.transkription_positions), 1)
self.assertEqual(word.transkription_positions[0].writing_process_id, 0)
"""
tree = ET.ElementTree(ET.Element('page'))
word.attach_word_to_tree(tree)
print(ET.dump(tree.getroot()))
"""
def test_partition(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
self.assertEqual(word.belongs_to_multiple_writing_processes(), True)
word.partition_according_to_writing_process_id()
self.assertEqual(len(word.word_parts), 3)
self.assertEqual(word.belongs_to_multiple_writing_processes(), False)
self.assertEqual(word.belongs_to_multiple_writing_processes(include_parts=True), True)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
newWord = Word.create_cls(word_node)
self.assertEqual(len(newWord.word_parts), 3)
#print(ET.dump(empty_tree.getroot()))
def test_partition_deletion(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
for transkription_position in word.transkription_positions:
transkription_position.deleted = transkription_position.writing_process_id == 1
self.assertEqual(word.has_mixed_status('deleted'), True)
word.partition_according_to_deletion()
self.assertEqual(len(word.word_parts), 3)
self.assertEqual(word.has_mixed_status('deleted'), False)
self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
page = datatypes.page.Page(self.test_file)
word = page.words[67]
word.partition_according_to_writing_process_id()
#print([(word.text, word.deleted) for word in word.word_parts])
word.word_parts[1].transkription_positions[1].deleted = True
word.partition_according_to_deletion()
self.assertEqual(len(word.word_parts), 4)
#print([(word.text, word.deleted) for word in word.word_parts])
partA = Word(text='A', deleted=True)
partB = Word(text='SDF', deleted=False)
word = Word(text='ASDF', word_parts=[ partA, partB])
self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
def test_execute_function_on_parts(self):
page = datatypes.page.Page(self.test_file)
word_parts = [ page.words[67], page.words[68] ]
word_parts, none = execute_function_on_parts(word_parts, 'partition_according_to_writing_process_id')
self.assertEqual(len(word_parts) == 4, True)
def test_process_word_boxes(self):
page = datatypes.page.Page(self.pdf_xml)
page.source = self.pdf_xml_source
page.update_styles(partition_according_to_styles=True)
tr = TranskriptionField(page.source)
box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
indices = [30, 277, 288, 297, 321]
for word_id, index in enumerate(indices):
word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
self.assertEqual(word_over_box is not None, True)
self.assertEqual(word_over_box == page.words[index] or word_over_box in page.words[index].word_parts, True)
#self.assertEqual(word_over_box in page.words[index].word_parts, True)
def test_process_word_several_boxesOn1LIne(self):
page = datatypes.page.Page(self.pdf_xml)
page.source = self.pdf_xml_source
for word in page.words:
word.set_writing_process_id_to_transkription_positions(page)
word.partition_according_to_writing_process_id()
tr = TranskriptionField(page.source)
box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
indices = [30, 277, 288, 297, 321]
empty_tree = ET.ElementTree(ET.Element('page'))
for word_id, index in enumerate(indices):
word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
self.assertEqual(word_over_box is not None, True)
def test_split_according_to_status(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
for transkription_position in word.transkription_positions:
transkription_position.text = 'asdf'\
if transkription_position.writing_process_id == 1\
else word.text
self.assertEqual(word.has_mixed_status('text'), True)
new_words = word.split_according_to_status('text')
#print([word.text for word in new_words ])
self.assertEqual(len(new_words) > 1, True)
self.assertEqual(new_words[0].id, word.id)
self.assertEqual(new_words[0].deleted, word.deleted)
self.assertEqual(new_words[1].id, word.id+1)
manuscript = ArchivalManuscriptUnity()
page = datatypes.page.Page(self.test_file)
word = page.words[67]
page.words = [ word ]
page.update_styles(manuscript=manuscript)
new_words = word.split_according_to_status('style', splits_are_parts=True)
self.assertEqual(len(word.word_parts), 3)
def test__create_new_word(self):
manuscript = ArchivalManuscriptUnity()
page = datatypes.page.Page(self.test_file)
word = page.words[67]
page.words = [ word ]
page.update_styles(manuscript=manuscript)
newWord = word._create_new_word([ word.transkription_positions[0] ], 'style')
for key in Word.COPY_PROPERTY_KEY:
self.assertEqual(newWord.__dict__[key], word.__dict__[key])
self.assertEqual(len(newWord.styles), 1)
def test__get_partial_word_over_box(self):
word = Word(text='test', transkription_positions=[ TranskriptionPosition(id=0), TranskriptionPosition(id=1) ])
word.transkription_positions[0].has_box = Box(earlier_text='asdf')
word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 2)
partA = Word(id=0, text='A', transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition(), TranskriptionPosition(id=1)])
partB.transkription_positions[0].has_box = Box(earlier_text='asdf')
word = Word(text='ASDF', word_parts=[ partA, partB])
word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 2)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_page.py
===================================================================
--- tests_svgscripts/test_page.py (revision 103)
+++ tests_svgscripts/test_page.py (revision 104)
@@ -1,151 +1,151 @@
import unittest
from os import sep, path
from os.path import isdir, isfile, dirname, basename
import lxml.etree as ET
import sys
import sys
sys.path.append('svgscripts')
dir_changed = False
if not isdir('datatypes'):
sys.path.append(dirname(sys.path[0]))
dir_changed = True
from datatypes.lineNumber import LineNumber
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.style import Style
from datatypes.writing_process import WritingProcess
from datatypes.word import Word
class TestPage(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_file = DATADIR + sep + 'test.xml'
self.test_svg_file = DATADIR + sep + 'test421.svg'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_tcm_xml = DATADIR + sep + 'N_VII_1_page001.xml'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
self.test_styles_color = DATADIR + sep + 'N_VII_1_page013.xml'
def test_Page(self):
page = Page(self.test_file)
self.assertEqual(page.title, 'Mp XIV 1')
self.assertEqual(page.number, '421')
self.assertEqual(len(page.sonderzeichen_list), 2)
self.assertEqual('st21' in page.sonderzeichen_list, True)
self.assertEqual('st23' in page.sonderzeichen_list, True)
self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
stage0 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 0 ]
stage1 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 1 ]
stage2 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 2 ]
fontStage0 = float(page.style_dict.get(stage0[0]).get('font-size').replace('px',''))
fontStage1 = float(page.style_dict.get(stage1[0]).get('font-size').replace('px',''))
fontStage2 = float(page.style_dict.get(stage2[0]).get('font-size').replace('px',''))
self.assertEqual(fontStage0 > fontStage1, True)
self.assertEqual(fontStage1 > fontStage2, True)
page = Page.create_cls(self.test_tcm_xml, create_dummy_page=True)
self.assertEqual(page.number, '1')
def test_get_biggest_fontSize4styles(self):
page = Page(self.test_file)
style_set = { 'st12', 'st2', 'st14', 'st13' }
self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)
def test_get_words(self):
page = Page(self.test_file)
words = page.words
self.assertEqual(len(words), 440)
self.assertEqual(words[0].text, '$')
self.assertEqual(words[439].text, 'mußte!')
def test_update_page_type(self):
page = Page(self.pdf_xml)
tf = TranskriptionField(self.pdf_xml_source)
page.update_page_type(transkription_field=tf)
self.assertEqual(page.page_type, Page.PAGE_VERSO)
#page = Page(self.xml_fileB)
#page.update_page_type()
#self.assertEqual(page.page_type, Page.PAGE_RECTO)
def test_update_line_number_area(self):
page = Page(self.xml_file)
transkription_field = TranskriptionField(page.source)
page.update_line_number_area(transkription_field)
self.assertEqual(transkription_field.line_number_area_width > 0, True)
self.assertEqual(transkription_field.line_number_area_width < 15, True)
page = Page(self.xml_fileB)
transkription_field = TranskriptionField(page.source)
page.update_line_number_area(transkription_field)
self.assertEqual(transkription_field.line_number_area_width > 0, True)
self.assertEqual(transkription_field.line_number_area_width < 15, True)
def test_get_pages_from_xml_file(self):
pages = Page.get_pages_from_xml_file(self.test_manuscript)
self.assertEqual(len(pages), 4)
self.assertEqual(pages[0].number, '5')
self.assertEqual(pages[1].number, '6')
pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK)
self.assertEqual(len(pages), 2)
self.assertEqual(pages[0].number, '5')
pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK, status_not_contain=STATUS_POSTMERGED_OK)
self.assertEqual(len(pages), 1)
def test_get_semantic_dictionary(self):
dictionary = Page.get_semantic_dictionary()
#print(dictionary)
def test_update_styles(self):
page = Page(self.pdf_xml)
page.words = [ word for word in page.words if word.text == 'Schopenhauer' ]
page.update_styles(add_to_parents=True)
self.assertEqual(len(page.words[0].styles), 1)
self.assertEqual(page.words[0].styles[0].color.name, 'black')
self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['latin'])
self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('black',False)])
page = Page(self.test_styles_color)
page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' ]
page.update_styles(add_to_parents=True)
self.assertEqual(len(page.words[0].styles), 1)
self.assertEqual(page.words[0].styles[0].color.name, 'green')
self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['german'])
self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('green',False)])
self.assertEqual(page.words[0].styles[0].writing_process_id, WritingProcess.INSERTION_AND_ADDITION)
page = Page(self.test_styles_color)
page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' or word.text == 'gewisse' ]
self.assertEqual(len(page.words), 2)
word = page.words[0]
word.transkription_positions += page.words[1].transkription_positions
page.words = [ word ]
page.update_styles(add_to_parents=True, partition_according_to_styles=True)
self.assertEqual(len(page.words[0].word_parts), 2)
page = Page(self.test_styles_color)
page.update_styles(add_to_parents=True, create_css=True)
for word in page.words:
self.assertTrue(len(word.styles) > 0)
for style in word.styles:
self.assertTrue(len(style.css_styles) > 0)
-
+
def test_lock(self):
page = Page(self.test_tcm_xml)
self.assertEqual(page.is_locked(), False)
page.lock('asdf.txt')
self.assertEqual(page.is_locked(), True)
self.assertEqual(page.page_tree.xpath('//lock/reference-file/text()')[0], 'asdf.txt')
page.unlock()
self.assertEqual(page.is_locked(), False)
if __name__ == "__main__":
unittest.main()
Index: py2ttl/class_spec.py
===================================================================
--- py2ttl/class_spec.py (revision 103)
+++ py2ttl/class_spec.py (revision 104)
@@ -1,254 +1,255 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This is an abstract class for all classes that are semantically relevant.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import abc
import inspect
import warnings
class UnSemanticClass:
"""
Subclasses of this class are not semantically relevant, even if their superclasses are.
"""
# Pure marker class: it defines no behavior of its own and is only checked for in class hierarchies.
pass
class SemanticClass(metaclass=abc.ABCMeta):
"""
This is an abstract class for all classes that are semantically relevant.
"""
# Keys used inside the semantic dictionaries produced by get_semantic_dictionary()
# and create_semantic_property_dictionary().
HAS_PART = 'has_part'
HAS_SEQNUM = 'has_seqnum'
SINGLE_VALUE = 1
LIST = -99
CLASS_KEY = 'class'
CARDINALITY = "cardinality"
CARDINALITY_RESTRICTION = "cardinality_restriction"
# Well-known property IRIs from the project ontologies.
HAS_HOMOTYPIC_PARTS_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasHomotypicParts'
HAS_IMAGE = 'http://www.nie.org/ontology/nietzsche#hasImage'
# NOTE(review): the leading '+' below is a leftover unified-diff marker embedded in this file — confirm and strip it.
+ HAS_URL = 'http://www.nie.org/ontology/nietzsche#hasUrl'
HOMOTYPIC_HAS_TEXT_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasText'
STOFF_STYLE_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#styleHasCSS'
PAGE_IS_ON_TEXTFIELD = 'http://www.nie.org/ontology/nietzsche#pageIsOnTextField'
PROPERTY_NAME = "name"
PROPERTY_LABEL = "label"
PROPERTY_COMMENT = "comment"
PROPERTIES_KEY = "properties"
SUBCLASS_OF = "rdfs:subClassOf"
SUBPROPERTYOF = "subPropertyOf"
# Maps an ontology base URI to the super class that owning classes are subclassed to
# when one of their properties is a subPropertyOf a property from that base URI.
SUPER_CLASSES_DICT = { 'http://www.nie.org/ontology/homotypic': 'HomotypicEntity', 'http://www.nie.org/ontology/standoff': 'Style' }
SUPER_PROPERTY = "super_property"
THIS = "this"
TYPE = "type"
@classmethod
def create_semantic_property_dictionary(cls, property_key, class_type, cardinality=0, cardinality_restriction='cardinality', name='', label='', comment='', subPropertyOf='') -> dict:
"""Create a semantic property dictionary.
Here is how to make a subproperty:
Pass the IRI of the super property as subPropertyOf=IRI,
be sure that base_uri of IRI (as key) and Class identifier of super class (as value) are in cls.SUPER_CLASSES_DICT,
then call cls.return_dictionary_after_updating_super_classes -> it will subclass the class that owns the subproperty
to the super class.
:return: semantic property dictionary (dict)
"""
property_content = { SemanticClass.CLASS_KEY: class_type }
# cardinality == 0 means "unrestricted": neither a cardinality nor a restriction entry is recorded.
if cardinality > 0:
property_content.update({ SemanticClass.CARDINALITY: cardinality})
property_content.update({ SemanticClass.CARDINALITY_RESTRICTION: cardinality_restriction})
# Optional entries are only added when a non-empty value was passed in.
if name != '':
property_content.update({ SemanticClass.PROPERTY_NAME: name})
if label != '':
property_content.update({ SemanticClass.PROPERTY_LABEL: label})
if comment != '':
property_content.update({ SemanticClass.PROPERTY_COMMENT: comment})
if subPropertyOf != '':
property_content.update({ SemanticClass.SUBPROPERTYOF: subPropertyOf})
return { property_key: property_content }
@classmethod
def get_class_dictionary(cls):
"""Creates and returns a class_dictionary with the keys cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
"""
class_dict = {cls.THIS: cls }
# cls.__dict__.get(...) only sees attributes declared directly on cls, not inherited ones.
if cls.__dict__.get('OWL_EQUIVALENTCLASSES') and len(cls.OWL_EQUIVALENTCLASSES) > 0:
class_dict.update({'owl:equivalentClass': cls.OWL_EQUIVALENTCLASSES })
if cls.__dict__.get('RDFS_SUBCLASSOF_LIST') and len(cls.RDFS_SUBCLASSOF_LIST) > 0:
class_dict.update({cls.SUBCLASS_OF: cls.RDFS_SUBCLASSOF_LIST })
# inspect.getclasstree([cls], unique=True)[0][0] yields the direct (first) superclass of cls.
direct_super_class = inspect.getclasstree([cls],unique=True)[0][0]
# Only record the super class as TYPE when it is itself semantically relevant.
if issubclass(direct_super_class, SemanticClass) and direct_super_class != SemanticClass:
class_dict.update({cls.TYPE: direct_super_class})
return class_dict
def get_name_and_id(self):
"""Return an identification for object as 2-tuple.
The id falls back from self.id over self.number to self.title (blanks replaced
by underscores); 0 is returned when none of these attributes exist.
"""
id = 0
if 'id' in self.__dict__.keys():
id = self.id
elif 'number' in self.__dict__.keys():
id = self.number
elif 'title' in self.__dict__.keys():
id = self.title.replace(' ', '_')
return type(self).__name__, id
def _get_list_of_type(self, list_type):
"""Return list of type == list_type if list is not empty.
Returns the first non-empty list attribute whose first element is of list_type,
or an empty list when no such attribute exists.
"""
list_of_type = []
for object_list in [ list_obj for list_obj in self.__dict__.values()\
if type(list_obj) == list ]:
if len(object_list) > 0 and type(object_list[0]) == list_type:
return object_list
return list_of_type
def get_object_from_list_with_id(self, object_type, object_id):
"""Return object from list if object has id == object_id,
None if not found.
"""
list_with_object = [ item for item in self._get_list_of_type(object_type)\
if item.id == object_id ]
if len(list_with_object) > 0:
return list_with_object[0]
return None
@classmethod
def get_cls_hasPart_objectCls_dictionaries(cls, object_cls, xpath, object_seqnum_xpath=None, cardinality=0, cardinality_restriction='minCardinality'):
"""Return a dictionary containing the information for creating a class that can act
as an intermediary between cls and a number of object_cls if object_cls has
a position in a sequence of object_classes that belong to cls.
"""
part_name = object_cls.__name__ + 'Part'
has_part_name = object_cls.__name__.lower() + 'PartHas' + object_cls.__name__
has_seqnum_name = object_cls.__name__.lower() + 'HasSeqNum'
# By default the sequence number is read from the id attribute of the same xpath.
if object_seqnum_xpath is None:
object_seqnum_xpath = xpath + '/@id'
object_part_dictionary = { 'class': object_cls, 'cardinality': 1, 'xpath': xpath,\
'name': has_part_name, 'label': '{0} has a {1}'.format(part_name, object_cls.__name__),\
'comment': '{0} has a part, that is a {1}'.format(part_name, object_cls.__name__)}
# NOTE(review): the next 'comment' format string only uses {0} but is passed a second
# argument (object_cls.__name__); harmless, yet the extra argument should be dropped.
object_seqnum_dictionary = { 'class': int, 'cardinality': 1, 'xpath': object_seqnum_xpath,\
'name': has_seqnum_name, 'label': '{0} has a sequence number'.format(part_name),\
'comment': '{0} has a part, that stands in a sequence with this number'.format(part_name, object_cls.__name__)}
# NOTE(review): "servers as a intermediary" should read "serves as an intermediary";
# left unchanged here because this string is emitted into generated ontologies.
object_dictionary = { 'class_name': part_name, SemanticClass.HAS_PART: object_part_dictionary, SemanticClass.HAS_SEQNUM: object_seqnum_dictionary,\
'label': '{0} part'.format(object_cls.__name__.lower()),\
'comment': 'This class servers as a intermediary between {0} and {1}. {0} has some {1} in a specific sequence.'.format(cls.__name__, object_cls.__name__)}
dictionary = { 'flag': 'ordered_list' , 'class': object_dictionary, 'cardinality': cardinality, 'cardinality_restriction': cardinality_restriction, 'xpath': xpath,\
'name': cls.__name__.lower() + 'Has' + part_name, 'label': '{0} has a part that connects it with a {1}'.format(cls.__name__, object_cls.__name__),\
'comment': '{0} has a part that connects it with a {1}, that has a position in a sequence of {1}'.format(cls.__name__, object_cls.__name__)}
return dictionary
@classmethod
@abc.abstractmethod
def get_semantic_dictionary(cls):
"""Creates a semantic dictionary with cls.CLASS_KEY and cls.PROPERTIES_KEY as its keys.
The class-key points to a class_dictionary with the keys: cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
Create initial dictionary using cls.get_class_dictionary():
dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: {} }
The properties_key points to a properties_dictionary with semantically relevant keys
of self.__dict__ as keys. Use cls.create_semantic_property_dictionary(...) in order to
add a property dictionary for each property as follows:
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary(property_key, ...))
Return dictionary by using:
cls.return_dictionary_after_updating_super_classes(dictionary)
"""
pass
def get_xml_conform_key_value_dictionary(self) -> dict:
"""Return an XML-conform key/value dictionary.
Buckets each semantically relevant, non-empty attribute of self into one of four groups:
'attachable', 'attachable-list', 'builtins' or 'builtin-list'; attribute keys are
converted from snake_case to kebab-case.
"""
property_d = self.get_semantic_dictionary()[self.PROPERTIES_KEY]
# 'builtins' doubles as a dict key below and as the module-name string compared
# against semantic_type.__module__ to detect builtin types.
attachable, attachable_list, builtins, builtin_list = 'attachable', 'attachable-list', 'builtins', 'builtin-list'
xml_d = { attachable: {}, attachable_list: {}, builtins: {}, builtin_list: {}}
for key in property_d.keys():
value = self.__dict__.get(key)
# Skip missing values and empty lists.
if value is not None and (type(value) != list or len(value) > 0):
semantic_type = property_d[key][self.CLASS_KEY]\
if type(property_d[key]) is dict\
else property_d[key][0]
if type(value) != list and semantic_type.__module__ == builtins:
# Booleans are serialized lowercase ('true'/'false') for XML.
if semantic_type == bool:
xml_d[builtins].update({key.replace('_','-'): str(value).lower()})
else:
xml_d[builtins].update({key.replace('_','-'): str(value)})
elif semantic_type.__module__ != builtins:
attachable_key = attachable if type(value) != list else attachable_list
xml_d[attachable_key].update({key.replace('_','-'): value})
else:
xml_d[builtin_list].update({key.replace('_','-'): value})
return xml_d
@classmethod
def return_dictionary_after_updating_super_classes(cls, dictionary):
"""Return semantic dictionary after updating super classes if necessary.
For every property declared as a subPropertyOf, the base URI of the super property
is looked up in cls.SUPER_CLASSES_DICT and the corresponding super class is appended
to the dictionary's rdfs:subClassOf list (unless one from that base URI is already present).
"""
if cls.PROPERTIES_KEY not in dictionary.keys():
return dictionary
# Collect the distinct base URIs (part before '#') of all declared super properties.
subproperty_base_uri_set = set( value.get(cls.SUBPROPERTYOF).split('#')[0]\
for value in dictionary[cls.PROPERTIES_KEY].values()\
if bool(value.get(cls.SUBPROPERTYOF)) )
for sub_property_base in subproperty_base_uri_set:
if bool(cls.SUPER_CLASSES_DICT.get(sub_property_base))\
and (\
cls.SUBCLASS_OF not in dictionary[cls.CLASS_KEY].keys()\
or len(dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]) == 0\
or len([ url for url in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF] if sub_property_base in url]) == 0\
# above instead of beneath, there might be more than one Class that share a sub_property_base.
#or sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base) not in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]\
):
subclass_list = dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]\
if cls.SUBCLASS_OF in dictionary[cls.CLASS_KEY].keys()\
and len(dictionary[cls.CLASS_KEY].get(cls.SUBCLASS_OF)) > 0\
else []
subclass_list.append(sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base))
dictionary[cls.CLASS_KEY].update({cls.SUBCLASS_OF: subclass_list})
return dictionary
def __repr__(self) -> str:
"""Return a representation of all semantically relevant properties.
"""
data_string = self.__str__()
return f'<{data_string}>'
def __str__(self) -> str:
"""Return a str of all semantically relevant properties.
"""
name = type(self).__name__
data = []
for key in self.get_semantic_dictionary()[self.PROPERTIES_KEY].keys():
# Include a property when it is set and, for lists, non-empty.
if key in self.__dict__.keys() and\
(self.__dict__[key] != None or
(type(self.__dict__[key]) == list and len(self.__dict__[key]) > 0)):
data.append(f'{key}: {self.__dict__[key]}')
data_string = ', '.join(data)
return f'{name} {data_string}'
Index: tests_py2ttl/test_data/mapping_dict.xml
===================================================================
--- tests_py2ttl/test_data/mapping_dict.xml (revision 103)
+++ tests_py2ttl/test_data/mapping_dict.xml (revision 104)
@@ -1,352 +1,355 @@
tln
http://www.nie.org/ontology/nietzsche#
./tln-ontology_autogenerated.ttl
http://www.nie.org/ontology/nietzsche#ArchivalManuscriptUnity
http://www.nie.org/ontology/nietzsche#hasTitle
http://www.nie.org/ontology/nietzsche#hasManuscriptType
http://www.nie.org/ontology/nietzsche#hasStyles
http://www.nie.org/ontology/nietzsche#hasPages
http://www.nie.org/ontology/nietzsche#hasDescription
http://www.nie.org/ontology/nietzsche#hasEarlierDescriptions
http://www.nie.org/ontology/nietzsche#EditorComment
http://www.nie.org/ontology/nietzsche#isUncertain
http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#AtypicalWriting
http://www.nie.org/ontology/nietzsche#isUncertain
http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#atypicalWritingHasText
http://www.nie.org/ontology/nietzsche#Path
http://www.nie.org/ontology/nietzsche#hasDAttribute
http://www.nie.org/ontology/nietzsche#Box
http://www.nie.org/ontology/nietzsche#hasDAttribute
http://www.nie.org/ontology/nietzsche#hasEarlierText
http://www.nie.org/ontology/nietzsche#Clarification
http://www.nie.org/ontology/nietzsche#isUncertain
http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#clarificationHasText
http://www.nie.org/ontology/nietzsche#Color
http://www.nie.org/ontology/nietzsche#colorHasName
http://www.nie.org/ontology/nietzsche#hasHexadecimalValue
http://www.nie.org/ontology/nietzsche#Text
http://www.nie.org/ontology/nietzsche#textHasContent
http://www.nie.org/ontology/nietzsche#textHasMarkup
http://www.nie.org/ontology/nietzsche#Description
http://www.nie.org/ontology/nietzsche#textHasContent
http://www.nie.org/ontology/nietzsche#textHasMarkup
http://www.nie.org/ontology/nietzsche#EarlierDescription
http://www.nie.org/ontology/nietzsche#textHasContent
http://www.nie.org/ontology/nietzsche#hasAuthor
http://www.nie.org/ontology/nietzsche#hasCitation
http://www.nie.org/ontology/nietzsche#textHasMarkup
http://www.nie.org/ontology/nietzsche#EditorCorrection
http://www.nie.org/ontology/nietzsche#isUncertain
http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#hasCorrectionText
http://www.nie.org/ontology/nietzsche#Image
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasFileName
http://www.nie.org/ontology/nietzsche#hasTransform
- http://www.nie.org/ontology/nietzsche#hasUrl
+ http://www.nie.org/ontology/nietzsche#hasPrimaryurl
+ http://www.nie.org/ontology/nietzsche#hasSecondaryurl
http://www.nie.org/ontology/nietzsche#hasTextField
http://www.nie.org/ontology/nietzsche#FaksimileImage
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasFileName
http://www.nie.org/ontology/nietzsche#hasTransform
- http://www.nie.org/ontology/nietzsche#hasUrl
+ http://www.nie.org/ontology/nietzsche#hasPrimaryurl
+ http://www.nie.org/ontology/nietzsche#hasSecondaryurl
+ http://www.nie.org/ontology/nietzsche#hasApiurl
+ http://www.nie.org/ontology/nietzsche#hasThumburl
+ http://www.nie.org/ontology/nietzsche#hasMediumurl
http://www.nie.org/ontology/nietzsche#hasTextField
http://www.nie.org/ontology/nietzsche#PositionalObject
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasLeft
http://www.nie.org/ontology/nietzsche#hasTop
http://www.nie.org/ontology/nietzsche#hasBottom
- http://www.nie.org/ontology/nietzsche#hasTransform
+ http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#WordPosition
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasLeft
http://www.nie.org/ontology/nietzsche#hasTop
http://www.nie.org/ontology/nietzsche#hasBottom
- http://www.nie.org/ontology/nietzsche#hasTransform
+ http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#FaksimilePosition
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasLeft
http://www.nie.org/ontology/nietzsche#hasTop
http://www.nie.org/ontology/nietzsche#hasBottom
- http://www.nie.org/ontology/nietzsche#hasTransform
- http://www.nie.org/ontology/nietzsche#isOnFaksimileImage
- http://www.nie.org/ontology/nietzsche#isOnTextField
+ http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#Line
http://www.nie.org/ontology/nietzsche#lineHasNumber
http://www.nie.org/ontology/nietzsche#lineHasBottomValueOnTranskription
http://www.nie.org/ontology/nietzsche#lineHasTopValueOnTranskription
http://www.nie.org/ontology/nietzsche#lineHasInnerBottomValueOnFaksimile
http://www.nie.org/ontology/nietzsche#lineHasInnerTopValueOnFaksimile
http://www.nie.org/ontology/nietzsche#lineHasOuterBottomValueOnFaksimile
http://www.nie.org/ontology/nietzsche#lineHasOuterTopValueOnFaksimile
http://www.nie.org/ontology/nietzsche#isMainLine
http://www.nie.org/ontology/nietzsche#lineHasEditorComment
http://www.nie.org/ontology/nietzsche#LineContinuation
http://www.nie.org/ontology/nietzsche#isUncertain
http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#isLineAContinuationTo
http://www.nie.org/ontology/nietzsche#lineContinuationHasReference
http://www.nie.org/ontology/nietzsche#SimpleWord
http://www.nie.org/ontology/nietzsche#hasText
http://www.nie.org/ontology/nietzsche#wordBelongsToLine
http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#SpecialWord
http://www.nie.org/ontology/nietzsche#hasText
http://www.nie.org/ontology/nietzsche#wordBelongsToLine
http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#MarkForeignHands
http://www.nie.org/ontology/nietzsche#hasText
http://www.nie.org/ontology/nietzsche#textOfForeignHands
http://www.nie.org/ontology/nietzsche#penOfForeignHands
http://www.nie.org/ontology/nietzsche#wordBelongsToLine
http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#Page
http://www.nie.org/ontology/nietzsche#hasNumber
http://www.nie.org/ontology/nietzsche#hasOrientation
http://www.nie.org/ontology/nietzsche#hasLines
http://www.nie.org/ontology/nietzsche#hasMarkForeignHands
http://www.nie.org/ontology/nietzsche#hasWords
http://www.nie.org/ontology/nietzsche#hasWordDeletionPaths
http://www.nie.org/ontology/nietzsche#hasWordInsertionMarks
http://www.nie.org/ontology/nietzsche#hasFaksimileImage
http://www.nie.org/ontology/nietzsche#pageIsOnSVGTextField
http://www.nie.org/ontology/nietzsche#pageIsOnFaksimileTextField
http://www.nie.org/ontology/nietzsche#hasSvgImage
http://www.nie.org/ontology/nietzsche#Reference
http://www.nie.org/ontology/nietzsche#firstLineOfReference
http://www.nie.org/ontology/nietzsche#lastLineOfReference
http://www.nie.org/ontology/nietzsche#wordReference
http://www.nie.org/ontology/nietzsche#IsUncertain
http://www.nie.org/ontology/nietzsche#hasTitle
http://www.nie.org/ontology/nietzsche#hasPageNumber
http://www.nie.org/ontology/nietzsche#SVGImage
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasFileName
http://www.nie.org/ontology/nietzsche#hasTransform
- http://www.nie.org/ontology/nietzsche#hasUrl
+ http://www.nie.org/ontology/nietzsche#hasPrimaryurl
+ http://www.nie.org/ontology/nietzsche#hasSecondaryurl
http://www.nie.org/ontology/nietzsche#hasTextField
http://www.nie.org/ontology/nietzsche#StandoffTag
http://www.nie.org/ontology/nietzsche#standoffTagHasStartIndex
http://www.nie.org/ontology/nietzsche#standoffTagHasEndIndex
http://www.nie.org/ontology/nietzsche#standoffTagHasCSS
http://www.nie.org/ontology/nietzsche#TextConnectionMark
http://www.nie.org/ontology/nietzsche#hasText
http://www.nie.org/ontology/nietzsche#wordBelongsToLine
http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#textConnectionMarkHasTextSource
http://www.nie.org/ontology/nietzsche#TextField
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasLeft
http://www.nie.org/ontology/nietzsche#hasTop
http://www.nie.org/ontology/nietzsche#hasBottom
- http://www.nie.org/ontology/nietzsche#hasTransform
+ http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#TranskriptionPosition
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasLeft
http://www.nie.org/ontology/nietzsche#hasTop
http://www.nie.org/ontology/nietzsche#hasBottom
- http://www.nie.org/ontology/nietzsche#hasTransform
- http://www.nie.org/ontology/nietzsche#isOnSvgImage
+ http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#UncertainDecipherment
http://www.nie.org/ontology/nietzsche#isUncertain
http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#Word
http://www.nie.org/ontology/nietzsche#hasText
http://www.nie.org/ontology/nietzsche#hasEditedText
http://www.nie.org/ontology/nietzsche#wordHasWordParts
http://www.nie.org/ontology/nietzsche#wordBelongsToLine
http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#wordHasStyle
http://www.nie.org/ontology/nietzsche#overwritesWord
http://www.nie.org/ontology/nietzsche#isTransformationOfWord
http://www.nie.org/ontology/nietzsche#isExtensionOfWord
http://www.nie.org/ontology/nietzsche#isDeletionOfWord
http://www.nie.org/ontology/nietzsche#isClarificationOfWord
http://www.nie.org/ontology/nietzsche#wordHasEarlierVersion
http://www.nie.org/ontology/nietzsche#wordHasCorrection
http://www.nie.org/ontology/nietzsche#wordIsDeletedByPath
http://www.nie.org/ontology/nietzsche#wordHasEditorComment
http://www.nie.org/ontology/nietzsche#WordDeletionPath
http://www.nie.org/ontology/nietzsche#hasDAttribute
http://www.nie.org/ontology/nietzsche#WordInsertionMark
http://www.nie.org/ontology/nietzsche#hasHeight
http://www.nie.org/ontology/nietzsche#hasWidth
http://www.nie.org/ontology/nietzsche#hasLeft
http://www.nie.org/ontology/nietzsche#hasTop
http://www.nie.org/ontology/nietzsche#hasBottom
- http://www.nie.org/ontology/nietzsche#hasTransform
+ http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#hasMarkType
http://www.nie.org/ontology/nietzsche#hasSymbolId
http://www.nie.org/ontology/nietzsche#hasNextWord
http://www.nie.org/ontology/nietzsche#hasPreviousWord
http://www.nie.org/ontology/nietzsche#wordInsertionMarkBelongsToLine
xml-dictionary
- 2020-11-11 15:04:42
+ 2020-12-07 10:55:12