Index: svgscripts/datatypes/line.py
===================================================================
--- svgscripts/datatypes/line.py (revision 95)
+++ svgscripts/datatypes/line.py (revision 96)
@@ -1,108 +1,124 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a line.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import re
from lxml import etree as ET
from os.path import isfile
import sys
+from .attachable_object import AttachableObject
from .matrix import Matrix
+from .editor_comment import EditorComment
sys.path.append('py2ttl')
from class_spec import SemanticClass
+from xml_conform_dictionary import XMLConformDictionary
class Line(AttachableObject,SemanticClass):
    """
    This class represents a line on a page.

    @label a line on a page

    Args:
        id (int)
        bottom (float)
        top (float)
    """
    XML_TAG = 'line-number'
    WARN_NO_LINE_NUMBER = 'No line number found'

    def __init__(self, id=0, bottom=0.0, top=0.0, faksimile_outer_bottom=0.0, faksimile_outer_top=0.0, faksimile_inner_bottom=0.0, faksimile_inner_top=0.0):
        self.id = id
        # even line numbers mark "main" lines (see 'isMainLine' in get_semantic_dictionary)
        self.is_even = self.id % 2 == 0
        self.bottom = bottom
        # optional EditorComment attached to this line (populated by create_cls_from_node)
        self.editor_comment = None
        self.top = top
        self.faksimile_outer_bottom = faksimile_outer_bottom
        self.faksimile_outer_top = faksimile_outer_top
        self.faksimile_inner_bottom = faksimile_inner_bottom
        self.faksimile_inner_top = faksimile_inner_top

    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.
        """
        obj_node = self.get_or_create_node_with_id(target_tree)
        xml_conform_dictionary = XMLConformDictionary.create_cls_from_data_object(self)
        xml_conform_dictionary.attach_data_to_tree(obj_node)

    @classmethod
    def create_cls_from_node(cls, node):
        """Create a cls from a (lxml.etree.Element) node.

        :return: (datatypes.line) Line
        """
        def float_attr(key) -> float:
            # missing or empty attributes default to 0.0
            return float(node.get(key)) if bool(node.get(key)) else 0.0
        id = int(node.get('id')) if bool(node.get('id')) else 0
        line = cls(id=id, bottom=float_attr('bottom'), top=float_attr('top'),\
                faksimile_inner_bottom=float_attr('faksimile-inner-bottom'),\
                faksimile_inner_top=float_attr('faksimile-inner-top'),\
                faksimile_outer_top=float_attr('faksimile-outer-top'),\
                faksimile_outer_bottom=float_attr('faksimile-outer-bottom'))
        # Evaluate the xpath once (the previous code evaluated it three times and
        # instantiated EditorComments it then discarded); only the first match is used.
        editor_comment_nodes = node.xpath('./' + EditorComment.XML_TAG)
        line.editor_comment = EditorComment.create_cls_from_node(editor_comment_nodes[0])\
                if len(editor_comment_nodes) > 0 else None
        return line

    @classmethod
    def get_semantic_dictionary(cls) -> dict:
        """ Creates and returns a semantic dictionary as specified by SemanticClass.
        """
        dictionary = {}
        class_dict = cls.get_class_dictionary()
        properties = { 'id': { cls.CLASS_KEY: int, 'cardinality': 1,\
                cls.PROPERTY_NAME: 'lineHasNumber', cls.PROPERTY_LABEL: 'line has number',\
                cls.PROPERTY_COMMENT: 'Relating a line to the number it has.'}}
        properties.update(cls.create_semantic_property_dictionary('editor_comment', EditorComment,\
                name='lineHasEditorComment'))
        properties.update(cls.create_semantic_property_dictionary('bottom', float, cardinality=1,\
                name='lineHasBottomValueOnTranskription'))
        properties.update(cls.create_semantic_property_dictionary('top', float, cardinality=1,\
                name='lineHasTopValueOnTranskription'))
        # NOTE(review): the labels/comments below describe *_bottom as "top position" and
        # *_top as "bottom position" — looks swapped; confirm against the ontology before changing.
        properties.update(cls.create_semantic_property_dictionary('faksimile_inner_bottom', float, cardinality=1,\
                name='lineHasInnerBottomValueOnFaksimile', label='line has a top position on the faksimile image',\
                comment='This is the top position of the area, in which the words belong to this line.'))
        properties.update(cls.create_semantic_property_dictionary('faksimile_inner_top', float, cardinality=1,\
                name='lineHasInnerTopValueOnFaksimile', label='line has a bottom position on the faksimile image',\
                comment='This is the bottom position of the area, in which the words belong to this line.'))
        properties.update(cls.create_semantic_property_dictionary('faksimile_outer_bottom', float, cardinality=1,\
                name='lineHasOuterBottomValueOnFaksimile', label='line has a top position next to the faksimile image',\
                comment='This is the top position of line outside of the faksimile image.'))
        properties.update(cls.create_semantic_property_dictionary('faksimile_outer_top', float, cardinality=1,\
                name='lineHasOuterTopValueOnFaksimile', label='line has a bottom position next to the faksimile image',\
                comment='This is the bottom position of line outside of the faksimile image.'))
        properties.update(cls.create_semantic_property_dictionary('is_even', bool, cardinality=1,\
                name='isMainLine', label='whether or not line is a main line',\
                comment='Indicates whether or not line is a main line. Lines that are not main lines, contain later inserted words.'))
        dictionary.update({cls.CLASS_KEY: class_dict})
        dictionary.update({cls.PROPERTIES_KEY: properties})
        return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/matrix.py
===================================================================
--- svgscripts/datatypes/matrix.py (revision 95)
+++ svgscripts/datatypes/matrix.py (revision 96)
@@ -1,307 +1,323 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to transform a svg/text[@transform] matrix-string into a matrix representation.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import re
import math
class Matrix:
    """
    This class transforms a svg @transform matrix-string into a matrix representation.

    Args:
        transform_matrix_string (str) string of the form 'matrix(1.0 0.0 0.0 1.0 0.0 0.0)' or 'rotate(10)'
        transkription_field: when given, its xmin/ymin are subtracted from the translation components (E/F)
        matrix_list (list): explicit six-value matrix [a, b, c, d, e, f]
    """
    # indices of the six svg matrix components (a b c d e f)
    A = 0
    B = 1
    C = 2
    D = 3
    E = 4
    F = 5
    XINDEX = 4  # alias for E: x translation
    YINDEX = 5  # alias for F: y translation
    MATRIX_LENGTH = 6
    # rotation direction codes (see get_rotation_direction)
    DOWN = 1
    STRAIGHT = 0
    UP = -1

    def __init__(self, transform_matrix_string=None, transkription_field=None, matrix_list=None):
        """Initialize the matrix from a transform string, a transkription field and/or an explicit list.

        Raises Exception when transform_matrix_string is not a valid 'rotate(...)'/'matrix(...)' string
        or when the resulting matrix has fewer than six components.
        """
        # matrix_list previously had a mutable default ([]); None avoids the
        # shared-mutable-default pitfall while keeping the same behavior.
        if matrix_list is None:
            matrix_list = []
        self.matrix = [ 0.0 for i in range(Matrix.MATRIX_LENGTH) ] if len(matrix_list) < 6 else matrix_list
        if transform_matrix_string is not None:
            # raw string: '\(' inside a non-raw literal triggers an invalid-escape warning
            m = re.search(r'(?<=rotate\()[-]*[0-9]+', transform_matrix_string)
            if m is not None: # transform='rotate(a)' to transform='matrix(cos(a), sin(a), -sin(a), cos(a), 0, 0)'
                angle = float(m.group(0))
                self.matrix[Matrix.A] = round(math.cos(math.radians(angle)), 3)
                self.matrix[Matrix.B] = round(math.sin(math.radians(angle)), 3)
                self.matrix[Matrix.C] = round(math.sin(math.radians(angle))*-1, 3)
                self.matrix[Matrix.D] = round(math.cos(math.radians(angle)), 3)
                self.matrix[Matrix.E] = 0
                self.matrix[Matrix.F] = 0
            elif re.search(r'matrix\(\s*([-]*\d+(\.\d+(e-\d+)*)*[,\s][\s]*){5}[-]*\d+(\.\d+)*.*\s*\)', transform_matrix_string):
                # accepts both space- and comma-separated matrix strings
                self.matrix = [ float(i) for i in transform_matrix_string.replace('matrix(','').\
                        replace(', ', ',').replace(',', ' ').replace(')','').split(' ') ]
            else:
                raise Exception('Error: string "{}" is not a valid transform matrix string!'.format(transform_matrix_string))
        if transkription_field is not None:
            # translate into the coordinate system of the transkription field
            self.matrix[Matrix.XINDEX] -= transkription_field.xmin
            self.matrix[Matrix.YINDEX] -= transkription_field.ymin
        if(len(self.matrix) < Matrix.MATRIX_LENGTH):
            raise Exception('Error: string "{}" is not a valid matrix string!'.format(transform_matrix_string))

    def add2X(self, add_to_x=0):
        """Return x-value of matrix (float) + add_to_x.
        """
        return self.matrix[Matrix.XINDEX] + float(add_to_x)

    def add2Y(self, add_to_y=0):
        """Return y-value of matrix (float) + add_to_y.
        """
        return self.matrix[Matrix.YINDEX] + float(add_to_y)

    def getX(self):
        """Return x-value of matrix (float).
        """
        return self.matrix[Matrix.XINDEX]

    def getY(self):
        """Return y-value of matrix (float).
        """
        return self.matrix[Matrix.YINDEX]

    def is_matrix_horizontal(self):
        """Returns whether matrix is horizontal (identity transformation part).

        [:return:] True/False
        """
        return self.matrix[Matrix.A] == 1 and self.matrix[Matrix.B] == 0 and self.matrix[Matrix.C] == 0 and self.matrix[Matrix.D] == 1

    def get_new_x(self, x=0.0, y=0.0):
        """Returns new position of x.

        :return: (float) x
        """
        top_left_x = x - self.matrix[Matrix.E] if x != 0.0 else 0.0
        top_left_y = y - self.matrix[Matrix.F] if y != 0.0 else 0.0
        return self.matrix[Matrix.A] * top_left_x + self.matrix[Matrix.C] * top_left_y + self.matrix[Matrix.E]

    def get_new_y(self, x=0.0, y=0.0):
        """Returns new position of y.

        :return: (float) y
        """
        top_left_x = x - self.matrix[Matrix.E] if x != 0.0 else 0.0
        top_left_y = y - self.matrix[Matrix.F] if y != 0.0 else 0.0
        return self.matrix[Matrix.B] * top_left_x + self.matrix[Matrix.D] * top_left_y + self.matrix[Matrix.F]

    def get_old_x(self, x=0.0, y=0.0):
        """Returns old position of x (inverse transformation of the x component).

        :return: (float) x
        """
        # consistently use Matrix.D etc. (original mixed self.D and Matrix.D; same values)
        old_x = (self.matrix[Matrix.D]*x - self.matrix[Matrix.D]*self.matrix[Matrix.E] - self.matrix[Matrix.C]*y + self.matrix[Matrix.C]*self.matrix[Matrix.F])\
                /(self.matrix[Matrix.A]*self.matrix[Matrix.D] - self.matrix[Matrix.B]*self.matrix[Matrix.C])
        return self.add2X(old_x)

    def get_transformed_positions(self, x=0.0, y=0.0, width=0.0, height=0.0):
        """Returns transformed x, y, width and height.

        The four corners of the rectangle are transformed; the returned width/height
        are the larger of the transformed top/bottom edge spans.
        """
        top_left_x = x
        top_left_y = y
        top_right_x = x + width
        top_right_y = y
        bottom_left_x = x
        bottom_left_y = y + height
        bottom_right_x = x + width
        bottom_right_y = y + height
        new_x = self.matrix[Matrix.A] * top_left_x + self.matrix[Matrix.C] * top_left_y + self.matrix[Matrix.E]
        new_y = self.matrix[Matrix.B] * top_left_x + self.matrix[Matrix.D] * top_left_y + self.matrix[Matrix.F]
        new_top_right_x = self.matrix[Matrix.A] * top_right_x + self.matrix[Matrix.C] * top_right_y + self.matrix[Matrix.E]
        new_top_right_y = self.matrix[Matrix.B] * top_right_x + self.matrix[Matrix.D] * top_right_y + self.matrix[Matrix.F]
        new_bottom_left_x = self.matrix[Matrix.A] * bottom_left_x + self.matrix[Matrix.C] * bottom_left_y + self.matrix[Matrix.E]
        new_bottom_left_y = self.matrix[Matrix.B] * bottom_left_x + self.matrix[Matrix.D] * bottom_left_y + self.matrix[Matrix.F]
        new_bottom_right_x = self.matrix[Matrix.A] * bottom_right_x + self.matrix[Matrix.C] * bottom_right_y + self.matrix[Matrix.E]
        new_bottom_right_y = self.matrix[Matrix.B] * bottom_right_x + self.matrix[Matrix.D] * bottom_right_y + self.matrix[Matrix.F]
        new_width = abs(new_top_right_x - new_x)\
                if abs(new_top_right_x - new_x) >= abs(new_bottom_right_x - new_bottom_left_x)\
                else abs(new_bottom_right_x - new_bottom_left_x)
        new_height = abs(new_bottom_left_y - new_y)\
                if abs(new_bottom_left_y - new_y) >= abs(new_top_right_y - new_bottom_right_y)\
                else abs(new_top_right_y - new_bottom_right_y)
        return new_x, new_y, new_width, new_height

    def clone_transformation_matrix(self):
        """Returns a matrix that contains only the transformation part.

        [:return:] (Matrix) a clone of this matrix
        """
        return Matrix(matrix_list=self.matrix[0:4]+[0,0])

    def isRotationMatrix(self):
        """Return whether matrix is a rotation matrix.
        """
        return self.matrix[Matrix.A] < 1 or self.matrix[Matrix.B] != 0

    def toCSSTransformString(self):
        """Returns the CSS3 transform string: 'rotate(Xdeg)' where X is the angle.
        """
        angle = 0
        if self.isRotationMatrix():
            angle = int(round(math.degrees(math.asin(self.matrix[Matrix.B])), 0))
            if angle == 0:
                # B can be 0 for e.g. a pure scaling; fall back to the A component
                angle = int(round(math.degrees(math.acos(self.matrix[Matrix.A])), 0))
        return 'rotate({}deg)'.format(angle)

    def toString(self):
        """Returns a transform_matrix_string representation of the matrix.

        [:returns:] (str) 'matrix(X X X X X X)'
        """
        return 'matrix(' + ' '.join([ str(round(x, 5)) for x in self.matrix ]) + ')'

    def get_rotation_direction(self):
        """Get rotation direction of rotation matrix.

        [:return:] (int) direction code Matrix.UP, Matrix.STRAIGHT, Matrix.DOWN
        """
        if not self.isRotationMatrix():
            return self.STRAIGHT
        else:
            angle = int(round(math.degrees(math.asin(self.matrix[Matrix.B])), 0))
            return self.UP if angle < 0 else self.DOWN

    @staticmethod
    def IS_IN_FOOTNOTE_AREA(transform_matrix_string, transkription_field, x=0.0):
        """Returns true if matrix specifies a position that is part of the footnote area.

        transform_matrix_string (str)
        transkription_field (datatypes.transkription_field.TranskriptionField)
        x (float): additional x offset added to the matrix translation
        """
        matrix = Matrix(transform_matrix_string=transform_matrix_string)
        if matrix.getY() < transkription_field.ymax:
            return False
        is_part = matrix.getX() + x > transkription_field.xmin\
                if transkription_field.is_page_verso()\
                else matrix.getX() + x > transkription_field.documentWidth/4
        return is_part

    @staticmethod
    def NODE_HAS_CONTENT_IN_FOOTNOTE_AREA(node, transkription_field):
        """Returns true if node's matrix plus the rightmost x of its children is in the footnote area.

        node (lxml.etree.Element)
        transkription_field (datatypes.transkription_field.TranskriptionField)
        """
        matrix = Matrix(transform_matrix_string=node.get('transform'))
        if matrix.getY() < transkription_field.ymax:
            return False
        # max() instead of sorted(...)[-1]: O(n) and avoids shadowing 'x' in the comprehension
        children = node.getchildren()
        x = max(float(child.get('x')) for child in children) if len(children) > 0 else 0.0
        is_part = matrix.getX() + x > transkription_field.xmin\
                if transkription_field.is_page_verso()\
                else matrix.getX() + x > transkription_field.documentWidth/4
        return is_part

    @staticmethod
    def IS_IN_MARGIN_FIELD(transform_matrix_string, transkription_field):
        """Returns true if matrix specifies a position that is part of the margin field.

        transform_matrix_string (str)
        transkription_field (datatypes.transkription_field.TranskriptionField)
        """
        line_number_area_width = 15\
                if transkription_field.line_number_area_width == 0.0\
                else transkription_field.line_number_area_width
        matrix = Matrix(transform_matrix_string=transform_matrix_string)
        if matrix.getY() < transkription_field.ymin or matrix.getY() > transkription_field.ymax:
            return False
        is_part = matrix.getX() < transkription_field.xmin - line_number_area_width\
                if transkription_field.is_page_verso()\
                else matrix.getX() > transkription_field.xmax + line_number_area_width
        return is_part

    @staticmethod
    def IS_IN_PLACE_OF_PRINTING_AREA(transform_matrix_string, transkription_field):
        """Returns true if matrix specifies a position that is part of the area where the places of printing ('Druckorte') are printed.

        transform_matrix_string (str)
        transkription_field (datatypes.transkription_field.TranskriptionField)
        """
        matrix = Matrix(transform_matrix_string=transform_matrix_string)
        if matrix.getY() < transkription_field.ymax:
            return False
        is_part = matrix.getX() < transkription_field.xmin\
                if transkription_field.is_page_verso()\
                else matrix.getX() < transkription_field.documentWidth/4
        return is_part

    @staticmethod
    def IS_PART_OF_TRANSKRIPTION_FIELD(transkription_field, text_node=None, matrix=None):
        """Returns true if matrix specifies a position that is part of transkription field.

        text_node (lxml.etree.Element)
        transkription_field (datatypes.transkription_field.TranskriptionField)
        """
        if matrix is None and not bool(text_node.get('transform')):
            return False
        if matrix is None:
            matrix = Matrix(transform_matrix_string=text_node.get('transform'))
        is_part = matrix.getX() > transkription_field.xmin and matrix.getX() < transkription_field.xmax\
                and matrix.getY() > transkription_field.ymin and matrix.getY() < transkription_field.ymax
        if not is_part and matrix.isRotationMatrix() and len([child.text for child in text_node.getchildren() if not re.match(r'^\s*$', child.text)]) > 0:
            # rotated text: re-test using the transformed position of the first non-empty tspan
            first_tspan_node = [ child for child in text_node.getchildren() if not re.match(r'^\s*$', child.text)][0]
            x = matrix.add2X(float(first_tspan_node.get('x')))
            y = matrix.add2Y(float(first_tspan_node.get('y')))
            new_x = matrix.get_new_x(x=x, y=y)
            new_y = matrix.get_new_y(x=x, y=y)
            return new_x > transkription_field.xmin and new_x < transkription_field.xmax\
                    and new_y > transkription_field.ymin and new_y < transkription_field.ymax
        return is_part

    @staticmethod
    def IS_NEARX_TRANSKRIPTION_FIELD(transform_matrix_string, transkription_field, diffx=20.0):
        """Returns true if matrix specifies a position that is on its x axis near the transkription_field.

        transform_matrix_string (str): string from which to init Matrix.
        transkription_field (svgscripts.TranskriptionField)
        diffx (float): defines threshold for positions that count as near.
        """
        matrix = Matrix(transform_matrix_string=transform_matrix_string)
        MINLEFT = transkription_field.xmin - diffx
        MAXRIGHT = transkription_field.xmax + diffx
        return matrix.getY() > transkription_field.ymin and matrix.getY() < transkription_field.ymax\
                and ((matrix.getX() > MINLEFT and matrix.getX() < transkription_field.xmin)\
                or (matrix.getX() > transkription_field.xmax and matrix.getX() < MAXRIGHT))

    @staticmethod
    def DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b, diff_threshold=0.001):
        """Returns whether the conversion factors (a-d) differ more than diff_threshold.

        A single None argument counts as differing; two None arguments do not.
        """
        if matrix_a is None or matrix_b is None:
            return not (matrix_a is None and matrix_b is None)
        return abs(matrix_a.matrix[Matrix.A] - matrix_b.matrix[Matrix.A]) > diff_threshold\
                or abs(matrix_a.matrix[Matrix.B] - matrix_b.matrix[Matrix.B]) > diff_threshold\
                or abs(matrix_a.matrix[Matrix.C] - matrix_b.matrix[Matrix.C]) > diff_threshold\
                or abs(matrix_a.matrix[Matrix.D] - matrix_b.matrix[Matrix.D]) > diff_threshold

    def __eq__(self, other):
        """Return self.matrix == other.matrix.
        """
        if other is None:
            return False
        return self.matrix == other.matrix

    def __hash__(self):
        """Return hash value (based on the translation components only).
        """
        return hash((self.matrix[Matrix.E], self.matrix[Matrix.F]))
Index: svgscripts/datatypes/editor_comment.py
===================================================================
--- svgscripts/datatypes/editor_comment.py (revision 0)
+++ svgscripts/datatypes/editor_comment.py (revision 96)
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a comment by the editors.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .attachable_object import AttachableObject
+sys.path.append('py2ttl')
+from class_spec import SemanticClass
+from xml_conform_dictionary import XMLConformDictionary
+
class EditorComment(AttachableObject,SemanticClass):
    """
    This class represents a comment by the editors.
    """
    XML_TAG = 'editor-comment'

    def __init__(self, comment=None, is_uncertain=False):
        self.comment = comment            # free-text comment by the editors
        self.is_uncertain = is_uncertain  # whether the comment expresses uncertainty

    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.
        """
        comment_node = self.get_or_create_node(target_tree)
        # record the concrete class name so create_cls_from_node can restore the subclass
        comment_node.set('type', self.__class__.__name__)
        XMLConformDictionary.create_cls_from_data_object(self).attach_data_to_tree(comment_node)

    @classmethod
    def create_cls_from_node(cls, node):
        """Initialize a cls from node.

        [:return:] cls
        """
        # dispatch to the subclass whose name matches the node's 'type' attribute,
        # falling back to cls itself when no subclass matches
        cls_type = node.get('type')
        target_cls = next((subclass for subclass in cls.__subclasses__() if subclass.__name__ == cls_type), cls)
        return XMLConformDictionary.CREATE_INSTANCEOF_CLASS_FROM_NODE(target_cls, node)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        properties = dict(cls.create_semantic_property_dictionary('is_uncertain', bool,\
                name='isUncertain', label='whether something is uncertain'))
        properties.update(cls.create_semantic_property_dictionary('comment', str))
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)
+
Index: svgscripts/datatypes/editor_correction.py
===================================================================
--- svgscripts/datatypes/editor_correction.py (revision 0)
+++ svgscripts/datatypes/editor_correction.py (revision 96)
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a word correction by the editors.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .editor_comment import EditorComment
+
class EditorCorrection(EditorComment):
    """
    This class represents a word correction by the editors.
    """

    def __init__(self, correction_text='', is_uncertain=False):
        # the shared 'is_uncertain' flag is handled by EditorComment
        super().__init__(is_uncertain=is_uncertain)
        self.correction_text = correction_text  # the text the editors correct the word to

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        # extend the inherited dictionary with the correction text property
        dictionary = super().get_semantic_dictionary()
        correction_property = cls.create_semantic_property_dictionary('correction_text', str)
        dictionary[cls.PROPERTIES_KEY].update(correction_property)
        return cls.return_dictionary_after_updating_super_classes(dictionary)
+
Index: svgscripts/datatypes/attachable_object.py
===================================================================
--- svgscripts/datatypes/attachable_object.py (revision 95)
+++ svgscripts/datatypes/attachable_object.py (revision 96)
@@ -1,40 +1,66 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This is an abstract class for all objects that can be attached to a lxml.etree.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import abc
+import lxml.etree as ET
class AttachableObject(metaclass=abc.ABCMeta):
    """
    This is an abstract class for all objects that can be attached to a lxml.etree..
    """
    XML_TAG = 'attachable'  # subclasses override this with their own tag name

    @abc.abstractmethod
    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.
        """
        pass

    def get_or_create_node(self, target_tree):
        """Return xml node of attachable object, creating it as a child of target_tree's root if absent.
        """
        if target_tree.__class__.__name__ == '_ElementTree':
            target_tree = target_tree.getroot()
        # evaluate the xpath once (the previous code ran it twice)
        nodes = target_tree.xpath('.//' + self.XML_TAG)
        return nodes[0] if len(nodes) > 0 else ET.SubElement(target_tree, self.XML_TAG)

    def get_or_create_node_with_id(self, target_tree):
        """Return a fresh xml node of attachable object carrying this object's id.

        Any existing node with the same id is removed first, so stale attribute
        data is never merged into the new node.
        """
        if 'id' not in self.__dict__:
            return self.get_or_create_node(target_tree)
        if target_tree.__class__.__name__ == '_ElementTree':
            target_tree = target_tree.getroot()
        # evaluate the xpath once (the previous code ran it twice)
        stale_nodes = target_tree.xpath('.//' + self.XML_TAG + '[@id="%s"]' % self.id)
        if len(stale_nodes) > 0:
            stale_nodes[0].getparent().remove(stale_nodes[0])
        return ET.SubElement(target_tree, self.XML_TAG, attrib={'id': str(self.id)})
+
+
Index: svgscripts/datatypes/reference.py
===================================================================
--- svgscripts/datatypes/reference.py (revision 95)
+++ svgscripts/datatypes/reference.py (revision 96)
@@ -1,152 +1,160 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a text reference.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
import re
import sys
from .attachable_object import AttachableObject
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Reference(AttachableObject,SemanticClass):
    """
    This class represents a text reference.

    Args:
        id (int): object id
        first_line (int) first line of reference
        last_line (int) last line of reference
        is_uncertain (bool) whether reference is uncertain
        title (str) title of reference
        page_number (str) page_number of reference
        tag (str) xml tag
    """
    XML_TAG = 'reference'
    # class-level key lists drive the generic XML (de)serialization below;
    # xml attribute names are the keys with '_' replaced by '-'
    intKeys = [ 'first_line', 'last_line']
    boolKeys = [ 'is_uncertain' ]
    stringKeys = [ 'title', 'page_number' ]

    def __init__(self, node=None, id=0, first_line=-1, last_line=-1, is_uncertain=False, title='', page_number='', tag=XML_TAG):
        # per-instance copies of the key lists so extending them does not
        # mutate the class-level defaults shared by all instances
        self.intKeys = []
        self.intKeys += Reference.intKeys
        self.intKeys.append('id')
        self.stringKeys = []
        self.stringKeys += Reference.stringKeys
        self.boolKeys = []
        self.boolKeys += Reference.boolKeys
        self.id = id
        self.first_line = first_line   # -1 means "not set"
        self.last_line = last_line     # -1 means "not set"
        self.is_uncertain = is_uncertain
        self.title = title
        self.page_number = page_number
        self.tag = tag

    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.
        """
        if target_tree.__class__.__name__ == '_ElementTree':
            target_tree = target_tree.getroot()
        # reuse an existing node with this id, else create a new child node
        obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \
                if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \
                else ET.SubElement(target_tree, self.tag)
        # booleans are serialized lowercase ('true'/'false')
        for key in self.boolKeys:
            if self.__dict__[key] is not None:
                obj_node.set(key.replace('_','-'), str(self.__dict__[key]).lower())
        # ints use -1 as the "unset" sentinel and are not serialized then
        for key in self.intKeys:
            if self.__dict__[key] is not None and self.__dict__[key] > -1:
                obj_node.set(key.replace('_','-'), str(self.__dict__[key]))
        # empty strings are not serialized
        for key in self.stringKeys:
            if self.__dict__[key] is not None and self.__dict__[key] != '':
                obj_node.set(key.replace('_','-'), str(self.__dict__[key]))

    @classmethod
    def create_cls_from_node(cls, node):
        """Creates a Reference from a (lxml.etree.Element) node.

        :return: (datatypes.reference) Reference
        """
        instance = cls()
        # read back the attributes written by attach_object_to_tree,
        # converting each according to its key list
        for key in instance.boolKeys:
            xml_key = key.replace('_', '-')
            if bool(node.get(xml_key)):
                instance.__dict__[key] = node.get(xml_key) == 'true'
        for key in instance.intKeys:
            xml_key = key.replace('_', '-')
            if bool(node.get(xml_key)):
                instance.__dict__[key] = int(node.get(xml_key))
        for key in instance.stringKeys:
            xml_key = key.replace('_', '-')
            if bool(node.get(xml_key)):
                instance.__dict__[key] = node.get(xml_key)
        return instance

    @classmethod
    def create_cls(cls, node=None, id=0, is_uncertain=False, reference_string='', title='', page_number=''):
        """Creates a Reference from a (lxml.etree.Element) node or a reference_string.

        :return: (datatypes.reference) Reference
        """
        if node is not None:
            return cls.create_cls_from_node(node)
        else:
            first_line = -1
            last_line = -1
            # 'PAGE,FIRST[-LAST]' e.g. '14a,5-7': page number with line range
            if re.match(r'[0-9]+([a-z]+)*,[0-9]+(-[0-9]+)*', reference_string):
                page_number = reference_string.split(',')[0]
                line_numbers = reference_string.split(',')[1].split('-')
                first_line = int(line_numbers[0])
                last_line = int(line_numbers[1]) if len(line_numbers) > 1 else -1
            else:
                if ',' not in reference_string:
                    # bare line range 'FIRST[-LAST]'
                    line_numbers = reference_string.split('-')
                    first_line = int(line_numbers[0])
                    last_line = int(line_numbers[1]) if len(line_numbers) > 1 else -1
                else:
                    # 'TITLE PAGE,FIRST[-LAST]': split off the title and recurse
                    if ' ' not in reference_string:
                        raise Exception('String "{}" is not a valid reference_string'.format(reference_string))
                    title = reference_string.split(' ')[0]
                    return cls.create_cls(id=id, is_uncertain=is_uncertain, reference_string=reference_string[len(title)+1:],\
                            title=title, page_number=page_number)
            return cls(id=id, is_uncertain=is_uncertain, first_line=first_line, last_line=last_line,\
                    title=title, page_number=page_number)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        dictionary = {}
        class_dict = cls.get_class_dictionary()
        properties = {}
        properties.update({'first_line': { 'class': int, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'firstLineOfReference',\
                'label': 'first line of reference'}})
        properties.update({'last_line': { 'class': int, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'lastLineOfReference',\
                'label': 'last line of reference'}})
        # NOTE(review): 'IsUncertain' is capitalized here while EditorComment uses 'isUncertain' — confirm intended
        properties.update({'is_uncertain': { 'class': bool, 'cardinality': 0, 'name': 'IsUncertain', 'label': 'whether something is uncertain'}})
        for stringKey in cls.stringKeys:
            properties.update(cls.create_semantic_property_dictionary(stringKey, str, cardinality=1))
        dictionary.update({cls.CLASS_KEY: class_dict})
        dictionary.update({cls.PROPERTIES_KEY: properties})
        return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/clarification.py
===================================================================
--- svgscripts/datatypes/clarification.py (revision 0)
+++ svgscripts/datatypes/clarification.py (revision 96)
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a word clarification.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .editor_comment import EditorComment
+from .text import Text
+
+class Clarification(EditorComment):
+ """
+ This class represents a word clarification.
+ """
+
+ def __init__(self, text=None):
+ super(Clarification, self).__init__(is_uncertain=False)
+ self.text = text
+
+ @classmethod
+ def get_semantic_dictionary(cls):
+ """ Creates a semantic dictionary as specified by SemanticClass.
+ """
+ dictionary = super(Clarification,cls).get_semantic_dictionary()
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text', Text, name='clarificationHasText', cardinality=1))
+ return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/atypical_writing.py
===================================================================
--- svgscripts/datatypes/atypical_writing.py (revision 0)
+++ svgscripts/datatypes/atypical_writing.py (revision 96)
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent an atypical writing by the author.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .editor_comment import EditorComment
+from .text import Text
+
+class AtypicalWriting(EditorComment):
+ """
+    This class represents an atypical writing by the author.
+ """
+
+ def __init__(self, text=None):
+ super(AtypicalWriting, self).__init__(is_uncertain=False)
+ self.text = text
+
+ @classmethod
+ def get_semantic_dictionary(cls):
+ """ Creates a semantic dictionary as specified by SemanticClass.
+ """
+ dictionary = super(AtypicalWriting,cls).get_semantic_dictionary()
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text', Text, name='atypicalWritingHasText'))
+ return cls.return_dictionary_after_updating_super_classes(dictionary)
+
Index: svgscripts/datatypes/word.py
===================================================================
--- svgscripts/datatypes/word.py (revision 95)
+++ svgscripts/datatypes/word.py (revision 96)
@@ -1,828 +1,832 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import copy
import inspect
from lxml import etree as ET
from operator import attrgetter
import re
import string
import sys
import warnings
from .box import Box
+from .editor_comment import EditorComment
from .matrix import Matrix
from .path import Path
from .simple_word import SimpleWord
from .style import Style
from .word_deletion_path import WordDeletionPath
from .word_position import WordPosition
from .transkription_position import TranskriptionPosition
from .writing_process import WritingProcess
SINGLE_PUNCTUATION_PATTERN = r"^[{}–]$".format(string.punctuation)
def execute_function_on_parts(word_parts, func_name):
"""Execute function on parts and add those parts instead of original word to word_parts.
:return: new word_parts, output from func
"""
copy_parts = word_parts[:]
for word in word_parts:
output = eval('word.{0}()'.format(func_name))
if len(word.word_parts) > 0:
for part_word in word.word_parts:
copy_parts.insert(copy_parts.index(word), part_word)
copy_parts.remove(word)
word.word_parts = []
return copy_parts, output
def update_transkription_position_ids(word):
"""Update transkription_position' ids according to index.
"""
word_part_ids = [ wp.id for wp in word.word_parts ]
if len(word_part_ids) != len(set(word_part_ids)):
for id, wp in enumerate(word.word_parts):
wp.id = id
for index, transkription_position in enumerate(sorted(word.transkription_positions, key=attrgetter('left'))):
transkription_position.id = index
transkription_position.has_box = None
transkription_position.deleted = False
class Word(SimpleWord):
"""
This class represents a word.
"""
COPY_PROPERTY_KEY = [ 'line_number', 'deleted', 'writing_process_id' ]
APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS = { 'style': 'styles' }
DATA = 'debug-data'
RDFS_SUBCLASSOF_LIST = ['http://www.e-editiones.ch/ontology/text#HandwrittenText']
XML_TAG = 'word'
XML_EARLIER_VERSION = 'earlier-version'
XML_OVERWRITES = 'overwrites'
XML_CORRECTION_DICT = { 'isClarificationOfWord': 'clarifiesWord',\
'isDeletionOfWord': 'deletesEarlierPart',\
'isExtensionOfWord': 'extendsEarlierVersion',\
'isTransformationOfWord': 'transformsEarlierPart' }
def __init__(self, id=0, text='', line_number=-1, deleted=False, transkription_positions=None, faksimile_positions=None, word_part_objs=None, word_parts=None, writing_process_id=-1, earlier_version=None, box_paths=None, styles=None):
super(Word,self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\
faksimile_positions=faksimile_positions)
self.corrections = []
self.deleted = deleted
self.deletion_paths = []
self.debug_container = {}
self.debug_msg = None
self.earlier_version = earlier_version
self.edited_text = None
+ self.editor_comment = None
self.isClarificationOfWord = None
self.isDeletionOfWord = None
self.isExtensionOfWord = None
self.isTransformationOfWord = None
if len(self.text) == 0 and len(''.join([ tp.get_text() for tp in self.transkription_positions if type(tp) == TranskriptionPosition ])) > 0:
self.text = ''.join([ tp.get_text() for tp in self.transkription_positions ])
self.overwrites_word = None
self.styles = styles\
if styles is not None\
else []
self.verified = None
self.writing_process_id = writing_process_id
self.writing_processes = []
self.word_insertion_mark = None
self.word_box = None
self.word_parts = word_parts if word_parts is not None else []
self.word_part_objs = word_part_objs if word_part_objs is not None else []
def add_deletion_paths(self, deletion_paths, tr_xmin=0.0, tr_ymin=0.0):
"""Add a word deletion path to word.
"""
if len(self.word_parts) > 0:
for part in self.word_parts: part.add_deletion_paths(deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin)
elif self.deleted and len(self.transkription_positions) > 0:
word_path = Path.create_path_from_transkription_position(self.transkription_positions[0],\
tr_xmin=tr_xmin, tr_ymin=tr_ymin)
self.deletion_paths = [ deletion_path for deletion_path in deletion_paths\
if do_paths_intersect_saveMode(deletion_path, word_path) ]
-
-
def attach_word_to_tree(self, target_tree):
"""Attaches word to tree target_tree.
"""
word_node = super(Word,self).attach_word_to_tree(target_tree)
if self.deleted is not None:
word_node.set('deleted', str(self.deleted).lower())
if self.verified is not None:
word_node.set('verified', str(self.verified).lower())
if self.edited_text is not None:
word_node.set('edited-text', self.edited_text)
+ if self.editor_comment is not None:
+ self.editor_comment.attach_object_to_tree(word_node)
if self.writing_process_id > -1:
word_node.set('writing-process-id', str(self.writing_process_id))
for index, word_part in enumerate(self.word_parts):
word_part.id = index
word_part.attach_word_to_tree(word_node)
if self.earlier_version is not None:
earlier_node = ET.SubElement(word_node, self.XML_EARLIER_VERSION)
self.earlier_version.attach_word_to_tree(earlier_node)
if self.overwrites_word is not None\
and len(self.overwrites_word.transkription_positions) > 0:
overwrite_node = ET.SubElement(word_node, self.XML_OVERWRITES)
self.overwrites_word.attach_word_to_tree(overwrite_node)
if self.word_box is not None:
self.word_box.attach_object_to_tree(word_node)
if len(self.corrections) > 0:
word_node.set('corrections', ' '.join(set([ str(word.id) for word in self.corrections ])))
for key in self.XML_CORRECTION_DICT.keys():
if self.__dict__[key] is not None:
word_node.set(self.XML_CORRECTION_DICT[key], 'true')
return word_node
def belongs_to_multiple_writing_processes(self, include_parts=False):
"""Returns true if transkription_positions belong to different WritingProcesses.
"""
if len(self.word_parts) > 0 and include_parts:
return len(set(word.writing_process_id for word in self.word_parts)) > 1
return len(set(tp.writing_process_id for tp in self.transkription_positions )) > 1
def set_parent_word_writing_process_id(self):
"""Set writing_process_id for parent word.
"""
ids = set(word.transkription_positions[0].style for word in self.word_parts\
if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None)
if len(ids) > 1:
self.writing_process_id = max([style.writing_process_id for style in ids])
if len(set(word.transkription_positions[0].style.create_a_copy_wo_writing_process_id()\
for word in self.word_parts\
if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None))\
> 1:
self.writing_process_id += 1
@classmethod
def create_cls(cls, word_node):
"""Creates a word from a (lxml.Element) node.
[:return:] Word
"""
cls = super(Word,cls).create_cls(word_node)
cls.writing_process_id = int(word_node.get('writing-process-id')) if bool(word_node.get('writing-process-id')) else -1
cls.split_strings = None
cls.join_string = word_node.get('join')
if bool(word_node.get('split')):
cls.split_strings = word_node.get('split').split(' ')
if ''.join(cls.split_strings) != cls.text:
error_msg = 'Error in file {0}: word with id="{1}" has split attributes that do not correspond to its text attribute!\n'.\
format(word_node.getroottree().docinfo.URL, str(cls.id))\
+ 'Split attributes: "{0}".\n'.format(' '.join(cls.split_strings))\
+ 'Text attribute: "{0}".\n'.format(cls.text)
raise Exception(error_msg)
cls.verified = word_node.get('verified') == 'true'\
if bool(word_node.get('verified')) else None
cls.deleted = word_node.get('deleted') == 'true'\
if bool(word_node.get('deleted')) else None
cls.edited_text = word_node.get('edited-text')
+ cls.editor_comment = [ EditorComment.create_cls_from_node(node) for node in word_node.xpath('./' + EditorComment.XML_TAG) ][0]\
+ if len([ node for node in word_node.xpath('./' + EditorComment.XML_TAG) ]) > 0 else None
cls.word_parts = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_TAG) ]
if bool(word_node.get('corrections')):
for index in [ int(i) for i in word_node.get('corrections').split(' ') ]:
if index < len(cls.word_parts):
cls.corrections.append(cls.word_parts[index])
cls.earlier_version = None
if len(word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG)) > 0:
cls.earlier_version = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG) ][0]
for key_value in cls.XML_CORRECTION_DICT.values():
if word_node.get(key_value) == 'true':
cls.__dict__[key_value] = True
if cls.earlier_version is not None:
for word_part in cls.word_parts:
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Part') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]\
and len(cls.word_parts) <= len(cls.earlier_version.word_parts):
try:
word_part.__dict__[key] = cls.earlier_version.word_parts[word_part.id]
except Exception:
msg = f'{cls.id} {cls.text}: {word_part.id}'
raise Exception(msg)
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('EarlierVersion') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]:
word_part.__dict__[key] = cls.earlier_version
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Word') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]:
word_part.__dict__[key] = cls
cls.overwrites_word = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)][0]\
if len(word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)) > 0\
else None
cls.word_box = [ Box(node=node) for node in word_node.xpath('./' + Box.XML_TAG) ][0]\
if len(word_node.xpath('./' + Box.XML_TAG)) > 0\
else None
return cls
def create_earlier_version(self, root_word=None, id=0):
"""Create an earlier version of word.
"""
if root_word is None:
root_word = self
root_word.set_parent_word_writing_process_id()
word_parts = []
non_single_punctuation_word_parts = [ word_part for word_part in self.word_parts\
if not re.match(SINGLE_PUNCTUATION_PATTERN, word_part.text) ]
non_single_punctuation_word_parts_length = len(non_single_punctuation_word_parts)
if non_single_punctuation_word_parts_length > 0\
and len([ word_part for word_part in non_single_punctuation_word_parts\
if word_part.deleted ])\
== non_single_punctuation_word_parts_length:
self.deleted = True
for word_part in non_single_punctuation_word_parts: word_part.deleted = False
for id, word_part in enumerate(self.word_parts):
earlierWordPart = word_part.create_earlier_version(root_word=root_word, id=id)
if word_part.deleted:
word_part.isDeletionOfWord = earlierWordPart
word_parts.append(earlierWordPart)
if word_part not in self.corrections:
self.corrections.append(word_part)
elif word_part.overwrites_word is not None\
and (len(word_part.transkription_positions) > 0\
and word_part.overwrites_word.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style\
!= word_part.overwrites_word.transkription_positions[0].style):
word_part.overwrites_word.id = word_part.id
word_parts.append(word_part.overwrites_word)
word_part.isTransformationOfWord = word_part.overwrites_word
#print(f'transform: {self.text}')
if word_part not in self.corrections:
self.corrections.append(word_part)
elif root_word.writing_process_id > -1\
and (len(word_part.transkription_positions) > 0\
and word_part.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style.writing_process_id\
== root_word.writing_process_id):
word_part.extendsEarlierVersion = True
#print('extends')
if word_part not in self.corrections:
self.corrections.append(word_part)
else:
if word_part.deleted:
word_part.isDeletionOfWord = earlierWordPart
word_parts.append(earlierWordPart)
if word_part not in self.corrections:
self.corrections.append(word_part)
else:
#print(f'default: {self.text}')
word_parts.append(earlierWordPart)
text = ''.join([ word.text for word in word_parts ])\
if len(word_parts) > 0\
else self.text
if len(word_parts) == 1:
self.transkription_positions += word_parts[0].transkription_positions
self.faksimile_positions += word_parts[0].faksimile_positions
word_parts = []
new_transkription_positions = copy.deepcopy(self.transkription_positions)
if len(self.transkription_positions) > 0\
and self.transkription_positions[0].style is not None:
writing_process_id = self.transkription_positions[0].style.writing_process_id
for new_tp in new_transkription_positions:
new_tp.style.writing_process_id = writing_process_id
return Word(id=id, text=text, transkription_positions=new_transkription_positions,\
faksimile_positions=self.faksimile_positions, line_number=self.line_number,\
word_parts=word_parts)
def create_correction_history(self, page=None, box_style=None):
"""Create correction history.
"""
if self.word_box is not None:
manuscript = self.transkription_positions[0].style.manuscript\
if len(self.transkription_positions) > 0\
and self.transkription_positions[0].style is not None\
else None
style = Style()
if box_style is not None:
style = box_style
if page is not None:
style = Style.create_cls(page, self.word_box.text_style_class, manuscript=manuscript)
for font_key in [ font_key for font_key in self.word_box.text_style_class.split(' ') if font_key in page.fontsizekey2stage_mapping.keys() ]:
style.writing_process_id = page.fontsizekey2stage_mapping.get(font_key)
transkription_positions = TranskriptionPosition.copy_list_of_cls(self.transkription_positions)
for transkription_position in transkription_positions:
transkription_position.style = style
self.overwrites_word = Word(text=self.word_box.earlier_text, transkription_positions=transkription_positions,\
line_number=self.line_number)
for word_part in self.word_parts:
word_part.create_correction_history(page=page, box_style=box_style)
if len(self.word_parts) > 0:
earlier_version = self.create_earlier_version()
extending_words = self._get_parts_with_property_key('extendsEarlierVersion')
if len(extending_words) > 0:
for word in extending_words:
word.isExtensionOfWord = earlier_version
if self.has_mixed_status('deleted', include_parts=True):
self.edited_text = ''.join([ word.text for word in self.word_parts if not word.deleted ])
if len(self.corrections) > 0:
self.earlier_version = earlier_version
@staticmethod
def CREATE_WORD(word_node=None, page=None, word_part_objs=[], id=0, height=0, endX=0, endSign=None, matrix=None, line_number=-1, debug_msg=None):
"""Creates a word from a (lxml.Element) node or word_part_objs.
[:return:] Word
"""
if word_node is not None: # init word from xml node
id = int(word_node.get('id'))
line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else line_number
text = word_node.get('text')
deleted = bool(word_node.get('deleted')) and word_node.get('deleted') == 'true'
transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('.//' + WordPosition.TRANSKRIPTION) ]
faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('.//' + WordPosition.FAKSIMILE) ]
word_part_objs = [ item.attrib for item in word_node.findall('.//' + Word.DATA + '/part')]\
if len(word_node.findall('.//' + Word.DATA)) > 0\
else [ item.attrib for item in word_node.findall('.//part')]
return Word(id=id, text=text, deleted=deleted, line_number=line_number, transkription_positions=transkription_positions,\
faksimile_positions=faksimile_positions, word_part_objs=word_part_objs)
elif len(word_part_objs) > 0: # init word from word_part_obj that has been extracted from svg file
WIDTH = 5
TOPCORRECTION = 2.0
FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize
height = height
x = round(float(word_part_objs[0]['x']), 3)
if(page is not None and bool(page.style_dict)):
HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height
style_set = set(' '.join(set( dict['class'] for dict in word_part_objs)).split(' '))
biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set)
height = round(biggest_font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / biggest_font_size, 3)
TOPCORRECTION = 1 + HEIGHT_FACTOR / biggest_font_size
if endSign is not None and '%' in endSign:
lastCharFontSizeList = [ float(page.style_dict[key]['font-size'].replace('px',''))\
for key in word_part_objs[len(word_part_objs)-1]['class'].split(' ')\
if bool(page.style_dict[key].get('font-size'))]
lastCharFontSize = lastCharFontSizeList[0] if len(lastCharFontSizeList) > 0 else 1
endX = float(endX) + lastCharFontSize * FONTWIDTHFACTOR
elif endSign is not None and '%' in endSign:
endX = float(endX) + WIDTH
bottom = round(float(word_part_objs[0]['y']), 3)
y = round(bottom - height + TOPCORRECTION, 3)
width = round(float(endX) - x, 3)
transkription_positions = [ WordPosition(height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) ]
text = ''.join([ dict['text'] for dict in word_part_objs])
line_number = page.get_line_number( (y + bottom)/2) if page is not None else line_number
word = Word(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions, word_part_objs=word_part_objs)
word.debug_msg = debug_msg
return word
else:
error_msg = 'word_node has not been defined' if (word_node is None) else 'word_part_objs is empty'
raise Exception('Error: {}'.format(error_msg))
@classmethod
def get_semantic_dictionary(cls):
""" Creates and returns a semantic dictionary as specified by SemanticClass.
"""
dictionary = super(Word,cls).get_semantic_dictionary()
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('styles', Style,\
cardinality=1, cardinality_restriction='minCardinality',\
name='wordHasStyle', label='word has style', comment='Word has an appearance that is characterized by this style.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('corrections', Word,\
name='wordHasCorrection', label='word has corrections', comment='Word has a correction made by the author.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deletion_paths', WordDeletionPath,\
name='wordIsDeletedByPath', label='word has been deleted with a deletion path',\
comment='Word has been deleted by the author using a deletion path.'))
- #dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deleted', bool,\
- # name='isWordDeleted', label='has word been deleted', comment='Word has been deleted by the author.'))
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('editor_comment', EditorComment,\
+ name='wordHasEditorComment', label='word has a comment by the editors', comment='Word has been commented by the editors.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_version', Word,\
name='wordHasEarlierVersion', label='word has an earlier version', comment='There is a earlier version of this word.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('edited_text', str,\
name='hasEditedText', label='word has an edited text', comment='Word has a text that is edited automatically by removing deleted parts or hyphens.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isClarificationOfWord', Word,\
name='isClarificationOfWord', label='word is a clarification of word',\
comment='The author has used this part of the word in order to clarify the appearance of that word.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isDeletionOfWord', Word,\
name='isDeletionOfWord', label='word is a deletion of word',\
comment='The author has used this part of a word in order to delete the corresponding part of an earlier version of this word.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isExtensionOfWord', Word,\
name='isExtensionOfWord', label='word is a extension of word',\
comment='The author has used this part of a word in order to extend an earlier version of this word.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isTransformationOfWord', Word,\
name='isTransformationOfWord', label='word is a transformation of word',\
comment='The author has used this part of a word in order to transform the corresponding part of an earlier version of this word.'))
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('overwrites_word', Word,\
name='overwritesWord', label='word overwrites word',\
comment='The author has used this word in order to overwrite that word.'))
# This makes wordHasWordParts a subproperty of cls.HAS_HOMOTYPIC_PARTS_URL_STRING,
# cls.return_dictionary_after_updating_super_classes will subclass Word under the corresponding super class.
dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('word_parts', list,\
name='wordHasWordParts', label='word has word parts', comment='Word consists of a list of words.',\
subPropertyOf=cls.HAS_HOMOTYPIC_PARTS_URL_STRING))
super_property_dictionary = cls.create_semantic_property_dictionary(cls.SUPER_PROPERTY, Word,\
name='isCorrectionOfWord', label='word is a correction of word',\
comment='The author has used this word in order to correct that word.')
for key in cls.XML_CORRECTION_DICT.keys():
correction_dict = dictionary[cls.PROPERTIES_KEY].get(key)
correction_dict.update(super_property_dictionary)
dictionary[cls.PROPERTIES_KEY].update({key: correction_dict})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def has_mixed_status(self, property_key, include_parts=False, concerns_word=True):
"""Returns true if transkription_positions have mixed status concerning the property_key in their __dict__.
"""
if False in set(property_key in tp.__dict__.keys() for tp in self.transkription_positions):
return False
if len(self.word_parts) > 0 and include_parts:
if concerns_word:
if False in set(property_key in word.__dict__.keys() for word in self.word_parts):
return False
return len(set(word.__dict__[property_key] for word in self.word_parts)) > 1
else:
return len(set(word.transkription_positions[0].__dict__[property_key] for word in self.word_parts\
if len(word.transkription_positions) > 0 and property_key in word.transkription_positions[0].__dict__.keys())) > 1
return len(set(tp.__dict__[property_key] for tp in self.transkription_positions )) > 1
def init_word(self, page):
"""Initialize word with objects from page.
"""
super(Word,self).init_word(page)
if self.writing_process_id > -1:
self.writing_processes += [ wp for wp in page.writing_processes if wp.id == self.writing_process_id ]
writing_processes = self.writing_processes
for word_part in self.word_parts:
word_part.init_word(page)
self.lines += word_part.lines
self.writing_processes += word_part.writing_processes
self.lines = [ line for line in set(self.lines) ]
self.writing_processes = [ wp for wp in set(self.writing_processes)]
if self.overwrites_word is not None:
self.overwrites_word.init_word(page)
if self.earlier_version is not None:
if self.earlier_version.writing_process_id == -1:
self.earlier_version.writing_process_id = self.writing_process_id-1
if self.earlier_version.line_number == -1:
self.earlier_version.line_number = self.line_number
self.earlier_version.init_word(page)
def join(self, other_word, append_at_end_of_new_word=True):
"""Joins other_word with this word by changing the text of current word and adding other_word.transkription_positions.
"""
if append_at_end_of_new_word:
self.text = self.text + other_word.text
for position in other_word.transkription_positions:
position.id = str(len(self.transkription_positions))
self.transkription_positions.append(position)
else:
self.text = other_word.text + self.text
index = 0
for position in other_word.transkription_positions:
self.transkription_positions.insert(index, position)
index += 1
while index < len(self.transkription_positions):
self.transkription_positions[index].id = str(index)
index += 1
self.simplify_transkription_positions()
def partition_according_to_deletion(self):
"""Partition a word according to its transkription_positions' deletion status
->split word and add partial words as its parts.
"""
if self.has_mixed_status('deleted'):
transkription_positions = []
last_status = None
for transkription_position in self.transkription_positions:
if transkription_position.deleted != last_status\
and len(transkription_positions) > 0:
newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id)
self.word_parts.append(newWord)
transkription_positions = []
transkription_positions.append(transkription_position)
last_status = transkription_position.deleted
if len(transkription_positions) > 0:
newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id)
self.word_parts.append(newWord)
self.transkription_positions = []
self.line_number = -1
self.deleted = False
elif len(self.word_parts) > 0:
self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_deletion')
elif not self.deleted\
and len(self.transkription_positions) > 0\
and self.transkription_positions[0].deleted:
self.deleted = True
def partition_according_to_writing_process_id(self):
    """Partition a word according to its transkription_positions' writing_process_ids
    ->split word and add partial words as its parts.

    Consecutive transkription positions sharing a writing_process_id become
    new part words; if the word already has parts, the partitioning is
    delegated to them. Afterwards self.writing_process_id is set to the
    highest id among the parts (resp. to the first position's id).
    """
    if self.belongs_to_multiple_writing_processes():
        last_writing_process_id = -1
        transkription_positions = []
        for transkription_position in self.transkription_positions:
            # an id change closes the current group -> create a part word
            if transkription_position.writing_process_id != last_writing_process_id\
            and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                transkription_positions=transkription_positions, writing_process_id=last_writing_process_id)
                self.word_parts.append(newWord)
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_writing_process_id = transkription_position.writing_process_id
        # flush the trailing group
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
            transkription_positions=transkription_positions, writing_process_id=last_writing_process_id)
            self.word_parts.append(newWord)
        self.transkription_positions = []
    elif len(self.word_parts) > 0:
        self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_writing_process_id')
    if self.belongs_to_multiple_writing_processes(include_parts=True):
        # highest writing process id among all parts wins
        self.writing_process_id = sorted(set([ word.writing_process_id for word in self.word_parts ]), reverse=True)[0]
    elif len(self.transkription_positions) > 0:
        self.writing_process_id = self.transkription_positions[0].writing_process_id
def process_boxes(self, box_paths, tr_xmin=0.0, tr_ymin=0.0, previous_word_has_box=False):
    """Determines whether word is over a word box.

    Args:
        box_paths: candidate box paths; matched boxes are REMOVED from this list.
        tr_xmin, tr_ymin: offsets applied when building the word path
            (presumably the transkription field origin — TODO confirm).
        previous_word_has_box: True if the preceding word was found over a box;
            the first transkription position is then nudged to the right.
    :return: the (partial) word over a box, or None
    """
    word_over_box = None
    if len(self.word_parts) > 0:
        # delegate to parts; a part found over a box makes the next part
        # receive previous_word_has_box=True
        for word in self.word_parts:
            current_word = word.process_boxes(box_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin, previous_word_has_box=(word_over_box is not None))
            if current_word is not None and current_word.word_box is not None:
                word_over_box = current_word
    else:
        new_tp_dict = {}
        for index, transkription_position in enumerate(self.transkription_positions):
            if previous_word_has_box and index == 0:
                if len(transkription_position.positional_word_parts) > 0:
                    # shift right by half the first part's width
                    transkription_position.positional_word_parts[0].left += transkription_position.positional_word_parts[0].width/2
                    #print(f'{self.text}: {transkription_position.positional_word_parts[0].left}')
                else:
                    transkription_position.left += 1
            word_path = Path.create_path_from_transkription_position(transkription_position,\
            tr_xmin=tr_xmin, tr_ymin=tr_ymin)
            containing_boxes = [ box_path for box_path in box_paths\
            if word_path.is_partially_contained_by(box_path)\
            or box_path.do_paths_intersect(word_path) ]
            if len(containing_boxes) > 0:
                if previous_word_has_box:
                    # debug output
                    print(f'{self.text}: {word_path.path.bbox()} {containing_boxes[0].path.bbox()}')
                # may split the position; replacements collected in new_tp_dict
                self._set_box_to_transkription_position(containing_boxes[0], word_path,\
                transkription_position, new_tp_dict, tr_xmin)
                box_paths.remove(containing_boxes[0])
        # swap split positions in place of their originals
        for replace_tp in new_tp_dict.keys():
            for tp in new_tp_dict.get(replace_tp):
                self.transkription_positions.insert(self.transkription_positions.index(replace_tp), tp)
            self.transkription_positions.remove(replace_tp)
        word_over_box = self._get_partial_word_over_box()
    update_transkription_position_ids(self)
    return word_over_box
def set_word_insertion_mark(self, word_insertion_mark):
    """Attach the given word insertion mark to this word.

    Args:
        word_insertion_mark: the mark object to store on this word.
    """
    self.word_insertion_mark = word_insertion_mark
def set_writing_process_id_to_transkription_positions(self, page):
    """Determines the writing process id of the transkription_positions.

    For each transkription position, the style classes of its first
    positional word part are looked up in page.fontsizekey2stage_mapping;
    every matching font key overwrites writing_process_id, so the last
    matching key wins.
    """
    mapping = page.fontsizekey2stage_mapping
    for tp in self.transkription_positions:
        pwps = tp.positional_word_parts
        if len(pwps) == 0:
            continue
        for font_key in pwps[0].style_class.split(' '):
            if font_key in mapping.keys():
                tp.writing_process_id = mapping.get(font_key)
def simplify_transkription_positions(self):
    """Merge transkription_positions if possible.

    Walks the positions from right to left; each pair of neighbours for
    which is_mergebale_with holds is rebuilt into a single transkription
    position from their combined positional word parts. Requires every
    position to have positional_word_parts (loop exits otherwise).
    """
    index = len(self.transkription_positions)-1
    while index > 0\
    and False not in [ 'positional_word_parts' in tp.__dict__.keys() for tp in self.transkription_positions ]:
        current_tp = self.transkription_positions[index]
        index -= 1
        previous_tp = self.transkription_positions[index]
        if previous_tp.is_mergebale_with(current_tp):
            positional_word_parts = previous_tp.positional_word_parts
            positional_word_parts += current_tp.positional_word_parts
            transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\
            positional_word_parts, debug_msg_string='simplifying transkription positions', transkription_position_id=previous_tp.id)
            # only replace the pair if the merge produced exactly one position
            if len(transkription_positions) == 1:
                # keep the previous position's writing process id when it is set
                transkription_positions[0].writing_process_id = previous_tp.writing_process_id\
                if previous_tp.writing_process_id != -1\
                else current_tp.writing_process_id
                self.transkription_positions.pop(index+1)
                self.transkription_positions[index] = transkription_positions[0]
    #print(self.text, len(self.transkription_positions))
def split(self, split_string, start_id=0):
    """Splits the word and returns a 3-tuple of new words.

    self.text is partitioned around split_string; the word's positional
    word parts are distributed over the resulting (previous, current,
    next) strings by matching their concatenated texts. Warns (and leaves
    the corresponding word as None) when the parts cannot be matched.

    :return: (previousWord, currentWord, nextWord) — previous/next may be None
    """
    previousString, currentString, nextString = self.text.partition(split_string)
    currentWord = None
    previousWord = None
    nextWord = None
    previousIndex = 0
    current_id = start_id
    all_positional_word_parts = []
    for position in self.transkription_positions:
        all_positional_word_parts += position.positional_word_parts
    if len(all_positional_word_parts) == 0:
        warnings.warn('ATTENTION: Word: {} {} with Strings "{}, {}, {}": there are no parts!'.format(self.id, self.text, previousString, currentString, nextString))
    if len(previousString) > 0:
        # consume parts from the front until they spell previousString
        previous_pwps = []
        while previousIndex < len(all_positional_word_parts) and previousString != ''.join([ pwp.text for pwp in previous_pwps ]):
            previous_pwps.append(all_positional_word_parts[previousIndex])
            previousIndex += 1
        if previousString != ''.join([ pwp.text for pwp in previous_pwps ]):
            warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(previousString))
        else:
            previous_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(previous_pwps, debug_msg_string='word.split')
            previous_text = ''.join([ pwp.text for pwp in previous_pwps ])
            previousWord = Word(text=previous_text, id=current_id, line_number=self.line_number, transkription_positions=previous_transkription_positions)
            current_id += 1
            all_positional_word_parts = all_positional_word_parts[previousIndex:]
    if len(nextString) > 0:
        # consume parts until they spell currentString; the rest is nextString
        tmp_pwps = []
        index = 0
        while index < len(all_positional_word_parts) and currentString != ''.join([ pwp.text for pwp in tmp_pwps ]):
            tmp_pwps.append(all_positional_word_parts[index])
            index += 1
        if currentString != ''.join([ pwp.text for pwp in tmp_pwps ]):
            warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(currentString))
        else:
            next_pwps = all_positional_word_parts[index:]
            next_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(next_pwps, debug_msg_string='word.split')
            next_text = ''.join([ pwp.text for pwp in next_pwps ])
            nextWord = Word(text=next_text, id=current_id+1, line_number=self.line_number, transkription_positions=next_transkription_positions)
            all_positional_word_parts = all_positional_word_parts[:index]
    # whatever remains becomes the current word
    current_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(all_positional_word_parts, debug_msg_string='word.split')
    current_text = ''.join([ pwp.text for pwp in all_positional_word_parts ])
    currentWord = Word(text=current_text, id=current_id, line_number=self.line_number, transkription_positions=current_transkription_positions)
    return previousWord, currentWord, nextWord
def split_according_to_status(self, status, splits_are_parts=False):
    """Split a word according to its transkription_positions' value of `status`.

    Consecutive transkription positions sharing the same value of the
    attribute named by `status` become new words (built via
    _create_new_word). If splits_are_parts is True, the new words are also
    appended to self.word_parts and the word's own positions are cleared.

    :return: a list of new word.Word
    """
    new_words = []
    if self.has_mixed_status(status):
        last_status = None
        transkription_positions = []
        for transkription_position in self.transkription_positions:
            # a value change closes the current group -> create a new word
            if transkription_position.__dict__[status] != last_status\
            and len(transkription_positions) > 0:
                new_words.append(\
                self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words)))
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_status = transkription_position.__dict__[status]
        # flush the trailing group
        if len(transkription_positions) > 0:
            new_words.append(\
            self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words)))
        if splits_are_parts:
            self.word_parts += new_words
            if len(self.word_parts) > 0:
                self.transkription_positions = []
    return new_words
def undo_partitioning(self):
    """Undo partitioning.

    Recursively undoes partitioning of all parts, re-collecting their
    transkription positions into self.transkription_positions (only while
    the collected positions do not yet spell self.text), then resets all
    partition-related state on this word.
    """
    if len(self.word_parts) > 0:
        for word_part in self.word_parts:
            word_part.undo_partitioning()
            # stop re-adding once the collected positions already spell self.text
            if self.text != ''.join([ tp.get_text() for tp in self.transkription_positions ]):
                self.transkription_positions += word_part.transkription_positions
        self.earlier_version = None
        self.edited_text = None
        self.word_box = None
        self.word_parts = []
        self.corrections = []
        self.earlier_versions = []
        self.box_paths = []
def _create_new_word(self, transkription_positions, status, new_id=0):
    """Create a new word from self and transkription_positions.

    Copies the attributes listed in COPY_PROPERTY_KEY (except the split
    status itself) onto the new word, then transfers the status value of
    the first transkription position: appended to a list property when
    status is in APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS, set directly
    otherwise.

    :return: the new Word
    """
    newWord = Word(id=new_id, transkription_positions=transkription_positions)
    for key in self.COPY_PROPERTY_KEY:
        if key != status and key in self.__dict__.keys():
            newWord.__dict__[key] = self.__dict__[key]
    if status in self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS.keys():
        newWord.__dict__[self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS[status]].append(transkription_positions[0].__dict__[status])
    else:
        newWord.__dict__[status] = transkription_positions[0].__dict__[status]
    return newWord
def _get_parts_with_property_key(self, property_key):
"""Return a list of word_parts with property == property_key.
"""
word_parts = []
for word_part in self.word_parts:
if property_key in word_part.__dict__.keys():
word_parts.append(word_part)
else:
word_parts += word_part._get_parts_with_property_key(property_key)
return word_parts
def _get_partial_word_over_box(self):
    """Partition a word according to its transkription_positions' has_box
    ->split word and add partial words as its parts.

    When has_box status is mixed, consecutive positions sharing the same
    box become part words; the part whose box is set is returned. With
    existing parts the search recurses into them; a word with exactly one
    boxed position is returned itself.

    :return: word over box or self, None if nothing is boxed
    """
    word_over_box = None
    if self.has_mixed_status('has_box'):
        transkription_positions = []
        last_word_box = None
        for transkription_position in self.transkription_positions:
            # a box change closes the current group -> create a part word
            if transkription_position.has_box != last_word_box\
            and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
                self.word_parts.append(newWord)
                if last_word_box is not None:
                    word_over_box = newWord
                    word_over_box.word_box = last_word_box
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_word_box = transkription_position.has_box
        # flush the trailing group
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
            transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
            self.word_parts.append(newWord)
            if last_word_box is not None:
                word_over_box = newWord
                word_over_box.word_box = last_word_box
        self.transkription_positions = []
    elif len(self.word_parts) > 0:
        #self.word_parts, word_over_box = execute_function_on_parts(self.word_parts, inspect.currentframe().f_code.co_name) #'get_partial_word_over_box'
        # recurse into parts until the first boxed part is found
        for word_part in self.word_parts:
            if word_over_box is None:
                word_over_box = word_part._get_partial_word_over_box()
            else:
                break
    elif len([ tp for tp in self.transkription_positions if tp.has_box is not None]) == 1:
        word_over_box = self
        word_over_box.word_box = [ tp for tp in self.transkription_positions if tp.has_box is not None][0].has_box
    return word_over_box
def _set_box_to_transkription_position(self, box_path, word_path, transkription_position, new_transkription_positions_dictionary, tr_xmin):
    """Set box_path to transkription_position that is contained by box_path.
    Create new transkription_positions by splitting old ones if necessary and add them to new_transkription_positions_dictionary.

    bbox()[0]/bbox()[1] are used as the box's left/right x edges
    (presumably svgpathtools bbox order (xmin, xmax, ymin, ymax) — TODO confirm);
    tr_xmin is subtracted to convert to transkription coordinates.
    """
    if box_path.contains_path(word_path):
        # box covers the whole word path -> no split needed
        transkription_position.has_box = box_path
    elif box_path.contains_start_of_path(word_path):
        # box covers the start -> split at the box's right edge; first half gets the box
        split_position = box_path.path.bbox()[1] - tr_xmin
        new_tps = transkription_position.split(split_position)
        if len(new_tps) == 2:
            new_tps[0].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
    elif box_path.contains_end_of_path(word_path):
        # box covers the end -> split at the box's left edge; second half gets the box
        split_position = box_path.path.bbox()[0] - tr_xmin
        new_tps = transkription_position.split(split_position)
        if len(new_tps) == 2:
            new_tps[1].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
    else: # box_path in the middle of word_path
        # split twice; the middle piece gets the box
        split_position1 = box_path.path.bbox()[0] - tr_xmin
        split_position2 = box_path.path.bbox()[1] - tr_xmin
        new_tps = transkription_position.split(split_position1, split_position2)
        if len(new_tps) >= 2:
            new_tps[1].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
def do_paths_intersect_saveMode(mypath1, mypath2):
    """Returns true if paths intersect, false if not or if there was an exception.

    Truthy when the underlying svg paths intersect or mypath1 is partially
    contained by mypath2; False when the test raises an AssertionError.
    """
    try:
        intersects = mypath1.path.intersect(mypath2.path, justonemode=True)
        return intersects or mypath1.is_partially_contained_by(mypath2)
    except AssertionError:
        return False
Index: svgscripts/datatypes/uncertain_decipherment.py
===================================================================
--- svgscripts/datatypes/uncertain_decipherment.py (revision 0)
+++ svgscripts/datatypes/uncertain_decipherment.py (revision 96)
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent an uncertain decipherment by the editors.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .editor_comment import EditorComment
+
+class UncertainDecipherment(EditorComment):
+ """
+ This class represents an uncertain decipherment by the editors.
+ """
+
+ def __init__(self):
+ super(UncertainDecipherment, self).__init__(is_uncertain=True)
Index: svgscripts/datatypes/footnotes.py
===================================================================
--- svgscripts/datatypes/footnotes.py (revision 95)
+++ svgscripts/datatypes/footnotes.py (revision 96)
@@ -1,263 +1,312 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to extract footnotes from a svg file.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
-import getopt
import re
import sys
from os import listdir, sep, path
from os.path import isfile, isdir, dirname
import lxml.etree as ET
+import warnings
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
+from .atypical_writing import AtypicalWriting
+from .clarification import Clarification
+from .editor_correction import EditorCorrection
+from .line_continuation import LineContinuation
from .matrix import Matrix
from .standoff_tag import StandoffTag
from .text import Text
from .transkriptionField import TranskriptionField
+from .uncertain_decipherment import UncertainDecipherment
UNITTESTING = False
+DEBUG = False
class FootnoteColumns:
"""This class represents footnote columns.
"""
- REFERNCE_PATTERN = re.compile('.*[0-9]+:')
+ REFERENCE_PATTERN = re.compile('.*(\d+-)*[0-9]+:')
+ REFERENCE_GROUP = re.compile('(.*\D)((\d+-)*[0-9]+:)')
- def __init__(self, nsmap, nodes, bottom_values, style_dict):
+ def __init__(self, nsmap, nodes, bottom_values, style_dict, debug=False):
self.bottom_values = bottom_values
self.footnote_columns = []
self.footnote_keys = {}
self.index = 0
self.nodes = nodes
self.nsmap = nsmap
self.style_dict = style_dict
- for node in [ item for item in nodes\
- if Matrix(transform_matrix_string=item.get('transform')).getY() == self.bottom_values[0] ]:
+ self._init_columns()
+
+ def _init_columns(self):
+ """Initialize footnote column positions
+ by creating lists in self.footnote_columns and adding the positions as keys
+ to self.footnote_keys while the index of self.footnote_columns are their values.
+ """
+ first_line_fn_nodes = sorted([ item for item in self.nodes\
+ if round(Matrix(transform_matrix_string=item.get('transform')).getY(), 2) == round(self.bottom_values[0], 2) ],\
+ key=lambda node: Matrix(transform_matrix_string=node.get('transform')).getX())
+ current_nodes = []
+ for node in first_line_fn_nodes:
matrix = Matrix(transform_matrix_string=node.get('transform'))
- if node.getchildren() == 0:
- if re.match(self.REFERNCE_PATTERN, node.text):
- self.footnote_columns.append([])
- self.footnote_keys.update({ round(matrix.getX()): len(self.footnote_columns)-1 })
- else:
- items = [ item for item in node.findall('tspan', self.nsmap)]
- if re.match(self.REFERNCE_PATTERN, ''.join([ item.text for item in items])):
- self.footnote_columns.append([])
- self.footnote_keys.update({ round(matrix.add2X(float(items[0].get('x')))): len(self.footnote_columns)-1 })
+ if len(node.getchildren()) > 0:
+ for tspan in node.findall('tspan', self.nsmap):
+ x = matrix.add2X(float(tspan.get('x')))
+ current_nodes.append({ 'x': x, 'text': tspan.text })
+ elif node.text is not None:
+ x = matrix.getX()
+ current_nodes.append({ 'x': x, 'text': node.text })
+ if re.match(self.REFERENCE_PATTERN,\
+ ''.join([ item.get('text') for item in current_nodes])):
+ current_nodes = self._remove_unused_texts(current_nodes)
+ self.footnote_columns.append([])
+ self.footnote_keys.update({ round(current_nodes[0].get('x')): len(self.footnote_columns)-1 })
+ current_nodes = []
+ if len(self.footnote_keys) == 0:
+ raise Exception(f'ERROR: there are no footnote_keys')
+
+ def _remove_unused_texts(self, nodes):
+ """Remove tspan that contain text that is not a line reference.
+ """
+ threshold = 100
+ match = re.match(self.REFERENCE_GROUP, ''.join([ item.get('text') for item in nodes]))
+ if match is not None and match.group(1) is not None:
+ unused_text = ''
+ index = 0
+ for item in nodes:
+ unused_text += item.get('text')
+ if match.group(1).startswith(unused_text):
+ index += 1
+ else:
+ break
+ if len(nodes) > index+1:
+ counter = 0
+ has_gap = False
+ for item in nodes[index:]:
+ if len(nodes) > index+counter+1\
+ and nodes[index+counter+1].get('x')-nodes[index+counter].get('x') > threshold:
+ index += counter+1
+ has_gap = True
+ break
+ counter += 1
+ if has_gap:
+ return nodes[index+1:]
+ return nodes[index:]
+ return nodes
def append(self, footnote):
"""Append footnote to a column
"""
self.footnote_columns[self.index].append(footnote)
@classmethod
def create_cls(cls, style_dict=None, page=None, transkription_field=None, svg_tree=None, svg_file=None):
"""Returns all footnotes as a list of Text.
"""
+ if page is not None and page.source is not None and svg_file is None:
+ svg_file = page.source
if transkription_field is None and svg_file is not None:
transkription_field = TranskriptionField(svg_file)
if svg_tree is None and svg_file is not None:
svg_tree = ET.parse(svg_file)
if style_dict is None and page is not None:
style_dict = StandoffTag.create_relevant_style_dictionary(page)
- nodes_in_footnote_area = [ item for item in filter(lambda x: Matrix.IS_IN_FOOTNOTE_AREA(x.get('transform'), transkription_field),\
- svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))]
- bottom_values = sorted([ bottom_value for bottom_value in set(Matrix(transform_matrix_string=item.get('transform')).getY() for item in nodes_in_footnote_area) ])
+ nodes_in_footnote_area = cls.EXTRACT_NODES_IN_FOOTNOTE_AREA(svg_tree, transkription_field)
+ bottom_values = cls.GET_BOTTOM_VALUES(nodes_in_footnote_area)
if len(bottom_values) == 0:
return None
else:
return cls(svg_tree.getroot().nsmap, nodes_in_footnote_area, bottom_values, style_dict)
- def extract_footnotes(self, contains_string=''):
+ def extract_footnotes(self, contains_string='') -> list:
"""Returns all footnotes as a list of Text.
"""
left_value = -1
for bottom_value in self.bottom_values:
nodes_on_line = [ item for item in self.nodes if Matrix(transform_matrix_string=item.get('transform')).getY() == bottom_value ]
nodes_on_line = sorted(nodes_on_line, key=lambda x: Matrix(transform_matrix_string=x.get('transform')).getX())
footnote = None
matrix = None
for node in nodes_on_line:
matrix = Matrix(transform_matrix_string=node.get('transform'))
footnote, left_value = self._process_content_and_markup(node, footnote, matrix)
if footnote is not None:
self.append(footnote)
footnotes = self.toList()
if contains_string != '':
footnotes = [ footnote for footnote in footnotes if contains_string in footnote.content ]
return footnotes
def get_index(self, left_value) -> int:
"""Return index of column for left value.
"""
index = -1
if round(left_value) in self.footnote_keys.keys():
index = self.footnote_keys[round(left_value)]
else:
for key, value in self.footnote_keys.items():
if abs(key - round(left_value)) < 2:
index = value
break
return index
def register_index(self, left_value):
"""Register index for next column to be used.
"""
index = self.get_index(left_value)
if index > -1:
self.index = index
else:
error_value = round(left_value)
msg = f'Left value not part of columns: {error_value} -> {self.footnote_keys}'
raise Exception(msg)
def toList(self):
"""Return footnotes as a list of Text.
"""
footnotes = []
for footnote_list in self.footnote_columns:
for footnote in footnote_list:
- if re.match(self.REFERNCE_PATTERN, footnote.content):
+ if re.match(self.REFERENCE_PATTERN, footnote.content):
footnotes.append(footnote)
else:
footnotes[-1].join(footnote)
return footnotes
def _process_content_and_markup(self, node, footnote, matrix):
"""Process content and markup of node.
[:return:] (footnote: Text, left_value: float)
"""
startIndex = 0
next_text = node.text
left_value = matrix.getX()
items = [ item for item in node.findall('tspan', self.nsmap)]
if len(items) > 0:
next_text = ''.join([ item.text for item in items])
left_value = matrix.add2X(float(items[0].get('x')))
+ elif bool(node.get('x')):
+ left_value = matrix.add2X(float(node.get('x')))
if footnote != None and\
- (re.match(r'.*[0-9]+:', next_text)\
+ ((re.match(r'.*[0-9]+:', next_text)
+ and not re.match(r'.*\d-', footnote.content))\
or (self.get_index(left_value) > -1\
and self.get_index(left_value) != self.index)):
self.append(footnote)
footnote = None
if len(items) > 0:
for item in items:
footnote, left_value = self._process_content_and_markup(item, footnote, matrix)
else:
if footnote is None:
footnote = Text(content=next_text)
- self.register_index(left_value)
+ try:
+ self.register_index(left_value)
+ except Exception:
+ print(self.footnote_columns)
+ raise Exception(f'{footnote}')
else:
startIndex = footnote.append(next_text)
if bool(node.get('class')):
standoff_markups = StandoffTag.create_cls(startIndex, len(footnote.content), node.get('class'), style_dict=self.style_dict)
if len(standoff_markups) > 0:
if len(footnote.standoff_markups) > 0:
standoff_markups = footnote.standoff_markups[-1].join_list(standoff_markups)
if len(standoff_markups) > 0:
footnote.standoff_markups += standoff_markups
return footnote, left_value
+ @staticmethod
+ def EXTRACT_NODES_IN_FOOTNOTE_AREA(svg_tree, transkription_field=None) ->list:
+ """Return a list of nodes that are in footnote area.
+ """
+ if transkription_field is None:
+ transkription_field = TranskriptionField(svg_tree.docinfo.URL)
+ nodes_in_footnote_area = [ item for item in filter(lambda node: Matrix.NODE_HAS_CONTENT_IN_FOOTNOTE_AREA(node, transkription_field),\
+ svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))]
+ for node in nodes_in_footnote_area:
+ if not Matrix.IS_IN_FOOTNOTE_AREA(node.get('transform'), transkription_field):
+ for child in node.getchildren():
+ if not Matrix.IS_IN_FOOTNOTE_AREA(node.get('transform'), transkription_field, x=float(child.get('x'))):
+ node.remove(child)
+ return nodes_in_footnote_area
+
+ @staticmethod
+ def GET_BOTTOM_VALUES(nodes_in_footnote_area) ->list:
+ """Return sorted list of bottom values.
+ """
+ return sorted([ bottom_value for bottom_value in set(Matrix(transform_matrix_string=item.get('transform')).getY() for item in nodes_in_footnote_area) ])
+
def extract_footnotes_as_strings(transkription_field=None, svg_tree=None, svg_file=None, contains_string=''):
"""Returns all footnotes as a list of strings.
"""
if transkription_field is None and svg_file is not None:
transkription_field = TranskriptionField(svg_file)
if svg_tree is None and svg_file is not None:
svg_tree = ET.parse(svg_file)
footnotes = []
nodes_in_footnote_area = [ item for item in filter(lambda x: Matrix.IS_IN_FOOTNOTE_AREA(x.get('transform'), transkription_field),\
svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))]
bottom_values = sorted([ bottom_value for bottom_value in set(Matrix(transform_matrix_string=item.get('transform')).getY() for item in nodes_in_footnote_area) ])
for bottom_value in bottom_values:
nodes_on_line = [ item for item in nodes_in_footnote_area if Matrix(transform_matrix_string=item.get('transform')).getY() == bottom_value ]
nodes_on_line = sorted(nodes_on_line, key=lambda x: Matrix(transform_matrix_string=x.get('transform')).getX())
footnote_string = ''
for node in nodes_on_line:
if len(node.getchildren()) == 0:
if footnote_string != '' and re.match(r'.*[0-9]+:', node.text):
footnotes.append(footnote_string)
footnote_string = node.text
else:
footnote_string += node.text
else:
next_string = ''.join([ item.text for item in node.findall('tspan', svg_tree.getroot().nsmap)])
if footnote_string != '' and re.match(r'.*[0-9]+:', next_string):
footnotes.append(footnote_string)
footnote_string = next_string
else:
footnote_string += next_string
footnotes.append(footnote_string)
if contains_string != '':
footnotes = [ footnote_string for footnote_string in footnotes if contains_string in footnote_string ]
return footnotes
-def extract_footnotes(page, transkription_field=None, svg_tree=None, svg_file=None, contains_string=''):
+def extract_footnotes(page, transkription_field=None, svg_tree=None, svg_file=None, contains_string='') ->list:
"""Returns all footnotes as a list of Text.
"""
footnote_columns = FootnoteColumns.create_cls(page=page, transkription_field=transkription_field,\
svg_tree=svg_tree, svg_file=svg_file)
+ if footnote_columns is None:
+ return []
return footnote_columns.extract_footnotes(contains_string=contains_string)
-def _process_content_and_markup(node, footnote, footnote_columns, style_dict, svg_tree, matrix):
- """Process content and markup of node.
- [:return:] (footnote: Text, left_value: float)
- """
- startIndex = 0
- next_text = node.text
- left_value = matrix.getX()
- items = [ item for item in node.findall('tspan', svg_tree.getroot().nsmap)]
- if len(items) > 0:
- next_text = ''.join([ item.text for item in items])
- left_value = matrix.add2X(float(items[0].get('x')))
- if footnote != None and\
- (re.match(r'.*[0-9]+:', next_text)\
- or (footnote_columns.get_index(left_value) > -1\
- and footnote_columns.get_index(left_value) != footnote_columns.index)):
- footnote_columns.append(footnote)
- footnote = None
- if len(items) > 0:
- for item in items:
- footnote, left_value = _process_content_and_markup(item, footnote, footnote_columns, style_dict, svg_tree, matrix)
- else:
- if footnote is None:
- footnote = Text(content=next_text)
- footnote_columns.register_index(left_value)
- else:
- startIndex = footnote.append(next_text)
- if bool(node.get('class')):
- standoff_markups = StandoffTag.create_cls(startIndex, len(footnote.content), node.get('class'), style_dict=style_dict)
- if len(standoff_markups) > 0:
- if len(footnote.standoff_markups) > 0:
- standoff_markups = footnote.standoff_markups[-1].join_list(standoff_markups)
- if len(standoff_markups) > 0:
- footnote.standoff_markups += standoff_markups
- return footnote, left_value
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
Index: svgscripts/datatypes/line_continuation.py
===================================================================
--- svgscripts/datatypes/line_continuation.py (revision 0)
+++ svgscripts/datatypes/line_continuation.py (revision 96)
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a line continuation.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .editor_comment import EditorComment
+from .reference import Reference
+
+class LineContinuation(EditorComment):
+ """
+ This class represents a line continuation.
+ """
+
+ def __init__(self, reference=None, to_reference=True, is_uncertain=False):
+ super(LineContinuation, self).__init__(is_uncertain=is_uncertain)
+ self.reference = reference
+ self.to_reference = to_reference
+
+ @classmethod
+ def create_cls(cls, reference_string, is_uncertain=False):
+ """Creates a cls from reference_string.
+ """
+ to_reference = True
+ if reference_string.startswith('von'):
+ to_reference = False
+ reference_string = reference_string[len('von')+1:]
+ reference = Reference.create_cls(reference_string=reference_string, is_uncertain=is_uncertain)
+ return cls(reference=reference, to_reference=to_reference, is_uncertain=is_uncertain)
+
+ @classmethod
+ def get_semantic_dictionary(cls):
+ """ Creates a semantic dictionary as specified by SemanticClass.
+ """
+ dictionary = super(LineContinuation,cls).get_semantic_dictionary()
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('reference', Reference, cardinality=1,\
+ name='lineContinuationHasReference', label='line continues from/to reference'))
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('to_reference', bool, cardinality=1,\
+ name='isLineAContinuationTo', label='whether line continues on reference'))
+ return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/text.py
===================================================================
--- svgscripts/datatypes/text.py (revision 95)
+++ svgscripts/datatypes/text.py (revision 96)
@@ -1,144 +1,185 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a text that may have standoff markup.
"""
# Copyright (C) University of Basel 2020 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
+import copy
from lxml import etree as ET
import re
import sys
from .attachable_object import AttachableObject
from .standoff_tag import StandoffTag
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Text(AttachableObject,SemanticClass):
    """
    This class represents a text that may have standoff markup.
    """
    TAG_PATTERN = re.compile(r'([^<]*)(<[^/]+>)')
    XML_TAG = 'text-with-markup'
    XML_SUB_TAG = 'text'

    def __init__(self, content=None, standoff_markups=None, id=0, tag=XML_TAG):
        self.id = str(id)
        self.tag = tag
        # NOTE(review): append() assumes content is a str — confirm callers never leave it None
        self.content = content
        self.standoff_markups = standoff_markups\
                if standoff_markups is not None\
                else []

    def append(self, content: str) -> int:
        """Extend text with content.

        [:return:] startIndex of appended content
        """
        startIndex = len(self.content)
        self.content += content
        return startIndex

    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.
        """
        if target_tree.__class__.__name__ == '_ElementTree':
            target_tree = target_tree.getroot()
        # reuse an existing node with our id, otherwise create a new one
        existing_nodes = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)
        obj_node = existing_nodes[0]\
                if len(existing_nodes) > 0\
                else ET.SubElement(target_tree, self.tag)
        obj_node.set('id', self.id)
        text_node = ET.SubElement(obj_node, self.XML_SUB_TAG)
        text_node.text = self.content
        for index, markup in enumerate(self.standoff_markups):
            markup.id = str(index)
            markup.attach_object_to_tree(obj_node)

    def extract_part(self, text_part, css_filter=';'):
        """Extract part of text for which text_part matches content.

        :return: datatypes.text.Text
        """
        if not css_filter.endswith(';'):
            css_filter += ';'
        if text_part not in self.content:
            # fix: error message previously read 'ERRROR'
            raise Exception(f'ERROR {text_part} is not a part of {self.content}!')
        part_start_index = self.content.find(text_part)
        part_end_index = part_start_index + len(text_part)
        # keep only markups matching the css filter that overlap [part_start_index, part_end_index)
        standoff_markups = [ markup for markup in self.standoff_markups\
                if markup.css_string.endswith(css_filter)\
                if (markup.startIndex <= part_start_index\
                    and markup.endIndex > part_start_index)\
                or (markup.startIndex >= part_start_index\
                    and markup.startIndex < part_end_index\
                    and markup.endIndex <= part_end_index)\
                or (markup.startIndex < part_end_index\
                    and markup.endIndex >= part_end_index)]
        new_markups = []
        for markup in standoff_markups:
            # shift indices into the coordinate space of the extracted part, clipping at its bounds
            startIndex = markup.startIndex - part_start_index\
                    if markup.startIndex > part_start_index else 0
            endIndex = markup.endIndex - part_start_index\
                    if markup.endIndex <= part_end_index\
                    else len(text_part)
            new_markups.append(StandoffTag(markup.markup, startIndex, endIndex))
        return Text(content=text_part, standoff_markups=new_markups)

    def join(self, other):
        """Join self and other, shifting other's markup indices past the inserted separator.
        """
        correction = self.append(' ' + other.content) + 1
        for standoff_markup in other.standoff_markups:
            standoff_markup.startIndex += correction
            standoff_markup.endIndex += correction
        self.standoff_markups += other.standoff_markups

    def markup_contains_css_filter(self, css_filter) ->bool:
        """Returns true if markup contains css_filter.
        """
        if not css_filter.endswith(';'):
            css_filter += ';'
        return any( markup.css_string.endswith(css_filter) for markup in self.standoff_markups )

    @classmethod
    def create_cls_from_node(cls, node):
        """Initialize a cls from node.

        [:return:] cls
        """
        standoff_markups = [ StandoffTag.create_cls_from_node(item) for item in\
                node.xpath('./' + '|./'.join(StandoffTag.MARKUP_STYLES)) ]
        text = node.xpath('./' + cls.XML_SUB_TAG + '/text()')[0]\
                if len(node.xpath('./' + cls.XML_SUB_TAG + '/text()')) > 0\
                else ''
        return cls(text, standoff_markups=standoff_markups, id=node.get('id'), tag=node.tag)

    @classmethod
    def create_cls_from_html(cls, html):
        """Creates a Text from a html string.

        :return: a (datatypes.text) Text
        """
        standoff_markups = []
        tag_matched = re.match(cls.TAG_PATTERN, html)
        while tag_matched is not None:
            tag = tag_matched.group(2)
            tags = [ t for t in tag.split('<') if t != '']
            tags.reverse()
            # NOTE(review): this looks like it lost characters in an export step
            # (presumably '</' + t, building the closing tag) — verify against VCS.
            endTag = ''.join([ '' + t for t in tags])
            startIndex = tag_matched.end() - len(tag)
            # NOTE(review): inner_tag_matched is never read — possibly a remnant of lost code.
            inner_tag_matched = re.match(cls.TAG_PATTERN, html[0:startIndex])
            html = html[0:startIndex] + html[tag_matched.end():]
            endTag_matched = re.match(rf'(.*)({endTag})', html)
            if endTag_matched is not None:
                endIndex = endTag_matched.end() - len(endTag)
                html = html[0:endIndex] + html[endTag_matched.end():]
                for markup in [ StandoffTag.HTML_TAG_DICTIONARY['<'+tag] for tag in tags\
                        if bool(StandoffTag.HTML_TAG_DICTIONARY.get('<'+tag)) ]:
                    standoff_markups.append(StandoffTag(markup, startIndex, endIndex))
            else:
                msg = f'HTML string contains no ending tag for {tag}!'
                raise Exception(msg)
            tag_matched = re.match(cls.TAG_PATTERN, html)
        return cls(html, standoff_markups=standoff_markups)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        properties = {}
        properties.update(cls.create_semantic_property_dictionary('content', str, cardinality=1,\
                name='textHasContent', label='content of text', comment='Connects a text with its content.'))
        properties.update(cls.create_semantic_property_dictionary('standoff_markups', StandoffTag,\
                name='textHasMarkup', label='standoff markup of text', comment='Connects a text with a list of standoff tags.'))
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/process_footnotes.py
===================================================================
--- svgscripts/process_footnotes.py (revision 0)
+++ svgscripts/process_footnotes.py (revision 96)
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This program can be used to process the footnotes of pages after their words have been merged with faksimile data.
+"""
+# Copyright (C) University of Basel 2019 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+from colorama import Fore, Style
+import getopt
+import lxml.etree as ET
+import os
+from os import listdir, sep, path, setpgrp, devnull
+from os.path import exists, isfile, isdir, dirname, basename
+from pathlib import Path as PathlibPath
+from progress.bar import Bar
+import re
+import shutil
+import sys
+import warnings
+
+if dirname(__file__) not in sys.path:
+ sys.path.append(dirname(__file__))
+
+from datatypes.manuscript import ArchivalManuscriptUnity
+from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
+from datatypes.atypical_writing import AtypicalWriting
+from datatypes.clarification import Clarification
+from datatypes.editor_comment import EditorComment
+from datatypes.editor_correction import EditorCorrection
+from datatypes.footnotes import extract_footnotes
+from datatypes.line_continuation import LineContinuation
+from datatypes.standoff_tag import StandoffTag
+from datatypes.text import Text
+from datatypes.uncertain_decipherment import UncertainDecipherment
+
+from util import back_up
+from process_files import update_svgposfile_status
+
+sys.path.append('shared_util')
+from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
+
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
# Module flags: UNITTESTING suppresses file writes, DEBUG enables debug output.
UNITTESTING = False

# Regular expressions that classify a footnote by its textual shape.
# A typical footnote reads '<line>(:)(<word>]) <marker> <comment>'.
ATYPICAL_GROUP = re.compile(r'(.*:.*]\s*)(¿)(.*)')               # '¿' marker -> AtypicalWriting
CLARIFICATION_GROUP = re.compile(r'(.*:.*]\s*)(Vk)(.*)')         # 'Vk' marker -> Clarification
CONTINUATION_GROUP = re.compile(r'(.*:\s*)(Fortsetzung\s*)')     # 'Fortsetzung' -> LineContinuation
COMMENT_GROUP = re.compile(r'(.*:.*])')                          # generic '<line>: <word>]' prefix
EDITOR_CORRECTION_GROUP = re.compile(r'(.*:.*]\s*)(>[?]*)(.*)')  # '>' (optionally '>?') -> EditorCorrection
LINE_REFERENCE_GROUP = re.compile(r'(\d+-)*([0-9]+)(:.*)')       # leading '<start>-<end>:' or '<line>:'
LINE_COMMENT_GROUP = re.compile(r'(.*\d+:)')                     # footnote refers to a whole line
UNCERTAINTY_WORD_GROUP = re.compile(r'(.*:.*]\s*)([>]*\?)(.*)')  # '?' -> UncertainDecipherment
UNCERTAINTY_EDITOR_GROUP = re.compile(r'(.*)(\?)')               # trailing '?' used by _is_uncertain
WORD_REFERENCE_GROUP = re.compile(r'(.*[0-9]+:\s*)(.*)(].*)')    # '<line>: <word>]' word reference
DEBUG = False
+
def categorize_footnotes(page, footnotes=None, manuscript_file=None, debug=False):
    """Categorize footnotes and attach the resulting editor comments to the page.

    @param page: the Page whose footnotes are processed
    @param footnotes: list of footnote texts; extracted from page when None
    @param manuscript_file: NOTE(review): currently unused here — confirm intended use
    @param debug: enable the module-wide debug flag for the duration of this call
    """
    # fix: the original assigned to a *local* DEBUG, leaving the module flag untouched
    global DEBUG
    DEBUG = debug
    if footnotes is None:
        footnotes = extract_footnotes(page)
    for footnote in footnotes:
        line_match = re.match(LINE_REFERENCE_GROUP, footnote.content)
        if line_match is not None:
            _process_line_match(page, footnote, line_match)
        else:
            warnings.warn(f'Unknown editor comment without a line reference: <{footnote}>')
    # persist the categorization on the page's XML tree
    page.update_and_attach_words2tree()
    for line in page.lines:
        line.attach_object_to_tree(page.page_tree)
    DEBUG = False
    if not UNITTESTING:
        write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\
                script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
+
def _is_uncertain(footnote) -> bool:
    """Return whether footnote contains sign for uncertainty.

    True when the footnote text ends in a question mark that falls inside
    an italic standoff markup.
    """
    uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content)
    if uncertain_match is None:
        return False
    marker_end = uncertain_match.end()
    italic_hits = [ markup for markup in footnote.standoff_markups
            if markup.css_string.endswith('italic;')
            and markup.startIndex <= marker_end <= markup.endIndex ]
    return len(italic_hits) > 0
+
def _process_line_match(page, footnote, line_match):
    """Process footnote if reference to a line matches.

    Dispatches to word-level processing when the footnote also names a word,
    otherwise attaches the comment to every line in the referenced range.
    """
    word_match = re.match(WORD_REFERENCE_GROUP, footnote.content)
    start_line_number = end_line_number = int(line_match.group(2))
    if line_match.group(1) is not None:
        # footnote refers to a line range '<start>-<end>'
        start_line_number = int(line_match.group(1)[0:-1])
    lines = [ line for line in page.lines if line.id >= start_line_number and line.id <= end_line_number ]
    if word_match is not None:
        _process_word_match(page, footnote, line_match, word_match.group(2), start_line_number)
    elif len(lines) > 0:
        for line in lines:
            _process_line_reference(page, footnote, line, _is_uncertain(footnote))
    else:
        # fix: original referenced an undefined name 'line_number' here (NameError)
        warnings.warn(f'Footnote refers to missing line(s) {start_line_number}-{end_line_number}: {footnote}')
+
def _process_line_reference(page, footnote, line, is_uncertain):
    """Process footnote if there is a line reference.

    Attaches either a LineContinuation or a generic EditorComment to line.
    """
    continuation_match = re.match(CONTINUATION_GROUP, footnote.content)
    if continuation_match is not None:
        reference_text = footnote.content[continuation_match.end():]
        if is_uncertain:
            reference_text = reference_text[:-1]   # drop the trailing uncertainty marker
        line.editor_comment = LineContinuation.create_cls(reference_string=reference_text, is_uncertain=is_uncertain)
        return
    comment_match = re.match(LINE_COMMENT_GROUP, footnote.content)
    if comment_match is None:
        warnings.warn(f'Unknown editor comment for line "{line.id}": <{footnote}>')
        return
    is_uncertain = _is_uncertain(footnote)
    remainder = footnote.content[comment_match.end():]
    comment = remainder[:-1].strip() if is_uncertain else remainder.strip()
    line.editor_comment = EditorComment(comment=comment, is_uncertain=is_uncertain)
+
def _process_word_match(page, footnote, line_match, word_text, line_number, parent_word_composition=None):
    """Process footnote if there is a word reference.

    Finds the word on line `line_number` whose text, edited text, or
    overwritten word part matches `word_text` and attaches the appropriate
    editor comment.  When nothing matches and `word_text` contains spaces,
    each whitespace-separated part is retried recursively.
    `parent_word_composition` carries the full multi-word reference during
    recursion; it is currently only passed through, never read.
    """
    # candidate words whose (edited) text matches word_text on the referenced line
    referred_words = [ word for word in page.words\
            if word.line_number == line_number\
            and (word.text == word_text\
            or re.match(rf'\W*{word_text}\W', word.text)\
            or word.edited_text == word_text) ]
    # words with a part that overwrites a word whose text matches word_text
    overwritten_word_matches = [ word for word in page.words\
            if word.line_number == line_number\
            and len(word.word_parts) > 0\
            and len([word_part for word_part in word.word_parts\
                if word_part.overwrites_word is not None\
                and word_part.overwrites_word.text == word_text]) > 0]
    if len(referred_words) > 0\
       or len(overwritten_word_matches) > 0:
        word = None
        if len(referred_words) == 1:
            word = referred_words[0]
        elif len(overwritten_word_matches) > 0:
            word = [ word_part.overwrites_word for word_part in overwritten_word_matches[0].word_parts\
                    if word_part.overwrites_word is not None and word_part.overwrites_word.text == word_text][0]
        else:
            # NOTE(review): assumes at least one of several referred_words matches
            # word_text exactly; otherwise this raises IndexError — confirm whether
            # a fallback is needed.
            word = [ better_word for better_word in referred_words if better_word.text == word_text][0]
        # classify the footnote by its marker; order of checks defines priority
        atypical_match = re.match(ATYPICAL_GROUP, footnote.content)
        correction_match = re.match(EDITOR_CORRECTION_GROUP, footnote.content)
        clarification_match = re.match(CLARIFICATION_GROUP, footnote.content)
        is_uncertain = re.match(UNCERTAINTY_WORD_GROUP, footnote.content) is not None
        if correction_match is not None:
            correction = correction_match.group(3).strip()
            word.editor_comment = EditorCorrection(correction_text=correction, is_uncertain=is_uncertain)
            if not is_uncertain:
                # a certain correction also updates the word's edited text
                word.edited_text = correction
        elif clarification_match is not None:
            word.editor_comment = Clarification(text=footnote.extract_part(word_text, css_filter='bold;'))
        elif atypical_match is not None:
            # atypical writing may or may not carry a bold text snippet
            text = footnote.extract_part(word_text, css_filter='bold;')\
                    if footnote.markup_contains_css_filter('bold;')\
                    else None
            word.editor_comment = AtypicalWriting(text=text)
        elif is_uncertain:
            word.editor_comment = UncertainDecipherment()
        else:
            comment_match = re.match(COMMENT_GROUP, footnote.content)
            if comment_match is not None:
                is_uncertain = _is_uncertain(footnote)
                # strip a trailing uncertainty marker from the comment text
                comment = footnote.content[comment_match.end():-1].strip()\
                        if is_uncertain\
                        else footnote.content[comment_match.end():].strip()
                word.editor_comment = EditorComment(comment=comment, is_uncertain=is_uncertain)
            else:
                warnings.warn(f'Unknown editor comment for word "{word.text}": <{footnote}>')
    elif re.match(r'.*\s.*', word_text):
        # multi-word reference: try each whitespace-separated part on its own
        for word_part in word_text.split(' '):
            _process_word_match(page, footnote, line_match, word_part, line_number, parent_word_composition=word_text)
    else:
        warnings.warn(f'No word found with text "{word_text}" on line {line_number}: <{footnote}>')
+
def usage():
    """Print usage information (main's docstring) for this script.
    """
    print(main.__doc__)
+
def main(argv):
    """This program can be used to process the footnotes of a page.

    svgscripts/process_footnotes.py [OPTIONS]

    a xml file about a manuscript, containing information about its pages.
    a xml file about a page, containing information about svg word positions.

    OPTIONS:
        -h|--help        show help

    :return: exit code (int)
    """
    try:
        opts, args = getopt.getopt(argv, "h", ["help" ])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
    if len(args) < 1:
        usage()
        return 2
    file_a = args[0]
    if not isfile(file_a):
        raise FileNotFoundError('File {} does not exist!'.format(file_a))
    # a manuscript file provides page metadata; a page file is processed directly
    manuscript_file = file_a\
            if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\
            else None
    counter = 0
    for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK):
        if not UNITTESTING:
            print(Fore.CYAN + f'Processing {page.title}, {page.number} ...' + Style.RESET_ALL)
            back_up(page, page.xml_file)
        categorize_footnotes(page, manuscript_file=manuscript_file)
        counter += 1
    if not UNITTESTING:
        print(Style.RESET_ALL + f'[{counter} pages processed]')
    return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
Index: tests_svgscripts/test_clarification.py
===================================================================
--- tests_svgscripts/test_clarification.py (revision 0)
+++ tests_svgscripts/test_clarification.py (revision 96)
@@ -0,0 +1,29 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.clarification import Clarification
+from datatypes.editor_comment import EditorComment
+from datatypes.text import Text
+
class TestClarification(unittest.TestCase):
    """Tests for datatypes.clarification.Clarification."""

    def test_attach(self):
        # round-trip: attach to an XML tree, then rebuild via the EditorComment factory
        original = Clarification(text=Text(content='text'))
        tree = ET.Element('asdf')
        original.attach_object_to_tree(tree)
        rebuilt = EditorComment.create_cls_from_node(tree.xpath(EditorComment.XML_TAG)[0])
        self.assertEqual(type(rebuilt), Clarification)
        self.assertEqual(rebuilt.text.content, original.text.content)

    def test_semantic(self):
        pass
        #print(Clarification.get_semantic_dictionary())
+
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_atypical_writing.py
===================================================================
--- tests_svgscripts/test_atypical_writing.py (revision 0)
+++ tests_svgscripts/test_atypical_writing.py (revision 96)
@@ -0,0 +1,28 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.editor_comment import EditorComment
+from datatypes.uncertain_decipherment import UncertainDecipherment
+
class TestAtypicalWriting(unittest.TestCase):
    """Tests for datatypes.atypical_writing.AtypicalWriting.

    NOTE(review): this file previously contained a verbatim copy of
    test_uncertain_decipherment.py and never exercised AtypicalWriting.
    """

    def test_attach(self):
        # local imports: the module header of this file still imports UncertainDecipherment only
        from datatypes.atypical_writing import AtypicalWriting
        from datatypes.text import Text
        comment = AtypicalWriting(text=Text(content='text'))
        tree = ET.Element('asdf')
        comment.attach_object_to_tree(tree)
        commentB = EditorComment.create_cls_from_node(tree.xpath(EditorComment.XML_TAG)[0])
        self.assertEqual(type(commentB), AtypicalWriting)
        self.assertEqual(commentB.text.content, comment.text.content)

    def test_semantic(self):
        pass
        #print(AtypicalWriting.get_semantic_dictionary())
+
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_process_footnotes.py
===================================================================
--- tests_svgscripts/test_process_footnotes.py (revision 0)
+++ tests_svgscripts/test_process_footnotes.py (revision 96)
@@ -0,0 +1,47 @@
+import unittest
+from os import sep, path, remove
+from os.path import isdir, isfile, dirname
+import shutil
+import sys
+import lxml.etree as ET
+import warnings
+import sys
+
+sys.path.append('svgscripts')
+
+from datatypes.footnotes import extract_footnotes
+from datatypes.page import Page
+import process_footnotes
+from process_footnotes import categorize_footnotes, main
+
class TestExtractFootnotes(unittest.TestCase):
    """Tests for process_footnotes.categorize_footnotes and main."""

    def setUp(self):
        # suppress file writes inside the module under test
        process_footnotes.UNITTESTING = True
        DATADIR = path.dirname(__file__) + sep + 'test_data'
        self.test_footnote = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.test_footnote_verso = DATADIR + sep + 'N_VII_1_xp5_4_page5.svg'
        self.test_footnote_recto = DATADIR + sep + 'N_VII_1_xp5_4_page6.svg'
        self.test_footnote_multi = DATADIR + sep + 'N_VII_1_xp5_4_page13.svg'
        self.test_footnote_multi_xml = DATADIR + sep + 'N_VII_1_page013.xml'
        self.test_categorize_footnote = DATADIR + sep + 'N_VII_1_page006.xml'

    def test_categorize_footnotes(self):
        page = Page(self.test_categorize_footnote)
        footnotes = extract_footnotes(page, svg_file=self.test_footnote_recto)
        categorize_footnotes(page, footnotes)
        words_with_comments = [ word for word in page.words if word.editor_comment is not None ]
        self.assertEqual(len(words_with_comments), 4)
        lines_with_comments = [ line for line in page.lines if line.editor_comment is not None ]
        self.assertEqual(len(lines_with_comments), 1)
        # NOTE(review): the remainder depends on a file outside test_data
        # ('xml/W_II_1_page141.xml' relative to the CWD) and will fail elsewhere —
        # TODO move the fixture into test_data or skip when missing.
        page = Page('xml/W_II_1_page141.xml')
        footnotes = extract_footnotes(page)
        categorize_footnotes(page, footnotes, debug=True)
        words_with_comments = [ word for word in page.words if word.editor_comment is not None ]
        #self.assertEqual(len(words_with_comments), 9)

    def test_main(self):
        # NOTE(review): relies on 'xml/N_VII_1_page005.xml' relative to CWD — TODO move into test_data
        self.assertEqual(main(['xml/N_VII_1_page005.xml']), 0)
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_uncertain_decipherment.py
===================================================================
--- tests_svgscripts/test_uncertain_decipherment.py (revision 0)
+++ tests_svgscripts/test_uncertain_decipherment.py (revision 96)
@@ -0,0 +1,28 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.editor_comment import EditorComment
+from datatypes.uncertain_decipherment import UncertainDecipherment
+
class TestUncertainDecipherment(unittest.TestCase):
    """Tests for datatypes.uncertain_decipherment.UncertainDecipherment."""

    def test_attach(self):
        # round-trip through an XML tree and the EditorComment factory
        original = UncertainDecipherment()
        tree = ET.Element('asdf')
        original.attach_object_to_tree(tree)
        rebuilt = EditorComment.create_cls_from_node(tree.xpath(EditorComment.XML_TAG)[0])
        self.assertEqual(type(rebuilt), UncertainDecipherment)
        self.assertEqual(rebuilt.is_uncertain, original.is_uncertain)

    def test_semantic(self):
        pass
        #print(UncertainDecipherment.get_semantic_dictionary())
+
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_footnotes.py
===================================================================
--- tests_svgscripts/test_footnotes.py (revision 95)
+++ tests_svgscripts/test_footnotes.py (revision 96)
@@ -1,53 +1,67 @@
import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
-from datatypes.footnotes import FootnoteColumns, extract_footnotes, extract_footnotes_as_strings, UNITTESTING
+import datatypes.footnotes
+from datatypes.footnotes import FootnoteColumns, extract_footnotes, extract_footnotes_as_strings, UNITTESTING, DEBUG
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkriptionField import TranskriptionField
class TestExtractFootnotes(unittest.TestCase):
    """Tests for datatypes.footnotes (extract_footnotes, FootnoteColumns)."""

    def setUp(self):
        # set the module-level flag (a plain local assignment would have no effect)
        datatypes.footnotes.UNITTESTING = True
        DATADIR = path.dirname(__file__) + sep + 'test_data'
        self.test_footnote = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.test_footnote_verso = DATADIR + sep + 'N_VII_1_xp5_4_page5.svg'
        self.test_footnote_recto = DATADIR + sep + 'N_VII_1_xp5_4_page6.svg'
        self.test_footnote_multi = DATADIR + sep + 'N_VII_1_xp5_4_page13.svg'
        self.test_footnote_multi_xml = DATADIR + sep + 'N_VII_1_page013.xml'
        self.test_categorize_footnote = DATADIR + sep + 'N_VII_1_page006.xml'

    def test_extract_footnotes(self):
        footnotes = extract_footnotes_as_strings(svg_file=self.test_footnote_multi, contains_string='Anschlußzeichen')
        self.assertEqual(len(footnotes), 4)
        page = Page(self.test_footnote_multi_xml)
        footnotes = extract_footnotes(page, svg_file=self.test_footnote_multi, contains_string='Anschlußzeichen')
        self.assertEqual(len(footnotes), 4)
        footnotes = extract_footnotes(page, svg_file=self.test_footnote_multi)
        self.assertEqual(len(footnotes), 7)

    def test_columns(self):
        svg_tree = ET.parse(self.test_footnote_multi)
        transkription_field = TranskriptionField(self.test_footnote_multi)
        nodes_in_footnote_area = [ item for item in filter(lambda x: Matrix.IS_IN_FOOTNOTE_AREA(x.get('transform'), transkription_field),\
                svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))]
        bottom_values = sorted([ bottom_value for bottom_value in set(Matrix(transform_matrix_string=item.get('transform')).getY() for item in nodes_in_footnote_area) ])
        footnote_columns = FootnoteColumns(svg_tree.getroot().nsmap, nodes_in_footnote_area, bottom_values, None)
        self.assertEqual(len(footnote_columns.footnote_columns), 2)
        footnote_columns.register_index(184)
        footnote_columns.append('asdf')
        self.assertEqual(len(footnote_columns.footnote_columns[0]), 1)
        with self.assertRaises(Exception):
            FootnoteColumns(svg_tree.getroot().nsmap, [], bottom_values, None)
        # fix: the original hard-coded an absolute path on a developer machine,
        # making this test error everywhere else; skip gracefully when missing.
        fail_svg = '/home/knister0/ownCloud/myNietzscheDE/data_ai_export/N_VII_1_xp5_4_page137.svg'
        if not isfile(fail_svg):
            self.skipTest(f'fixture not available: {fail_svg}')
        svg_tree = ET.parse(fail_svg)
        transkription_field = TranskriptionField(fail_svg)
        nodes_in_footnote_area = FootnoteColumns.EXTRACT_NODES_IN_FOOTNOTE_AREA(svg_tree, transkription_field)
        bottom_values = FootnoteColumns.GET_BOTTOM_VALUES(nodes_in_footnote_area)
        footnote_columns = FootnoteColumns(svg_tree.getroot().nsmap, nodes_in_footnote_area, bottom_values, None, debug=True)
        self.assertEqual(len(footnote_columns.footnote_columns), 2)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_line_continuation.py
===================================================================
--- tests_svgscripts/test_line_continuation.py (revision 0)
+++ tests_svgscripts/test_line_continuation.py (revision 96)
@@ -0,0 +1,29 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.line_continuation import LineContinuation
+from datatypes.editor_comment import EditorComment
+from datatypes.reference import Reference
+
class TestLineContinuation(unittest.TestCase):
    """Tests for datatypes.line_continuation.LineContinuation."""

    def test_attach(self):
        # round-trip through an XML tree and the EditorComment factory
        tree = ET.Element('asdf')
        original = LineContinuation.create_cls('1,3')
        original.attach_object_to_tree(tree)
        rebuilt = EditorComment.create_cls_from_node(tree.xpath(EditorComment.XML_TAG)[0])
        self.assertEqual(type(rebuilt), LineContinuation)
        self.assertEqual(rebuilt.reference.first_line, original.reference.first_line)

    def test_semantic(self):
        pass
        #print(LineContinuation.get_semantic_dictionary())
+
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_text.py
===================================================================
--- tests_svgscripts/test_text.py (revision 95)
+++ tests_svgscripts/test_text.py (revision 96)
@@ -1,65 +1,91 @@
import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.page import Page
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text
class TestText(unittest.TestCase):
    """Tests for datatypes.text.Text (standoff markup handling)."""

    def setUp(self):
        DATADIR = dirname(__file__) + sep + 'test_data'
        if not isdir(DATADIR):
            DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
        self.test_file = DATADIR + sep + 'test.xml'
        self.test_svg_file = DATADIR + sep + 'test421.svg'
        self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
        self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
        self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
        self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
        self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'

    def test_semantic(self):
        pass
        #print(Text.get_semantic_dictionary())

    def test_attach_to_tree(self):
        # round-trip a Text with two markups through an XML tree
        empty_tree = ET.ElementTree(ET.Element('page'))
        content = 'asdf'
        standoff_tag = StandoffTag('bold', 0, len(content)-1)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content),id='1')
        text = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        text.attach_object_to_tree(empty_tree)
        text = Text.create_cls_from_node(empty_tree.xpath('//' + Text.XML_TAG)[0])
        self.assertEqual(text.content, content)
        self.assertEqual(text.id, '0')
        self.assertEqual(len(text.standoff_markups), 2)
        #print(ET.dump(empty_tree.getroot()))

    def test_extract(self):
        # extract_part keeps overlapping markups; css_filter narrows them down
        content = 'asdfa'
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        textB = textA.extract_part('sdf')
        self.assertEqual(len(textB.standoff_markups), 2)
        textB = textA.extract_part('sdf', css_filter='bold')
        self.assertEqual(len(textB.standoff_markups), 1)
        """
        content = '26: von „Regel]¿'
        textA = Text(content, standoff_markups=[ StandoffTag('bold', 6, 9)])
        print(textA.extract_part('von', css_filter='bold'))
        print(textA.extract_part('„Regel', css_filter='bold'))
        """

    def test_markup_contains_css_filter(self):
        content = 'asdfa'
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        self.assertTrue(textA.markup_contains_css_filter('bold'))
        self.assertTrue(textA.markup_contains_css_filter('italic'))
        textA.standoff_markups.pop(0)
        self.assertFalse(textA.markup_contains_css_filter('bold'))

    def test_join(self):
        # join concatenates contents with a space and merges markups
        content = 'asdfa'
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textB = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        textA.join(textB)
        self.assertEqual(textA.content, content + ' ' + content)

    def test_create_from_html(self):
        # NOTE(review): the html fixtures below appear to have lost their markup
        # tags (e.g. '<b>') in an export/extraction step — the assertions on
        # standoff markup counts only make sense with tags present; verify against VCS.
        html = 'asdf test the best'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 3)
        self.assertEqual(text.standoff_markups[0].startIndex, text.standoff_markups[1].startIndex)
        self.assertEqual(text.standoff_markups[0].endIndex, text.standoff_markups[1].endIndex)
        html = 'asdf test'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 1)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_line.py
===================================================================
--- tests_svgscripts/test_line.py (revision 95)
+++ tests_svgscripts/test_line.py (revision 96)
@@ -1,35 +1,45 @@
import unittest
from os import sep, path
from os.path import isdir, dirname
import lxml.etree as ET
import sys
import sys
sys.path.append('svgscripts')
from datatypes.line import Line
+from datatypes.line_continuation import LineContinuation
class TestLineNumber(unittest.TestCase):
    """Tests for datatypes.line.Line."""

    def setUp(self):
        DATADIR = dirname(__file__) + sep + 'test_data'
        self.test_target_file = DATADIR + sep + 'test.xml'
        self.id = 24

    def test_init(self):
        lnr = Line(id=self.id)
        self.assertEqual(lnr.id, self.id)

    def test_init_from_xml(self):
        xml_tree = ET.parse(self.test_target_file)
        lines = [ Line.create_cls_from_node(node=node) for node in xml_tree.getroot().xpath(Line.XML_TAG) ]
        self.assertEqual(len(lines), 49)
        self.assertEqual(lines[0].id, 1)
        self.assertEqual(lines[48].id, 49)

    def test_get_semanticAndDataDict(self):
        #print(Line.get_semantic_dictionary())
        pass

    def test_attach(self):
        # round-trip a Line (carrying a LineContinuation editor comment) through an XML tree
        tree = ET.Element('page')
        xml_tree = ET.parse(self.test_target_file)
        line = [ Line.create_cls_from_node(node=node) for node in xml_tree.getroot().xpath(Line.XML_TAG) ][1]
        line.editor_comment = LineContinuation.create_cls('1,3')
        line.attach_object_to_tree(tree)
        lineB = [ Line.create_cls_from_node(node=node) for node in tree.xpath(Line.XML_TAG) ][0]
        self.assertEqual(lineB.id, line.id)
        self.assertEqual(lineB.editor_comment.reference.first_line, line.editor_comment.reference.first_line)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_matrix.py
===================================================================
--- tests_svgscripts/test_matrix.py (revision 95)
+++ tests_svgscripts/test_matrix.py (revision 96)
@@ -1,206 +1,214 @@
import unittest
import lxml.etree as ET
from os import sep, path
from os.path import isdir, dirname
import sys
sys.path.append('svgscripts')
from datatypes.matrix import Matrix
from datatypes.transkriptionField import TranskriptionField
class FakeTF:
    """Minimal stand-in for TranskriptionField exposing only its bounding box."""

    def __init__(self):
        # Coordinates copied from a real transcription-field fixture.
        self.xmin = 297.6379999999997
        self.xmax = 765.354
        self.ymin = 157.328
        self.ymax = 752.6040160033832
class TestMatrix(unittest.TestCase):
    """Tests for datatypes.matrix.Matrix: SVG transform parsing, rotation
    handling and the positional predicates used during transcription."""

    def setUp(self):
        # Reference translation used to build identity-with-offset matrices.
        self.x = 219.4058
        self.y = 106.4634
        self.matrix_string = 'matrix(1 0 0 1 {} {})'.format(str(self.x), str(self.y))
        self.test_data_dir = dirname(__file__) + sep + 'test_data'
        if not isdir(self.test_data_dir):
            self.test_data_dir = dirname(dirname(__file__)) + sep + 'test_data'
        self.test_file = self.test_data_dir + sep + 'test_ai.svg'
        self.rotation_angle = 20
        # matrix(0.94 0.342 -0.342 0.94 0 0) corresponds to rotate(20) above.
        self.rotation_matrix_string = 'matrix(0.94 0.342 -0.342 0.94 0 0)'
        self.test_margin_field_file = self.test_data_dir + sep + 'W_I_8_neu_125-01.svg'
        self.test_place_printing_verso = self.test_data_dir + sep + 'N_VII_1_xp5_4_page5.svg'
        self.test_place_printing_recto = self.test_data_dir + sep + 'N_VII_1_xp5_4_page6.svg'

    def test_Matrix(self):
        matrix = Matrix(self.matrix_string)
        self.assertEqual(matrix.getX(), self.x)
        self.assertEqual(matrix.add2X(1), self.x + 1)
        self.assertEqual(matrix.getY(), self.y)
        # Comma-separated syntax must be accepted as well.
        matrix = Matrix('matrix(0.98966578,0.1433933,-0.0913015,0.9958233,0,0)')
        self.assertEqual(matrix.getX(), 0)
        # Scientific notation in the components must parse without error.
        matrix = Matrix('matrix(1 2.998719e-04 -2.998719e-04 1 415.3643 476.7988)')

    def test_Matrix_rotation(self):
        rotation_string = 'rotate({})'.format(self.rotation_angle)
        rotation_stringC = 'rotate(-{})'.format(self.rotation_angle)
        matrixA = Matrix(rotation_string)
        matrixB = Matrix(self.rotation_matrix_string)
        matrixC = Matrix(rotation_stringC)
        # rotate(20) and its explicit matrix form must yield identical components.
        self.assertEqual(matrixA.matrix[Matrix.A], matrixB.matrix[Matrix.A])
        self.assertEqual(matrixA.matrix[Matrix.B], matrixB.matrix[Matrix.B])
        self.assertEqual(matrixA.matrix[Matrix.C], matrixB.matrix[Matrix.C])
        self.assertEqual(matrixA.matrix[Matrix.D], matrixB.matrix[Matrix.D])
        self.assertEqual(matrixA.matrix[Matrix.E], matrixB.matrix[Matrix.E])
        self.assertEqual(matrixA.matrix[Matrix.F], matrixB.matrix[Matrix.F])
        self.assertEqual(matrixA.toString(), self.rotation_matrix_string)
        self.assertEqual(matrixC.toCSSTransformString(), 'rotate(-{}deg)'.format(self.rotation_angle))

    def test_get_rotation_direction(self):
        rotation_string = 'rotate(-{})'.format(self.rotation_angle)
        matrixA = Matrix(rotation_string)
        matrixB = Matrix(self.rotation_matrix_string)
        matrixC = Matrix(self.matrix_string)
        self.assertEqual(matrixA.get_rotation_direction(), Matrix.UP)
        self.assertEqual(matrixB.get_rotation_direction(), Matrix.DOWN)
        self.assertEqual(matrixC.get_rotation_direction(), Matrix.STRAIGHT)

    def test_isRotationMatrix(self):
        rotation_string = 'rotate({})'.format(self.rotation_angle)
        matrixA = Matrix(rotation_string)
        self.assertEqual(matrixA.isRotationMatrix(), True)
        matrixB = Matrix(self.matrix_string)
        self.assertEqual(matrixB.isRotationMatrix(), False)

    def test_toCSSTransformString(self):
        rotation_string = 'rotate({})'.format(self.rotation_angle)
        matrixA = Matrix(rotation_string)
        self.assertEqual(matrixA.toCSSTransformString(), 'rotate({}deg)'.format(self.rotation_angle))
        matrixB = Matrix(self.rotation_matrix_string)
        self.assertEqual(matrixB.toCSSTransformString(), 'rotate({}deg)'.format(self.rotation_angle))

    def test_Matrix_Exception(self):
        # A matrix(...) with only five components is invalid.
        with self.assertRaises(Exception):
            Matrix('matrix({})'.format(' '.join([ '0.0' for i in range(5)])))

    def test_Matrix_TranskriptionField(self):
        tf = TranskriptionField(self.test_file)
        # With a transcription field, coordinates are relative to the field.
        matrix = Matrix(self.matrix_string, transkription_field=tf)
        self.assertEqual(round(matrix.getX(), 3) , 28.706)
        self.assertEqual(round(matrix.getY(), 3) , 31.563)

    def test_get_transformed_positions(self):
        # Test relies on the example from "https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/transform"
        x = 10
        y = 10
        width = 30
        height = 20
        matrix = Matrix(transform_matrix_string='matrix(3 1 -1 3 30 40)')
        new_x, new_y, new_width, new_height = matrix.get_transformed_positions(x=x, y=y, width=width, height=height)
        self.assertEqual(new_x, 50)
        self.assertEqual(new_y, 80)
        self.assertEqual(new_width, 90)
        self.assertEqual(new_height, 60)

    def test_is_matrix_horizontal(self):
        matrix = Matrix(transform_matrix_string='matrix(3 1 -1 3 30 40)')
        self.assertEqual(matrix.is_matrix_horizontal(), False)
        matrix = Matrix(transform_matrix_string='matrix(1 0 0 1 30 40)')
        self.assertEqual(matrix.is_matrix_horizontal(), True)

    def test_is_part_of_transkription_field(self):
        tf = TranskriptionField(self.test_file)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 244.1211 91.7134)'})
        self.assertEqual(Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(tf, text_node=text_node), True)
        # Nodes above, left of, below and right of the field must be rejected.
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 244.1211 51.7134)'})
        self.assertEqual(Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(tf, text_node=text_node), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 44.1211 91.7134)'})
        self.assertEqual(Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(tf, text_node=text_node), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 244.1211 891.7134)'})
        self.assertEqual(Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(tf, text_node=text_node), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 844.1211 91.7134)'})
        self.assertEqual(Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(tf, text_node=text_node), False)
        # A rotated node with tspan content inside the field must be accepted.
        text_node = ET.Element('text', attrib={'transform': 'matrix(0.866 -0.5 0.5 0.866 356.4303 753.4836)'})
        tspan_node = ET.SubElement(text_node, 'tspan', attrib={'x': '41.82', 'y': '0'})
        tspan_node.text = 'De'
        fake_tf = FakeTF()
        self.assertEqual(Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(fake_tf, text_node=text_node), True)

    def test_is_nearx_tf(self):
        tf = TranskriptionField(self.test_file)
        matrix_string = 'matrix(1 0 0 1 180.8755 315.9131)'
        self.assertEqual(Matrix.IS_NEARX_TRANSKRIPTION_FIELD(matrix_string, tf), True)
        matrix_string = 'matrix(1 0 0 1 100.8755 315.9131)'
        self.assertEqual(Matrix.IS_NEARX_TRANSKRIPTION_FIELD(matrix_string, tf), False)

    def test_do_conversion_factors_differ(self):
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(None, None), False)
        matrix_a = Matrix('matrix(1 0 0 1 180.8755 315.9131)')
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, None), True)
        # Only the conversion factors (a-d) matter, not the offsets (e, f).
        matrix_b = Matrix('matrix(1 0 0 1 100.8755 315.9131)')
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b), False)
        matrix_b = Matrix('matrix(0 0 0 1 100.8755 315.9131)')
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b), True)
        matrix_b = Matrix('matrix(1 1 0 1 100.8755 315.9131)')
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b), True)
        matrix_b = Matrix('matrix(1 0 1 1 100.8755 315.9131)')
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b), True)
        matrix_b = Matrix('matrix(1 0 0 0 100.8755 315.9131)')
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b), True)

    def test_clone_transformation_matrix(self):
        matrix_a = Matrix(matrix_list=[ 1, 0, 0, 1, 180.8755, 315.9131 ])
        matrix_b = matrix_a.clone_transformation_matrix()
        # The clone keeps the conversion factors but zeroes the translation.
        self.assertEqual(Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix_a, matrix_b), False)
        self.assertEqual(matrix_b.matrix[Matrix.E], 0)
        self.assertEqual(matrix_b.matrix[Matrix.F], 0)

    def test_toString(self):
        matrix_string = 'matrix(1.0 0.0 0.0 1.0 180.8755 315.9131)'
        matrix = Matrix(matrix_string)
        self.assertEqual(matrix.toString(), matrix_string)

    def test_get_semanticAndDataDict(self):
        matrix = Matrix('rotate(20)')
        #self.assertEqual(matrix.get_data_dictionary()['body'].get('matrix'), matrix.matrix)

    def test_is_in_margin_field(self):
        tf = TranskriptionField(self.test_margin_field_file)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 178.8916 182.0127)'})
        self.assertEqual(Matrix.IS_IN_MARGIN_FIELD(text_node.get('transform'), tf), True)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 357.7339 818.3276)'})
        self.assertEqual(Matrix.IS_IN_MARGIN_FIELD(text_node.get('transform'), tf), False)

    def test_is_in_place_of_printing_area(self):
        tf = TranskriptionField(self.test_place_printing_verso)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 42.5195 575.8736)'})
        self.assertEqual(Matrix.IS_IN_PLACE_OF_PRINTING_AREA(text_node.get('transform'), tf), True)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 109.145 575.8736)'})
        self.assertEqual(Matrix.IS_IN_PLACE_OF_PRINTING_AREA(text_node.get('transform'), tf), True)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 191.0571 575.8736)'})
        self.assertEqual(Matrix.IS_IN_PLACE_OF_PRINTING_AREA(text_node.get('transform'), tf), False)
        tf = TranskriptionField(self.test_place_printing_recto)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 28.3462 575.8736)'})
        self.assertEqual(Matrix.IS_IN_PLACE_OF_PRINTING_AREA(text_node.get('transform'), tf), True)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 28.3462 583.8736)'})
        self.assertEqual(Matrix.IS_IN_PLACE_OF_PRINTING_AREA(text_node.get('transform'), tf), True)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 170.0791 575.8736)'})
        self.assertEqual(Matrix.IS_IN_PLACE_OF_PRINTING_AREA(text_node.get('transform'), tf), False)

    def test_is_in_footnote_area(self):
        # Footnote area and place-of-printing area are disjoint: the same
        # coordinates that are "printing area" above are "footnote area" here.
        tf = TranskriptionField(self.test_place_printing_verso)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 42.5195 575.8736)'})
        self.assertEqual(Matrix.IS_IN_FOOTNOTE_AREA(text_node.get('transform'), tf), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 109.145 575.8736)'})
        self.assertEqual(Matrix.IS_IN_FOOTNOTE_AREA(text_node.get('transform'), tf), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 191.0571 575.8736)'})
        self.assertEqual(Matrix.IS_IN_FOOTNOTE_AREA(text_node.get('transform'), tf), True)
        tf = TranskriptionField(self.test_place_printing_recto)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 28.3462 575.8736)'})
        self.assertEqual(Matrix.IS_IN_FOOTNOTE_AREA(text_node.get('transform'), tf), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 28.3462 583.8736)'})
        self.assertEqual(Matrix.IS_IN_FOOTNOTE_AREA(text_node.get('transform'), tf), False)
        text_node = ET.Element('text', attrib={'transform': 'matrix(1 0 0 1 170.0791 575.8736)'})
        self.assertEqual(Matrix.IS_IN_FOOTNOTE_AREA(text_node.get('transform'), tf), True)
        # NOTE(review): the following check against a developer-local file is
        # deliberately disabled; keep it for reference.
        """
        svg_tree = ET.parse('/home/knister0/ownCloud/myNietzscheDE/data_ai_export//N_VII_1_xp5_4_page10.svg')
        tf = TranskriptionField(svg_tree.docinfo.URL)
        namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
        node = svg_tree.xpath('//ns:text[@transform="matrix(1 0 0 1 28.3462 575.8736)"]', namespaces=namespaces)[0]
        self.assertFalse(Matrix.IS_IN_FOOTNOTE_AREA(node.get('transform'), tf))
        self.assertTrue(Matrix.NODE_HAS_CONTENT_IN_FOOTNOTE_AREA(node, tf))
        """
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_editor_comment.py
===================================================================
--- tests_svgscripts/test_editor_comment.py (revision 0)
+++ tests_svgscripts/test_editor_comment.py (revision 96)
@@ -0,0 +1,37 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.editor_comment import EditorComment
+from datatypes.clarification import Clarification
+
class TestEditorComment(unittest.TestCase):
    """Tests for datatypes.editor_comment.EditorComment."""

    def test_attach(self):
        original = EditorComment(comment="test")
        tree = ET.ElementTree(ET.Element('asdf'))
        original.attach_object_to_tree(tree)
        restored = EditorComment.create_cls_from_node(tree.xpath(EditorComment.XML_TAG)[0])
        # Round-trip through XML must preserve the uncertainty flag.
        self.assertEqual(restored.is_uncertain, original.is_uncertain)

    def test_semantic(self):
        # Inspected manually: print(EditorComment.get_semantic_dictionary())
        pass

    @unittest.skip('tested with local file')
    def test_create_cls_from_node(self):
        tree = ET.parse('xml/N_VII_1_page005.xml')
        node = tree.xpath('//' + EditorComment.XML_TAG)[0]
        comment = EditorComment.create_cls_from_node(node)
        self.assertEqual(type(comment), Clarification)
        page = Page('xml/N_VII_1_page005.xml')
        word = [ current_word for current_word in page.words if current_word.text == 'Geschehen' ][0]
        self.assertEqual(type(word.editor_comment), Clarification)
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_editor_correction.py
===================================================================
--- tests_svgscripts/test_editor_correction.py (revision 0)
+++ tests_svgscripts/test_editor_correction.py (revision 96)
@@ -0,0 +1,29 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.editor_comment import EditorComment
+from datatypes.editor_correction import EditorCorrection
+
class TestEditorCorrection(unittest.TestCase):
    """Tests for datatypes.editor_correction.EditorCorrection."""

    def test_attach(self):
        original = EditorCorrection('test')
        tree = ET.Element('asdf')
        original.attach_object_to_tree(tree)
        restored = EditorComment.create_cls_from_node(tree.xpath(EditorComment.XML_TAG)[0])
        # The base-class factory must reconstruct the concrete subclass.
        self.assertEqual(type(restored), EditorCorrection)
        self.assertEqual(restored.is_uncertain, original.is_uncertain)
        self.assertEqual(restored.correction_text, original.correction_text)

    def test_semantic(self):
        # Inspected manually: print(EditorCorrection.get_semantic_dictionary())
        pass
+
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_attachable_object.py
===================================================================
--- tests_svgscripts/test_attachable_object.py (revision 95)
+++ tests_svgscripts/test_attachable_object.py (revision 96)
@@ -1,29 +1,47 @@
import unittest
from os import sep, path
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.attachable_object import AttachableObject
class TestObjectFail(AttachableObject):
    """Deliberately omits attach_object_to_tree: instantiation must raise TypeError."""

    def __init__(self):
        pass
class TestObjectOK(AttachableObject):
    """Minimal concrete AttachableObject used by the tests below."""

    def __init__(self):
        # An id is required by get_or_create_node_with_id.
        self.id = 1

    def attach_object_to_tree(self, target_tree):
        return True
class TestPositionalObject(unittest.TestCase):
    """Tests for the AttachableObject abstract base class."""

    def test_fail(self):
        # A subclass without attach_object_to_tree cannot be instantiated.
        with self.assertRaises(TypeError):
            TestObjectFail()

    def test_success(self):
        test_object = TestObjectOK()
        self.assertEqual(test_object.attach_object_to_tree(ET.ElementTree(ET.Element('asdf'))), True)

    def test_get_or_create_node(self):
        test_object = TestObjectOK()
        # Works both on an ElementTree and on a bare Element.
        tree = ET.ElementTree(ET.Element('asdf'))
        node = test_object.get_or_create_node(tree)
        self.assertEqual(node.tag, test_object.XML_TAG)
        tree = ET.Element('asdf')
        node = test_object.get_or_create_node(tree)
        self.assertEqual(node.tag, test_object.XML_TAG)
        tree = ET.Element('asdf')
        node = test_object.get_or_create_node_with_id(tree)
        self.assertEqual(node.tag, test_object.XML_TAG)
        # Requesting the same id twice must not create a duplicate node:
        # after switching to id 2 there are exactly two nodes in total.
        test_object.id = 2
        node = test_object.get_or_create_node_with_id(tree)
        node = test_object.get_or_create_node_with_id(tree)
        self.assertEqual(len(tree.xpath(test_object.XML_TAG)), 2)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_data/N_VII_1_page006.xml
===================================================================
--- tests_svgscripts/test_data/N_VII_1_page006.xml (revision 95)
+++ tests_svgscripts/test_data/N_VII_1_page006.xml (revision 96)
@@ -1,1276 +1,1276 @@
svgWordPosition2019-08-02 15:17:372019-08-02 15:17:372019-08-02 15:30:592019-08-02 15:30:59
- 2020-06-05 11:30:54
+ 2020-06-19 15:50:47
Index: py2ttl/class_spec.py
===================================================================
--- py2ttl/class_spec.py (revision 95)
+++ py2ttl/class_spec.py (revision 96)
@@ -1,228 +1,252 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This is an abstract class for all classes that are semantically relevant.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import abc
import inspect
import warnings
class UnSemanticClass:
    """
    Marker base class: its subclasses are excluded from the ontology,
    even when their superclasses are semantically relevant.
    """
    pass
class SemanticClass(metaclass=abc.ABCMeta):
    """
    This is an abstract class for all classes that are semantically relevant.

    Subclasses describe their ontology mapping via get_semantic_dictionary();
    the classmethods below build the individual dictionary entries.
    """
    HAS_PART = 'has_part'
    HAS_SEQNUM = 'has_seqnum'
    SINGLE_VALUE = 1
    LIST = -99
    CLASS_KEY = 'class'
    CARDINALITY = "cardinality"
    CARDINALITY_RESTRICTION = "cardinality_restriction"
    HAS_HOMOTYPIC_PARTS_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasHomotypicParts'
    HOMOTYPIC_HAS_TEXT_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasText'
    STOFF_STYLE_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#styleHasCSS'
    PROPERTY_NAME = "name"
    PROPERTY_LABEL = "label"
    PROPERTY_COMMENT = "comment"
    PROPERTIES_KEY = "properties"
    SUBCLASS_OF = "rdfs:subClassOf"
    SUBPROPERTYOF = "subPropertyOf"
    # Maps an ontology base URI to the super class a subproperty owner
    # is subclassed to (see return_dictionary_after_updating_super_classes).
    SUPER_CLASSES_DICT = { 'http://www.nie.org/ontology/homotypic': 'HomotypicEntity', 'http://www.nie.org/ontology/standoff': 'Style' }
    SUPER_PROPERTY = "super_property"
    THIS = "this"
    TYPE = "type"

    @classmethod
    def create_semantic_property_dictionary(cls, property_key, class_type, cardinality=0, cardinality_restriction='cardinality', name='', label='', comment='', subPropertyOf='') -> dict:
        """Create a semantic property dictionary.

        Here is how to make a subproperty:
        Pass the IRI of the super property as subPropertyOf=IRI,
        be sure that base_uri of IRI (as key) and Class identifier of super class (as value) are in cls.SUPER_CLASSES_DICT,
        then call cls.return_dictionary_after_updating_super_classes -> it will subclass the class that owns the subproperty
        to the super class.

        :return: semantic property dictionary (dict)
        """
        property_content = { SemanticClass.CLASS_KEY: class_type }
        if cardinality > 0:
            # A cardinality restriction only makes sense with a cardinality.
            property_content.update({ SemanticClass.CARDINALITY: cardinality})
            property_content.update({ SemanticClass.CARDINALITY_RESTRICTION: cardinality_restriction})
        # Optional annotations are added only when explicitly provided.
        if name != '':
            property_content.update({ SemanticClass.PROPERTY_NAME: name})
        if label != '':
            property_content.update({ SemanticClass.PROPERTY_LABEL: label})
        if comment != '':
            property_content.update({ SemanticClass.PROPERTY_COMMENT: comment})
        if subPropertyOf != '':
            property_content.update({ SemanticClass.SUBPROPERTYOF: subPropertyOf})
        return { property_key: property_content }

    @classmethod
    def get_class_dictionary(cls):
        """Creates and returns a class_dictionary with the keys cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
        """
        class_dict = {cls.THIS: cls }
        # cls.__dict__ (not getattr) so only declarations on cls itself count.
        if cls.__dict__.get('OWL_EQUIVALENTCLASSES') and len(cls.OWL_EQUIVALENTCLASSES) > 0:
            class_dict.update({'owl:equivalentClass': cls.OWL_EQUIVALENTCLASSES })
        if cls.__dict__.get('RDFS_SUBCLASSOF_LIST') and len(cls.RDFS_SUBCLASSOF_LIST) > 0:
            class_dict.update({cls.SUBCLASS_OF: cls.RDFS_SUBCLASSOF_LIST })
        # Record the direct semantic super class (if any) under cls.TYPE.
        direct_super_class = inspect.getclasstree([cls],unique=True)[0][0]
        if issubclass(direct_super_class, SemanticClass) and direct_super_class != SemanticClass:
            class_dict.update({cls.TYPE: direct_super_class})
        return class_dict

    def get_name_and_id(self):
        """Return an identification for object as 2-tuple (class name, id).

        The id falls back from 'id' over 'number' to a whitespace-normalized
        'title'; default is 0.
        """
        id = 0
        if 'id' in self.__dict__.keys():
            id = self.id
        elif 'number' in self.__dict__.keys():
            id = self.number
        elif 'title' in self.__dict__.keys():
            id = self.title.replace(' ', '_')
        return type(self).__name__, id

    def _get_list_of_type(self, list_type):
        """Return the first list attribute whose elements are of type list_type,
        an empty list if there is none.
        """
        for object_list in [ list_obj for list_obj in self.__dict__.values()\
                             if type(list_obj) == list ]:
            if len(object_list) > 0 and type(object_list[0]) == list_type:
                return object_list
        return []

    def get_object_from_list_with_id(self, object_type, object_id):
        """Return object from list if object has id == object_id,
        None if not found.
        """
        list_with_object = [ item for item in self._get_list_of_type(object_type)\
                             if item.id == object_id ]
        if len(list_with_object) > 0:
            return list_with_object[0]
        return None

    @classmethod
    def get_cls_hasPart_objectCls_dictionaries(cls, object_cls, xpath, object_seqnum_xpath=None, cardinality=0, cardinality_restriction='minCardinality'):
        """Return a dictionary containing the information for creating a class that can act
        as an intermediary between cls and a number of object_cls if object_cls has
        a position in a sequence of object_classes that belong to cls.
        """
        part_name = object_cls.__name__ + 'Part'
        has_part_name = object_cls.__name__.lower() + 'PartHas' + object_cls.__name__
        has_seqnum_name = object_cls.__name__.lower() + 'HasSeqNum'
        if object_seqnum_xpath is None:
            object_seqnum_xpath = xpath + '/@id'
        object_part_dictionary = { 'class': object_cls, 'cardinality': 1, 'xpath': xpath,\
            'name': has_part_name, 'label': '{0} has a {1}'.format(part_name, object_cls.__name__),\
            'comment': '{0} has a part, that is a {1}'.format(part_name, object_cls.__name__)}
        object_seqnum_dictionary = { 'class': int, 'cardinality': 1, 'xpath': object_seqnum_xpath,\
            'name': has_seqnum_name, 'label': '{0} has a sequence number'.format(part_name),\
            'comment': '{0} has a part, that stands in a sequence with this number'.format(part_name)}
        object_dictionary = { 'class_name': part_name, SemanticClass.HAS_PART: object_part_dictionary, SemanticClass.HAS_SEQNUM: object_seqnum_dictionary,\
            'label': '{0} part'.format(object_cls.__name__.lower()),\
            'comment': 'This class servers as a intermediary between {0} and {1}. {0} has some {1} in a specific sequence.'.format(cls.__name__, object_cls.__name__)}
        dictionary = { 'flag': 'ordered_list' , 'class': object_dictionary, 'cardinality': cardinality, 'cardinality_restriction': cardinality_restriction, 'xpath': xpath,\
            'name': cls.__name__.lower() + 'Has' + part_name, 'label': '{0} has a part that connects it with a {1}'.format(cls.__name__, object_cls.__name__),\
            'comment': '{0} has a part that connects it with a {1}, that has a position in a sequence of {1}'.format(cls.__name__, object_cls.__name__)}
        return dictionary

    @classmethod
    @abc.abstractmethod
    def get_semantic_dictionary(cls):
        """Creates a semantic dictionary with cls.CLASS_KEY and cls.PROPERTIES_KEY as its keys.

        The class-key points to a class_dictionary with the keys: cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
        Create initial dictionary using cls.get_class_dictionary():
            dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: {} }
        The properties_key points to a properties_dictionary with semantically relevant keys
        of self.__dict__ as keys. Use cls.create_semantic_property_dictionary(...) in order to
        add a property dictionary for each property as follows:
            dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary(property_key, ...))
        Return dictionary by using:
            cls.return_dictionary_after_updating_super_classes(dictionary)
        """
        pass

    def get_xml_conform_key_value_dictionary(self) -> dict:
        """Return a xml conform key value dictionary.

        Property keys have '_' replaced by '-' and are grouped under
        'attachable', 'attachable-list', 'builtins' and 'builtin-list'.
        """
        property_d = self.get_semantic_dictionary()[self.PROPERTIES_KEY]
        attachable, attachable_list, builtins, builtin_list = 'attachable', 'attachable-list', 'builtins', 'builtin-list'
        xml_d = { attachable: {}, attachable_list: {}, builtins: {}, builtin_list: {}}
        for key in property_d.keys():
            value = self.__dict__.get(key)
            # Skip missing/None values and empty lists.
            if value is not None and (type(value) != list or len(value) > 0):
                semantic_type = property_d[key][self.CLASS_KEY]\
                        if type(property_d[key]) is dict\
                        else property_d[key][0]
                if type(value) != list and semantic_type.__module__ == builtins:
                    # Booleans are serialized lowercase ('true'/'false') for XML.
                    if semantic_type == bool:
                        xml_d[builtins].update({key.replace('_','-'): str(value).lower()})
                    else:
                        xml_d[builtins].update({key.replace('_','-'): str(value)})
                elif semantic_type.__module__ != builtins:
                    attachable_key = attachable if type(value) != list else attachable_list
                    xml_d[attachable_key].update({key.replace('_','-'): value})
                else:
                    xml_d[builtin_list].update({key.replace('_','-'): value})
        return xml_d

    @classmethod
    def return_dictionary_after_updating_super_classes(cls, dictionary):
        """Return semantic dictionary after updating super classes if necessary.
        """
        if cls.PROPERTIES_KEY not in dictionary.keys():
            return dictionary
        # Base URIs of all declared super properties.
        subproperty_base_uri_set = set( value.get(cls.SUBPROPERTYOF).split('#')[0]\
                for value in dictionary[cls.PROPERTIES_KEY].values()\
                if bool(value.get(cls.SUBPROPERTYOF)) )
        for sub_property_base in subproperty_base_uri_set:
            # Subclass only when no subclass entry for this base URI exists yet;
            # more than one class may share a sub_property_base.
            if bool(cls.SUPER_CLASSES_DICT.get(sub_property_base))\
               and (\
                    cls.SUBCLASS_OF not in dictionary[cls.CLASS_KEY].keys()\
                    or len(dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]) == 0\
                    or len([ url for url in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF] if sub_property_base in url]) == 0\
               ):
                subclass_list = dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]\
                        if cls.SUBCLASS_OF in dictionary[cls.CLASS_KEY].keys()\
                        and len(dictionary[cls.CLASS_KEY].get(cls.SUBCLASS_OF)) > 0\
                        else []
                subclass_list.append(sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base))
                dictionary[cls.CLASS_KEY].update({cls.SUBCLASS_OF: subclass_list})
        return dictionary

    def __repr__(self) -> str:
        """Return a representation of all semantically relevant properties.
        """
        data_string = self.__str__()
        return f'<{data_string}>'

    def __str__(self) -> str:
        """Return a str of all semantically relevant properties.

        Properties that are missing, None, or empty lists are omitted.
        """
        name = type(self).__name__
        data = []
        for key in self.get_semantic_dictionary()[self.PROPERTIES_KEY].keys():
            if key in self.__dict__.keys():
                value = self.__dict__[key]
                # BUGFIX: the previous 'value != None or (list and non-empty)'
                # condition let empty lists through ([] != None is True);
                # filter them out, consistent with get_xml_conform_key_value_dictionary.
                if value is None or (type(value) == list and len(value) == 0):
                    continue
                data.append(f'{key}: {value}')
        data_string = ', '.join(data)
        return f'{name} {data_string}'
Index: py2ttl/convert.py
===================================================================
--- py2ttl/convert.py (revision 95)
+++ py2ttl/convert.py (revision 96)
@@ -1,106 +1,113 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py objects to ontology and data in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
from colorama import Fore, Style
import getopt
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
import re
import sys
sys.path.append('svgscripts')
from datatypes.manuscript import ArchivalManuscriptUnity
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from class_spec import SemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from py2ttl_data import Py2TTLDataConverter
from py2ttl_ontology import Py2TTLOntologyConverter
sys.path.append('shared_util')
from myxmlwriter import xml2dict
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
def usage():
    """Print the command line help (main's docstring) to stdout."""
    print(main.__doc__)
def main(argv):
    """This program can be used to convert py objects to a owl:Ontology and rdf data in turtle format.
    py2ttl/py2ttl_data.py [OPTIONS] [ ...]

        xml file of type shared_util.myxmlwriter.FILE_TYPE_XML_MANUSCRIPT.

    OPTIONS:
        -h|--help: show help
        -i|--include-status=STATUS include pages with status = STATUS. STATUS is a ':' seperated string of status, e.g. 'OK:faksimile merged'.

    :return: exit code (int)
    """
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    page_status_list = None
    try:
        opts, args = getopt.getopt(argv, "hi:", ["help", "include-status="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-i', '--include-status'):
            page_status_list = arg.split(':')
    if len(args) < 1 :
        usage()
        return 2
    # The ontology is generated once, before the first manuscript is converted;
    # data conversion then runs for every manuscript file on the command line.
    ontology_created = False
    ontology_converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    output = 2
    for manuscript_file in args:
        if not isfile(manuscript_file):
            usage()
            return 2
        if not ontology_created:
            print(Fore.CYAN + 'Create ontology from "{}" ...'.format(manuscript_file))
            if ontology_converter.create_ontology(datatypes_dir, target_ontology_file) == 0:
                print(Fore.GREEN + '[Ontology file {0} created]'.format(target_ontology_file))
                ontology_created = True
            else:
                return 2
        print(Fore.CYAN + 'Create data from "{}" ...'.format(manuscript_file))
        data_converter = Py2TTLDataConverter(manuscript_file, mapping_dictionary=ontology_converter.uri_mapping4cls_and_properties)
        # NOTE(review): 'output' keeps only the last manuscript's exit code;
        # earlier failures are not aggregated -- confirm this is intended.
        output = data_converter.convert(page_status_list=page_status_list)
    return output
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
Index: py2ttl/data_handler.py
===================================================================
--- py2ttl/data_handler.py (revision 95)
+++ py2ttl/data_handler.py (revision 96)
@@ -1,195 +1,196 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to add data to a rdf graph.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
from rdflib import RDF as ns_rdf
from os.path import isfile
import random
import warnings
from class_spec import SemanticClass
from config import DATA_URL
class RDFDataHandler:
    """
    This class can be used to add data to a rdf graph.
    """
    UNITTESTING = False
    # Maps python builtin types to the XSD/RDF datatypes used for literals.
    SIMPLE_DATA_TYPE_MAPPING = { int: XSD.integer, float: XSD.float, str: XSD.string, bool: XSD.boolean, list: RDF.List }

    def __init__(self, target_file, mapping_dictionary):
        """Initialize the handler.

        :param target_file: path the data graph will be serialized to
        :param mapping_dictionary: dict with keys 'ontology' and 'classes'
            mapping python class/property names to ontology URIs
        :raises Exception: if mapping_dictionary has no 'ontology' key
        """
        self.target_file = target_file
        self.mapping_dictionary = mapping_dictionary
        self.ontology_graph = Graph()
        self.data_graph = Graph()
        # Remembers the subject uri assigned to each data instance so that
        # instances referenced from several places are only added once.
        self.data_identifier_mapping = {}
        if bool(self.mapping_dictionary.get('ontology')):
            self.project_name = self.mapping_dictionary['ontology'].get('project_name')
            self.project_uri = URIRef(self.mapping_dictionary['ontology'].get('project_uri'))
            ontology_file = self.mapping_dictionary['ontology'].get('ontology_file')
            if bool(ontology_file) and isfile(ontology_file):
                self.ontology_graph.parse(ontology_file, format="turtle")
            self.ns = { uriref: ns for ns, uriref in self.data_graph.namespace_manager.namespaces() }
            self.data_graph.bind(self.project_name, self.project_uri)
            self.data_graph.bind('data', DATA_URL + '#')
        else:
            raise Exception('Error: mapping_dictionary does not contain key "ontology"!')

    def add_data(self, data_instance, identifier_prefix, parent_data_instance=None):
        """Add a data rdf instance of data_instance to the data_graph.

        :param data_instance: a SemanticClass instance to serialize
        :param identifier_prefix: prefix used when minting the subject uri
        :param parent_data_instance: owner instance used to resolve bare ids
        :return: (rdflib.URIRef) subject_uri of data instance
        :raises Exception: if the mapping dictionary has no entry for the
            instance's class or one of its properties, or a referenced
            child instance cannot be resolved from its id.
        """
        identifier_uri = self.create_identifier_uri(data_instance, identifier_prefix)
        if bool(self.mapping_dictionary['classes'].get(type(data_instance).__name__)):
            class_uri = self.mapping_dictionary['classes'][type(data_instance).__name__]['class_uri']
            self.data_identifier_mapping.update({data_instance: identifier_uri})
            self.data_graph_add((identifier_uri, RDF.type, class_uri))
            semantic_dict = data_instance.get_semantic_dictionary()
            for key, content in semantic_dict['properties'].items():
                if bool(self.mapping_dictionary['classes'][type(data_instance).__name__]['properties'].get(key)):
                    datatype = content.get('class')
                    cardinality = content.get('cardinality')\
                            if bool(content.get('cardinality')) else 0
                    # Skip unset values; an int of -1 is treated as "not set".
                    if data_instance.__dict__.get(key) is not None\
                       and (type(data_instance.__dict__.get(key)) != int or data_instance.__dict__.get(key) != -1):
                        predicate_uri = self.mapping_dictionary['classes'][type(data_instance).__name__]['properties'][key]
                        child_data_instance = data_instance.__dict__.get(key)
                        new_identifier_prefix = identifier_uri[identifier_uri.index('#')+1:]
                        if datatype is list:
                            self.add_ordered_list(child_data_instance, identifier_uri, predicate_uri,\
                                    new_identifier_prefix, data_instance)
                        elif issubclass(datatype, SemanticClass):
                            if type(child_data_instance) is not list:
                                # The property may hold a bare id instead of an
                                # instance; resolve it through the parent.
                                if type(child_data_instance) != datatype\
                                   and not issubclass(type(child_data_instance), datatype):
                                    child_id = child_data_instance
                                    child_data_instance = parent_data_instance.get_object_from_list_with_id(datatype,\
                                            child_id)
                                    if child_data_instance is None:
                                        print(key, content)
                                        msg = 'No child_data_instance found for data_instance {0}: looking for {1} with id {2}'.format(\
                                                type(parent_data_instance), datatype, child_id)
                                        raise Exception(msg)
                                    else:
                                        # Cache the resolved instance on the owner so
                                        # later lookups can find it again.
                                        new_list_name = 'list_of_' + datatype.__name__ + 's'
                                        if new_list_name in data_instance.__dict__.keys():
                                            data_instance.__dict__[new_list_name].append(child_data_instance)
                                        else:
                                            data_instance.__dict__.update({ new_list_name: [ child_data_instance ]})
                                if child_data_instance not in self.data_identifier_mapping.keys():
                                    child_identifier_uri = self.add_data(child_data_instance, new_identifier_prefix,\
                                            parent_data_instance=data_instance)
                                else:
                                    child_identifier_uri = self.data_identifier_mapping[child_data_instance]
                                self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
                            else:
                                for child_item in child_data_instance:
                                    if child_item not in self.data_identifier_mapping.keys():
                                        child_identifier_uri = self.add_data(child_item, new_identifier_prefix,\
                                                parent_data_instance=data_instance)
                                    else:
                                        child_identifier_uri = self.data_identifier_mapping[child_item]
                                    self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
                        else:
                            # Simple value: serialize as a typed literal, preferring
                            # the datatype declared in the ontology's rdfs:range.
                            literal_datatype = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING[datatype]
                            ontology_datatypes = [ o for o in self.ontology_graph.objects(subject=predicate_uri, predicate=RDFS.range) ]
                            if len(ontology_datatypes) > 0:
                                literal_datatype = ontology_datatypes[0]
                            if type(child_data_instance) is list:
                                for child_item in child_data_instance:
                                    object_literal = Literal(str(child_item), datatype=literal_datatype)
                                    self.data_graph_add((identifier_uri, predicate_uri, object_literal))
                            else:
                                object_literal = Literal(str(child_data_instance), datatype=literal_datatype)
                                self.data_graph_add((identifier_uri, predicate_uri, object_literal))
                else:
                    msg = 'Mapping dictionary for {0} does not contain an entry for {1}!'.format(type(data_instance).__name__, key)
                    raise Exception(msg)
        else:
            msg = 'Mapping dictionary does not contain an entry for {}!'.format(type(data_instance).__name__)
            raise Exception(msg)
        return identifier_uri

    def add_ordered_list(self, data_instance_list, identifier_uri, predicate_uri, identifier_prefix, data_instance):
        """Add data_instance_list as an ordered rdf collection to the data_graph.

        Empty lists add nothing.
        """
        if len(data_instance_list) > 0:
            child_identifiers = []
            for item in data_instance_list:
                if item not in self.data_identifier_mapping.keys():
                    child_identifiers.append(self.add_data(item, identifier_prefix, data_instance))
                else:
                    child_identifiers.append(self.data_identifier_mapping[item])
            list_node = self.generate_RDF_collection(child_identifiers)
            self.data_graph_add((identifier_uri, predicate_uri, list_node))

    def create_identifier_uri(self, data_instance, identifier_prefix):
        """Return a data identifier uri.

        :return: (rdflib.URIRef) subject_uri of data instance
        """
        data_type, object_id = data_instance.get_name_and_id()
        identifier_uri = URIRef(DATA_URL + '#' + identifier_prefix + '_' + data_type + str(object_id))
        randombit_length = 5
        # On collision, retry with a growing random suffix until the uri is unused.
        while (identifier_uri, None, None) in self.data_graph:
            identifier_uri = URIRef(DATA_URL + '#' + identifier_prefix + '_' + data_type + str(random.getrandbits(randombit_length)))
            randombit_length += 1
        return identifier_uri

    def data_graph_add(self, rdf_triple):
        """Add a triple to the data graph."""
        self.data_graph.add(rdf_triple)

    def generate_RDF_collection(self, vals) -> BNode:
        """
        Generate an RDF List from vals, returns the head of the list
        @organization: U{World Wide Web Consortium}
        @author: U{Ivan Herman}
        @license: U{W3C SOFTWARE NOTICE AND LICENSE}
        @param vals: array of RDF Resources
        @return: head of the List (an RDF Resource)
        """
        heads = [ BNode() for r in vals ] + [ ns_rdf["nil"] ]
        for i in range(0, len(vals)):
            self.data_graph_add( (heads[i], ns_rdf["first"], vals[i]) )
            self.data_graph_add( (heads[i], ns_rdf["rest"], heads[i+1]) )
        return heads[0]

    def write(self, output_format="turtle"):
        """Serialize the data graph to self.target_file.

        :param output_format: rdflib serialization format (default "turtle")
        """
        # Context manager guarantees the file is closed even if serialize fails.
        with open(self.target_file, 'wb+') as f:
            f.write(self.data_graph.serialize(format=output_format))
Index: py2ttl/xml_conform_dictionary.py
===================================================================
--- py2ttl/xml_conform_dictionary.py (revision 0)
+++ py2ttl/xml_conform_dictionary.py (revision 96)
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This represents a xml conform dictionary of data.
+"""
+# Copyright (C) University of Basel 2019 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+import inspect
+import re
+import warnings
+
+from class_spec import SemanticClass
+
class XMLConformDictionary:
    """
    This represents a xml conform dictionary of data.
    """

    def __init__(self):
        # Objects that implement attach_object_to_tree (non-builtin values).
        self.attachables = []
        # Builtin single values, keyed by their xml attribute name.
        self.builtins = {}
        # Lists of builtin values, keyed by their xml attribute name.
        self.builtin_list = {}

    def attach_data_to_tree(self, node):
        """Attach data to node.

        Builtin values become attributes, builtin lists become
        space-separated attributes, and attachable objects attach
        themselves to the node as children.
        """
        for xml_key, value in self.builtins.items():
            node.set(xml_key, value)
        for xml_key, value_list in self.builtin_list.items():
            node.set(xml_key, ' '.join([ str(i) for i in value_list]))
        for attachable in self.attachables:
            attachable.attach_object_to_tree(node)

    @classmethod
    def create_cls_from_data_object(cls, data_object):
        """Create a XMLConformDictionary from a SemanticClass data object.

        :param data_object: instance whose semantic properties are collected
        :return: a new XMLConformDictionary
        :raises TypeError: if data_object is not a SemanticClass instance
        """
        if not issubclass(type(data_object), SemanticClass):
            msg = f'{type(data_object)} is not a subclass of {SemanticClass}'
            raise TypeError(msg)
        property_d = data_object.get_semantic_dictionary()[data_object.PROPERTIES_KEY]
        xml_d = cls()
        for key in property_d.keys():
            value = data_object.__dict__.get(key)
            # Skip unset values and empty lists.
            if value is not None and (type(value) != list or len(value) > 0):
                semantic_type = property_d[key][data_object.CLASS_KEY]\
                        if type(property_d[key]) is dict\
                        else property_d[key][0]
                if type(value) != list and semantic_type.__module__ == 'builtins':
                    # Booleans are serialized lowercase ('true'/'false').
                    if semantic_type == bool:
                        xml_d.builtins.update({key.replace('_','-'): str(value).lower()})
                    else:
                        xml_d.builtins.update({key.replace('_','-'): str(value)})
                elif semantic_type.__module__ != 'builtins':
                    if type(value) != list:
                        xml_d.attachables.append(value)
                    else:
                        for item in value:
                            xml_d.attachables.append(item)
                else:
                    xml_d.builtin_list.update({key.replace('_','-'): value})
        return xml_d

    @staticmethod
    def CREATE_INSTANCEOF_CLASS_FROM_NODE(semantic_class, node):
        """Create a instance of semantic_class from node.

        :param semantic_class: SemanticClass subclass to instantiate
        :param node: lxml element holding the serialized data
        :return: a new instance of semantic_class
        :raises TypeError: if semantic_class is not a SemanticClass subclass
        """
        if not issubclass(semantic_class, SemanticClass):
            msg = f'{semantic_class} is not a subclass of {SemanticClass}'
            raise TypeError(msg)
        property_d = semantic_class.get_semantic_dictionary()[semantic_class.PROPERTIES_KEY]
        class_instance = semantic_class()
        for key in property_d.keys():
            semantic_type = property_d[key][semantic_class.CLASS_KEY]\
                    if type(property_d[key]) is dict\
                    else property_d[key][0]
            if semantic_type.__module__ == 'builtins' and semantic_type != list:
                value = node.get(key.replace('_','-'))
                if semantic_type == bool:
                    class_instance.__dict__.update({key: (value == 'true')})
                elif semantic_type != str:
                    # Guard against a missing attribute: keep the constructor
                    # default instead of crashing on re.match(None).
                    if value is not None:
                        # A value containing whitespace is restored as a list.
                        if re.match(r'(.*)(\s)', value):
                            class_instance.__dict__.update({key: [ semantic_type(item) for item in value.split(' ')] })
                        else:
                            class_instance.__dict__.update({key: semantic_type(value)})
                else:
                    class_instance.__dict__.update({key: value})
            else:
                attachables = []
                for sub_node in node.xpath(semantic_type.XML_TAG):
                    # Prefer a class-specific factory when the type defines one.
                    sub_instance = semantic_type.create_cls_from_node(sub_node)\
                            if 'create_cls_from_node' in semantic_type.__dict__\
                            else XMLConformDictionary.CREATE_INSTANCEOF_CLASS_FROM_NODE(semantic_type, sub_node)
                    attachables.append(sub_instance)
                if len(attachables) > 0:
                    if len(attachables) > 1:
                        class_instance.__dict__.update({key: attachables})
                    else:
                        class_instance.__dict__.update({key: attachables[0]})
        return class_instance
Index: tests_py2ttl/test_xml_conform_dictionary.py
===================================================================
--- tests_py2ttl/test_xml_conform_dictionary.py (revision 0)
+++ tests_py2ttl/test_xml_conform_dictionary.py (revision 96)
@@ -0,0 +1,80 @@
+import unittest
+import lxml.etree as ET
+from os import sep, path
+import inspect
+from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.attachable_object import AttachableObject
+from datatypes.image import Image
+from datatypes.word import Word
+from datatypes.simple_word import SimpleWord
+
+sys.path.append('py2ttl')
+from class_spec import SemanticClass
+from xml_conform_dictionary import XMLConformDictionary
+
class TestSemanticClassOK(SemanticClass,AttachableObject):
    """Minimal attachable semantic fixture with a single 'msg' property."""

    def __init__(self, id=0):
        self.id = id
        self.msg = 'Hello World!'

    @staticmethod
    def get_semantic_dictionary():
        """Declare 'msg' as the only semantic property."""
        properties = { 'msg': (str, SemanticClass.SINGLE_VALUE) }
        return {'class': { 'this': TestSemanticClassOK}, 'properties': properties}

    def attach_object_to_tree(self, node):
        """Write this object's msg as an attribute of its xml node."""
        self.get_or_create_node(node).set('msg', self.msg)
+
class TestSemanticClassB(SemanticClass):
    """Fixture mixing builtin values, builtin lists and nested semantic objects."""

    def __init__(self):
        self.data = 1
        self.dataList = [ 1, 2, 3, 4 ]
        self.testList = [ TestSemanticClassOK(0), TestSemanticClassOK(1) ]
        self.test = TestSemanticClassOK(2)

    @classmethod
    def get_semantic_dictionary(cls):
        """Declare the four semantic properties of this fixture."""
        prop_dict = {}
        for prop_name, prop_type in (('data', int), ('dataList', int),
                                     ('test', TestSemanticClassOK), ('testList', TestSemanticClassOK)):
            prop_dict.update(TestSemanticClassB.create_semantic_property_dictionary(prop_name, prop_type))
        return { 'class': {'this': TestSemanticClassB }, 'properties': prop_dict}

    def get_super(self):
        """Return the direct superclass via inspect.getclasstree."""
        return inspect.getclasstree([self.__class__], unique=True)[0][0]
+
class TestSemanticClassC(TestSemanticClassB):
    """Empty subclass of TestSemanticClassB; inherits all fixture behavior."""
    pass
+
class TestSemanticClass(unittest.TestCase):
    """Tests round-tripping semantic data objects through XMLConformDictionary."""

    def test_create_cls_from_data_object(self):
        # A fixture with all three value kinds must populate all collections.
        fixture = TestSemanticClassB()
        xml_dict = XMLConformDictionary.create_cls_from_data_object(fixture)
        for collection in (xml_dict.attachables, xml_dict.builtin_list, xml_dict.builtins):
            self.assertTrue(len(collection) > 0)

    def test_fail(self):
        # Non-SemanticClass objects are rejected with TypeError.
        class Dummy:
            pass
        with self.assertRaises(TypeError):
            XMLConformDictionary.create_cls_from_data_object(Dummy())

    def test_attach_dettach(self):
        # Serialize a fixture to xml, restore it, and compare the values.
        tree = ET.ElementTree(ET.Element('asdf'))
        node = ET.SubElement(tree.getroot(), 'test')
        original = TestSemanticClassB()
        XMLConformDictionary.create_cls_from_data_object(original).attach_data_to_tree(node)
        restored = XMLConformDictionary.CREATE_INSTANCEOF_CLASS_FROM_NODE(TestSemanticClassB, node)
        self.assertEqual(restored.data, original.data)
        self.assertEqual(restored.dataList, original.dataList)
        self.assertEqual(restored.test.msg, original.test.msg)
+
# Run the test suite when executed as a script.
if __name__ == "__main__":
    unittest.main()
Index: tests_py2ttl/test_class_spec.py
===================================================================
--- tests_py2ttl/test_class_spec.py (revision 95)
+++ tests_py2ttl/test_class_spec.py (revision 96)
@@ -1,91 +1,106 @@
import unittest
from os import sep, path
import inspect
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
import sys
sys.path.append('svgscripts')
from datatypes.image import Image
from datatypes.word import Word
from datatypes.simple_word import SimpleWord
sys.path.append('py2ttl')
try:
from class_spec import SemanticClass
except ImportError:
sys.path.append(dirname(dirname(realpath(__file__))))
from py2ttl.class_spec import SemanticClass
class TestSemanticClassFail(SemanticClass):
    """SemanticClass subclass that provides no get_semantic_dictionary; instantiating it is expected to raise TypeError (see test_fail)."""
    def __init__(self):
        pass
class Dummy:
    """Plain object carrying only an id, used to test get_object_from_list_with_id."""
    def __init__(self, id):
        self.id = id
class TestSemanticClassOK(SemanticClass):
    """Fixture with a 'msg' property and a list of Dummy objects with ids 0-2."""

    def __init__(self):
        self.id = 0
        self.msg = 'Hello World!'
        self.mylist = [ Dummy(index) for index in range(3) ]

    @staticmethod
    def get_semantic_dictionary():
        """Declare 'msg' as the only semantic property."""
        properties = { 'msg': (str, SemanticClass.SINGLE_VALUE) }
        return {'class': { 'this': TestSemanticClassOK}, 'properties': properties}
class TestSemanticClassB(SemanticClass):
    """Fixture mixing builtin values, builtin lists and nested semantic objects."""

    def __init__(self):
        self.data = 1
        self.dataList = [ 1, 2, 3, 4 ]
        self.testList = [ TestSemanticClassOK(), TestSemanticClassOK() ]
        self.test = TestSemanticClassOK()

    @classmethod
    def get_semantic_dictionary(cls):
        """Declare the four semantic properties of this fixture."""
        prop_dict = {}
        for prop_name, prop_type in (('data', int), ('dataList', int),
                                     ('test', TestSemanticClassOK), ('testList', TestSemanticClassOK)):
            prop_dict.update(TestSemanticClassB.create_semantic_property_dictionary(prop_name, prop_type))
        return { 'class': {'this': TestSemanticClassB }, 'properties': prop_dict}

    def get_super(self):
        """Return the direct superclass via inspect.getclasstree."""
        return inspect.getclasstree([self.__class__], unique=True)[0][0]
class TestSemanticClassC(TestSemanticClassB):
    """Empty subclass used to verify that get_class_dictionary reports TestSemanticClassB as its type."""
    pass
class TestSemanticClass(unittest.TestCase):
    """Tests for the SemanticClass interface and its dictionary helpers."""
    def test_fail(self):
        # A SemanticClass without get_semantic_dictionary cannot be instantiated.
        with self.assertRaises(TypeError):
            TestSemanticClassFail()
    def test_success(self):
        test = TestSemanticClassOK()
        self.assertEqual(TestSemanticClassOK.get_semantic_dictionary()['properties'], { 'msg': (str, 1) })
        test = TestSemanticClassB()
        self.assertEqual(test.get_semantic_dictionary()['class'].get('this'), TestSemanticClassB)
        dictionary = test.return_dictionary_after_updating_super_classes(TestSemanticClassB.get_semantic_dictionary())
    def test_get_xml_conform_key_value_dictionary(self):
        #test = TestSemanticClassOK()
        #xml_d = test.get_xml_conform_key_value_dictionary()
        test = TestSemanticClassB()
        xml_d = test.get_xml_conform_key_value_dictionary()
        attachable, attachable_list, builtins, builtin_list = 'attachable', 'attachable-list', 'builtins', 'builtin-list'
        # The fixture's nested semantic objects must appear under 'attachable'.
        self.assertTrue(len(xml_d['attachable']) > 0)
    def test_get_class_dictionary(self):
        # A subclass without own properties reports its superclass as 'type'.
        test = TestSemanticClassC()
        self.assertEqual(test.get_class_dictionary().get('type') is not None, True)
        self.assertEqual(test.get_class_dictionary().get('type'), TestSemanticClassB)
        #print(test.create_semantic_property_dictionary('is_true', bool, cardinality=1, name='IsTrue', label='is true', comment='test comment'))
    def test_get_cls_hasPart_objectCls_dictionaries(self):
        dictionary = SemanticClass.get_cls_hasPart_objectCls_dictionaries(SemanticClass, 'asdf/asdf')
        #print(dictionary)
    def test_get_object_from_list_with_id(self):
        test = TestSemanticClassOK()
        #mylist = test._get_list_of_type(Dummy)
        # Dummy with id 1 must be found in the fixture's mylist.
        d_1 = test.get_object_from_list_with_id(Dummy, 1)
        self.assertEqual(d_1 is not None, True)
        self.assertEqual(d_1.id, 1)
    def test_return_dictionary_after_updating_super_classes(self):
        # An extra RDFS_SUBCLASSOF_LIST entry yields two subclass-of entries.
        class TestWord(Word):
            RDFS_SUBCLASSOF_LIST = [ 'http://www.example.com#Test' ]
        dictionary = TestWord.get_semantic_dictionary()
        self.assertEqual(TestWord.SUBCLASS_OF in dictionary[TestWord.CLASS_KEY].keys(), True)
        self.assertEqual(len(dictionary[TestWord.CLASS_KEY][TestWord.SUBCLASS_OF]), 2)
    def test_repr(self):
        word = Word()
        #print(word)
# Run the test suite when executed as a script.
if __name__ == "__main__":
    unittest.main()
Index: tests_py2ttl/test_data/mapping_dict.xml
===================================================================
--- tests_py2ttl/test_data/mapping_dict.xml (revision 95)
+++ tests_py2ttl/test_data/mapping_dict.xml (revision 96)
@@ -1,293 +1,342 @@
tlnhttp://www.nie.org/ontology/nietzsche#./tln-ontology_autogenerated.ttlhttp://www.nie.org/ontology/nietzsche#ArchivalManuscriptUnityhttp://www.nie.org/ontology/nietzsche#hasTitlehttp://www.nie.org/ontology/nietzsche#hasManuscriptTypehttp://www.nie.org/ontology/nietzsche#hasStyleshttp://www.nie.org/ontology/nietzsche#hasPageshttp://www.nie.org/ontology/nietzsche#hasDescriptionhttp://www.nie.org/ontology/nietzsche#hasEarlierDescriptions
+
+ http://www.nie.org/ontology/nietzsche#EditorComment
+
+ http://www.nie.org/ontology/nietzsche#isUncertain
+ http://www.nie.org/ontology/nietzsche#hasComment
+
+
+
+ http://www.nie.org/ontology/nietzsche#AtypicalWriting
+
+ http://www.nie.org/ontology/nietzsche#isUncertain
+ http://www.nie.org/ontology/nietzsche#hasComment
+ http://www.nie.org/ontology/nietzsche#atypicalWritingHasText
+
+ http://www.nie.org/ontology/nietzsche#Pathhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#Boxhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#hasEarlierText
+
+ http://www.nie.org/ontology/nietzsche#Clarification
+
+ http://www.nie.org/ontology/nietzsche#isUncertain
+ http://www.nie.org/ontology/nietzsche#hasComment
+ http://www.nie.org/ontology/nietzsche#clarificationHasText
+
+ http://www.nie.org/ontology/nietzsche#Colorhttp://www.nie.org/ontology/nietzsche#colorHasNamehttp://www.nie.org/ontology/nietzsche#hasHexadecimalValuehttp://www.nie.org/ontology/nietzsche#Texthttp://www.nie.org/ontology/nietzsche#textHasContenthttp://www.nie.org/ontology/nietzsche#textHasMarkuphttp://www.nie.org/ontology/nietzsche#Descriptionhttp://www.nie.org/ontology/nietzsche#textHasContenthttp://www.nie.org/ontology/nietzsche#textHasMarkuphttp://www.nie.org/ontology/nietzsche#EarlierDescriptionhttp://www.nie.org/ontology/nietzsche#textHasContenthttp://www.nie.org/ontology/nietzsche#hasAuthorhttp://www.nie.org/ontology/nietzsche#hasCitationhttp://www.nie.org/ontology/nietzsche#textHasMarkup
+
+ http://www.nie.org/ontology/nietzsche#EditorCorrection
+
+ http://www.nie.org/ontology/nietzsche#isUncertain
+ http://www.nie.org/ontology/nietzsche#hasComment
+ http://www.nie.org/ontology/nietzsche#hasCorrectionText
+
+ http://www.nie.org/ontology/nietzsche#Imagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#FaksimileImagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#hasUrlhttp://www.nie.org/ontology/nietzsche#hasTextFieldhttp://www.nie.org/ontology/nietzsche#PositionalObjecthttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#WordPositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#FaksimilePositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#isOnFaksimileImagehttp://www.nie.org/ontology/nietzsche#isOnTextFieldhttp://www.nie.org/ontology/nietzsche#Linehttp://www.nie.org/ontology/nietzsche#lineHasNumberhttp://www.nie.org/ontology/nietzsche#lineHasBottomValueOnTranskriptionhttp://www.nie.org/ontology/nietzsche#lineHasTopValueOnTranskriptionhttp://www.nie.org/ontology/nietzsche#lineHasInnerBottomValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasInnerTopValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasOute
rBottomValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasOuterTopValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#isMainLine
+ http://www.nie.org/ontology/nietzsche#lineHasLineContinuation
+
+ http://www.nie.org/ontology/nietzsche#LineContinuation
+
+ http://www.nie.org/ontology/nietzsche#isUncertain
+ http://www.nie.org/ontology/nietzsche#hasComment
+ http://www.nie.org/ontology/nietzsche#isLineAContinuationTo
+ http://www.nie.org/ontology/nietzsche#lineContinuationHasReference
+
+ http://www.nie.org/ontology/nietzsche#SimpleWordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#SpecialWordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#MarkForeignHandshttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#textOfForeignHandshttp://www.nie.org/ontology/nietzsche#penOfForeignHandshttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#Pagehttp://www.nie.org/ontology/nietzsche#hasNumberhttp://www.nie.org/ontology/nietzsche#hasOrientationhttp://www.nie.org/ontology/nietzsche#hasLineshttp://www.nie.org/ontology/nietzsche#hasWordshttp://www.nie.org/ontology/nietzsche#hasWordDeletionPathshttp://www.nie.org/ontology/nietzsche#hasWordInsertionMarkshttp://www.nie.org/ontology/nietzsche#hasFaksimileImagehttp://www.nie.org/ontology/nietzsche#hasSvgImagehttp://www.nie.org/ontology/nietzsche#pageIsOnTextFieldhttp://www.nie.org/ontology/nietzsche#Referencehttp://www.nie.org/ontology/nietzsche#firstLineOfReferencehttp://www.nie.org/ontology/nietzsche#lastLineOfReferencehttp://www.nie.org/ontology/nietzsche#IsUncertainhttp://www.nie.org/ontology/nietzsche#hasTitlehttp://www.nie.org/ontology/nietzsche#hasPageNumberhttp://www.nie.org/ontology/nietzsche#SVGImagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#StandoffTaghttp://www.nie.org/ont
ology/nietzsche#standoffTagHasStartIndexhttp://www.nie.org/ontology/nietzsche#standoffTagHasEndIndexhttp://www.nie.org/ontology/nietzsche#standoffTagHasCSShttp://www.nie.org/ontology/nietzsche#TextConnectionMarkhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#textConnectionMarkHasTextSourcehttp://www.nie.org/ontology/nietzsche#TextFieldhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#TranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#isOnSvgImage
+
+ http://www.nie.org/ontology/nietzsche#UncertainDecipherment
+
+ http://www.nie.org/ontology/nietzsche#isUncertain
+ http://www.nie.org/ontology/nietzsche#hasComment
+
+ http://www.nie.org/ontology/nietzsche#Wordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#hasEditedTexthttp://www.nie.org/ontology/nietzsche#wordHasWordPartshttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#wordHasStylehttp://www.nie.org/ontology/nietzsche#overwritesWordhttp://www.nie.org/ontology/nietzsche#isTransformationOfWordhttp://www.nie.org/ontology/nietzsche#isExtensionOfWordhttp://www.nie.org/ontology/nietzsche#isDeletionOfWordhttp://www.nie.org/ontology/nietzsche#isClarificationOfWordhttp://www.nie.org/ontology/nietzsche#wordHasEarlierVersionhttp://www.nie.org/ontology/nietzsche#wordHasCorrectionhttp://www.nie.org/ontology/nietzsche#wordIsDeletedByPath
+ http://www.nie.org/ontology/nietzsche#wordHasEditorCommenthttp://www.nie.org/ontology/nietzsche#WordDeletionPathhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#WordInsertionMarkhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#hasMarkTypehttp://www.nie.org/ontology/nietzsche#hasSymbolIdhttp://www.nie.org/ontology/nietzsche#hasNextWordhttp://www.nie.org/ontology/nietzsche#hasPreviousWordhttp://www.nie.org/ontology/nietzsche#wordInsertionMarkBelongsToLinexml-dictionary
- 2020-06-05 10:25:21
+ 2020-06-19 14:07:47