Index: py2ttl/class_spec.py
===================================================================
--- py2ttl/class_spec.py (revision 94)
+++ py2ttl/class_spec.py (revision 95)
@@ -1,226 +1,228 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This is an abstract class for all classes that are semantically relevant.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import abc
import inspect
import warnings
class UnSemanticClass:
    """
    Marker base class: classes deriving from it are excluded from semantic
    processing, even when their other superclasses are semantically relevant.
    """
    pass
class SemanticClass(metaclass=abc.ABCMeta):
    """
    This is an abstract class for all classes that are semantically relevant.
    """
    # Keys and constants used when assembling semantic dictionaries.
    HAS_PART = 'has_part'
    HAS_SEQNUM = 'has_seqnum'
    SINGLE_VALUE = 1
    LIST = -99
    CLASS_KEY = 'class'
    CARDINALITY = "cardinality"
    CARDINALITY_RESTRICTION = "cardinality_restriction"
    HAS_HOMOTYPIC_PARTS_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasHomotypicParts'
    HOMOTYPIC_HAS_TEXT_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasText'
    STOFF_STYLE_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#styleHasCSS'
    PROPERTY_NAME = "name"
    PROPERTY_LABEL = "label"
    PROPERTY_COMMENT = "comment"
    PROPERTIES_KEY = "properties"
    SUBCLASS_OF = "rdfs:subClassOf"
    SUBPROPERTYOF = "subPropertyOf"
    # base uri of a super property -> identifier of the class that should become
    # a rdfs:subClassOf target (see return_dictionary_after_updating_super_classes).
    SUPER_CLASSES_DICT = { 'http://www.nie.org/ontology/homotypic': 'HomotypicEntity', 'http://www.nie.org/ontology/standoff': 'Style' }
    SUPER_PROPERTY = "super_property"
    THIS = "this"
    TYPE = "type"
    @classmethod
    def create_semantic_property_dictionary(cls, property_key, class_type, cardinality=0, cardinality_restriction='cardinality', name='', label='', comment='', subPropertyOf='') -> dict:
        """Create a semantic property dictionary.

        Here is how to make a subproperty:
        Pass the IRI of the super property as subPropertyOf=IRI,
        be sure that base_uri of IRI (as key) and Class identifier of super class (as value) are in cls.SUPER_CLASSES_DICT,
        then call cls.return_dictionary_after_updating_super_classes -> it will subclass the class that owns the subproperty
        to the super class.

        :return: semantic property dictionary (dict)
        """
        property_content = { SemanticClass.CLASS_KEY: class_type }
        # cardinality <= 0 means "unrestricted": neither cardinality key is emitted.
        if cardinality > 0:
            property_content.update({ SemanticClass.CARDINALITY: cardinality})
            property_content.update({ SemanticClass.CARDINALITY_RESTRICTION: cardinality_restriction})
        # empty-string arguments are treated as "not provided"
        if name != '':
            property_content.update({ SemanticClass.PROPERTY_NAME: name})
        if label != '':
            property_content.update({ SemanticClass.PROPERTY_LABEL: label})
        if comment != '':
            property_content.update({ SemanticClass.PROPERTY_COMMENT: comment})
        if subPropertyOf != '':
            property_content.update({ SemanticClass.SUBPROPERTYOF: subPropertyOf})
        return { property_key: property_content }
    @classmethod
    def get_class_dictionary(cls):
        """Creates and returns a class_dictionary with the keys cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
        """
        class_dict = {cls.THIS: cls }
        # cls.__dict__.get (rather than getattr) is used so that only declarations
        # made directly on cls count, not ones inherited from a superclass.
        if cls.__dict__.get('OWL_EQUIVALENTCLASSES') and len(cls.OWL_EQUIVALENTCLASSES) > 0:
            class_dict.update({'owl:equivalentClass': cls.OWL_EQUIVALENTCLASSES })
        if cls.__dict__.get('RDFS_SUBCLASSOF_LIST') and len(cls.RDFS_SUBCLASSOF_LIST) > 0:
            class_dict.update({cls.SUBCLASS_OF: cls.RDFS_SUBCLASSOF_LIST })
        # closest superclass of cls according to the class tree
        direct_super_class = inspect.getclasstree([cls],unique=True)[0][0]
        if issubclass(direct_super_class, SemanticClass) and direct_super_class != SemanticClass:
            class_dict.update({cls.TYPE: direct_super_class})
        return class_dict
    def get_name_and_id(self):
        """Return an identification for object as 2-tuple (class name, id).

        The id is taken from the first of the instance attributes
        'id', 'number', 'title' that exists; 0 otherwise.
        """
        # NOTE(review): local 'id' shadows the builtin of the same name.
        id = 0
        if 'id' in self.__dict__.keys():
            id = self.id
        elif 'number' in self.__dict__.keys():
            id = self.number
        elif 'title' in self.__dict__.keys():
            id = self.title.replace(' ', '_')
        return type(self).__name__, id
    def _get_list_of_type(self, list_type):
        """Return list of type == list_type if list is not empty.

        Scans all list-valued instance attributes and returns the first
        non-empty one whose first item is of type list_type; [] otherwise.
        """
        list_of_type = []
        for object_list in [ list_obj for list_obj in self.__dict__.values()\
                             if type(list_obj) == list ]:
            if len(object_list) > 0 and type(object_list[0]) == list_type:
                return object_list
        return list_of_type
    def get_object_from_list_with_id(self, object_type, object_id):
        """Return object from list if object has id == object_id,
        None if not found.
        """
        list_with_object = [ item for item in self._get_list_of_type(object_type)\
                             if item.id == object_id ]
        if len(list_with_object) > 0:
            return list_with_object[0]
        return None
    @classmethod
    def get_cls_hasPart_objectCls_dictionaries(cls, object_cls, xpath, object_seqnum_xpath=None, cardinality=0, cardinality_restriction='minCardinality'):
        """Return a dictionary containing the information for creating a class that can act
        as an intermediary between cls and a number of object_cls if object_cls has
        a position in a sequence of object_classes that belong to cls.

        :param object_cls: the class of the sequence members
        :param xpath: xpath locating the members
        :param object_seqnum_xpath: xpath locating the sequence number (defaults to xpath + '/@id')
        """
        part_name = object_cls.__name__ + 'Part'
        has_part_name = object_cls.__name__.lower() + 'PartHas' + object_cls.__name__
        has_seqnum_name = object_cls.__name__.lower() + 'HasSeqNum'
        if object_seqnum_xpath is None:
            object_seqnum_xpath = xpath + '/@id'
        # property: intermediary part -> object_cls
        object_part_dictionary = { 'class': object_cls, 'cardinality': 1, 'xpath': xpath,\
            'name': has_part_name, 'label': '{0} has a {1}'.format(part_name, object_cls.__name__),\
            'comment': '{0} has a part, that is a {1}'.format(part_name, object_cls.__name__)}
        # property: intermediary part -> position in the sequence
        object_seqnum_dictionary = { 'class': int, 'cardinality': 1, 'xpath': object_seqnum_xpath,\
            'name': has_seqnum_name, 'label': '{0} has a sequence number'.format(part_name),\
            'comment': '{0} has a part, that stands in a sequence with this number'.format(part_name, object_cls.__name__)}
        # description of the intermediary class itself
        object_dictionary = { 'class_name': part_name, SemanticClass.HAS_PART: object_part_dictionary, SemanticClass.HAS_SEQNUM: object_seqnum_dictionary,\
            'label': '{0} part'.format(object_cls.__name__.lower()),\
            'comment': 'This class servers as a intermediary between {0} and {1}. {0} has some {1} in a specific sequence.'.format(cls.__name__, object_cls.__name__)}
        # top-level property: cls -> intermediary class
        dictionary = { 'flag': 'ordered_list' , 'class': object_dictionary, 'cardinality': cardinality, 'cardinality_restriction': cardinality_restriction, 'xpath': xpath,\
            'name': cls.__name__.lower() + 'Has' + part_name, 'label': '{0} has a part that connects it with a {1}'.format(cls.__name__, object_cls.__name__),\
            'comment': '{0} has a part that connects it with a {1}, that has a position in a sequence of {1}'.format(cls.__name__, object_cls.__name__)}
        return dictionary
    @classmethod
    @abc.abstractmethod
    def get_semantic_dictionary(cls):
        """Creates a semantic dictionary with cls.CLASS_KEY and cls.PROPERTIES_KEY as its keys.

        The class-key points to a class_dictionary with the keys: cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
        Create initial dictionary using cls.get_class_dictionary():
            dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: {} }
        The properties_key points to a properties_dictionary with semantically relevant keys
        of self.__dict__ as keys. Use cls.create_semantic_property_dictionary(...) in order to
        add a property dictionary for each property as follows:
            dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary(property_key, ...))
        Return dictionary by using:
            cls.return_dictionary_after_updating_super_classes(dictionary)
        """
        pass
    @classmethod
    def return_dictionary_after_updating_super_classes(cls, dictionary):
        """Return semantic dictionary after updating super classes if necessary.

        For every super-property base uri referenced by the properties, the
        owning super class (from cls.SUPER_CLASSES_DICT) is appended to the
        rdfs:subClassOf list unless an entry with that base uri is already present.
        """
        if cls.PROPERTIES_KEY not in dictionary.keys():
            return dictionary
        # base uris of all super properties referenced by this class's properties
        subproperty_base_uri_set = set( value.get(cls.SUBPROPERTYOF).split('#')[0]\
                for value in dictionary[cls.PROPERTIES_KEY].values()\
                if bool(value.get(cls.SUBPROPERTYOF)) )
        for sub_property_base in subproperty_base_uri_set:
            if bool(cls.SUPER_CLASSES_DICT.get(sub_property_base))\
               and (\
                   cls.SUBCLASS_OF not in dictionary[cls.CLASS_KEY].keys()\
                   or len(dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]) == 0\
                   or len([ url for url in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF] if sub_property_base in url]) == 0\
                   # above instead of beneath, there might be more than one Class that share a sub_property_base.
                   #or sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base) not in dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]\
               ):
                subclass_list = dictionary[cls.CLASS_KEY][cls.SUBCLASS_OF]\
                        if cls.SUBCLASS_OF in dictionary[cls.CLASS_KEY].keys()\
                        and len(dictionary[cls.CLASS_KEY].get(cls.SUBCLASS_OF)) > 0\
                        else []
                subclass_list.append(sub_property_base + '#' + cls.SUPER_CLASSES_DICT.get(sub_property_base))
                dictionary[cls.CLASS_KEY].update({cls.SUBCLASS_OF: subclass_list})
        return dictionary
    def __repr__(self) -> str:
        """Return a representation of all semantically relevant properties.
        """
        data_string = self.__str__()
        return f'<{data_string}>'
    def __str__(self) -> str:
        """Return a str of all semantically relevant properties.
        """
        name = type(self).__name__
        data = []
        for key in self.get_semantic_dictionary()[self.PROPERTIES_KEY].keys():
            # NOTE(review): the second clause of this 'or' is unreachable when the
            # first is False (value is None, never a list); empty lists therefore
            # ARE included -- confirm whether skipping them was intended.
            if key in self.__dict__.keys() and\
               (self.__dict__[key] != None or
               (type(self.__dict__[key]) == list and len(self.__dict__[key]) > 0)):
                data.append(f'{key}: {self.__dict__[key]}')
        data_string = ', '.join(data)
        return f'{name} {data_string}'
Index: tests_py2ttl/test_data/mapping_dict.xml
===================================================================
--- tests_py2ttl/test_data/mapping_dict.xml (revision 94)
+++ tests_py2ttl/test_data/mapping_dict.xml (revision 95)
@@ -1,283 +1,293 @@
tlnhttp://www.nie.org/ontology/nietzsche#./tln-ontology_autogenerated.ttlhttp://www.nie.org/ontology/nietzsche#ArchivalManuscriptUnityhttp://www.nie.org/ontology/nietzsche#hasTitlehttp://www.nie.org/ontology/nietzsche#hasManuscriptTypehttp://www.nie.org/ontology/nietzsche#hasStyleshttp://www.nie.org/ontology/nietzsche#hasPages
+ http://www.nie.org/ontology/nietzsche#hasDescription
+ http://www.nie.org/ontology/nietzsche#hasEarlierDescriptionshttp://www.nie.org/ontology/nietzsche#Pathhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#Boxhttp://www.nie.org/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#hasEarlierTexthttp://www.nie.org/ontology/nietzsche#Colorhttp://www.nie.org/ontology/nietzsche#colorHasNamehttp://www.nie.org/ontology/nietzsche#hasHexadecimalValue
+
+ http://www.nie.org/ontology/nietzsche#Text
+
+ http://www.nie.org/ontology/nietzsche#textHasContent
+ http://www.nie.org/ontology/nietzsche#textHasMarkup
+
+
+
+ http://www.nie.org/ontology/nietzsche#Description
+
+ http://www.nie.org/ontology/nietzsche#textHasContent
+ http://www.nie.org/ontology/nietzsche#textHasMarkup
+
+
+
+ http://www.nie.org/ontology/nietzsche#EarlierDescription
+
+ http://www.nie.org/ontology/nietzsche#textHasContent
+ http://www.nie.org/ontology/nietzsche#hasAuthor
+ http://www.nie.org/ontology/nietzsche#hasCitation
+ http://www.nie.org/ontology/nietzsche#textHasMarkup
+
+ http://www.nie.org/ontology/nietzsche#Imagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#FaksimileImagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#hasUrlhttp://www.nie.org/ontology/nietzsche#hasTextFieldhttp://www.nie.org/ontology/nietzsche#PositionalObjecthttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#WordPositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#FaksimilePositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#isOnFaksimileImagehttp://www.nie.org/ontology/nietzsche#isOnTextFieldhttp://www.nie.org/ontology/nietzsche#Linehttp://www.nie.org/ontology/nietzsche#lineHasNumberhttp://www.nie.org/ontology/nietzsche#lineHasBottomValueOnTranskriptionhttp://www.nie.org/ontology/nietzsche#lineHasTopValueOnTranskriptionhttp://www.nie.org/ontology/nietzsche#lineHasInnerBottomValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasInnerTopValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasOute
rBottomValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#lineHasOuterTopValueOnFaksimilehttp://www.nie.org/ontology/nietzsche#isMainLinehttp://www.nie.org/ontology/nietzsche#SimpleWordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#SpecialWordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#MarkForeignHandshttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#textOfForeignHandshttp://www.nie.org/ontology/nietzsche#penOfForeignHandshttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#Pagehttp://www.nie.org/ontology/nietzsche#hasNumberhttp://www.nie.org/ontology/nietzsche#hasOrientationhttp://www.nie.org/ontology/nietzsche#hasLineshttp://www.nie.org/ontology/nietzsche#hasWords
- http://www.nie.org/ontology/nietzsche#hasWritingProcesseshttp://www.nie.org/ontology/nietzsche#hasWordDeletionPathshttp://www.nie.org/ontology/nietzsche#hasWordInsertionMarkshttp://www.nie.org/ontology/nietzsche#hasFaksimileImagehttp://www.nie.org/ontology/nietzsche#hasSvgImagehttp://www.nie.org/ontology/nietzsche#pageIsOnTextFieldhttp://www.nie.org/ontology/nietzsche#Referencehttp://www.nie.org/ontology/nietzsche#firstLineOfReferencehttp://www.nie.org/ontology/nietzsche#lastLineOfReferencehttp://www.nie.org/ontology/nietzsche#IsUncertainhttp://www.nie.org/ontology/nietzsche#hasTitlehttp://www.nie.org/ontology/nietzsche#hasPageNumberhttp://www.nie.org/ontology/nietzsche#SVGImagehttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasFileNamehttp://www.nie.org/ontology/nietzsche#StandoffTag
- http://www.nie.org/ontology/nietzsche#standoffTagHasMarkuphttp://www.nie.org/ontology/nietzsche#standoffTagHasStartIndexhttp://www.nie.org/ontology/nietzsche#standoffTagHasEndIndex
+ http://www.nie.org/ontology/nietzsche#standoffTagHasCSS
-
- http://www.nie.org/ontology/nietzsche#Text
-
- http://www.nie.org/ontology/nietzsche#textHasContent
- http://www.nie.org/ontology/nietzsche#textHasMarkup
-
- http://www.nie.org/ontology/nietzsche#TextConnectionMarkhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#textConnectionMarkHasTextSourcehttp://www.nie.org/ontology/nietzsche#TextFieldhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#TranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#isOnSvgImagehttp://www.nie.org/ontology/nietzsche#Wordhttp://www.nie.org/ontology/nietzsche#hasTexthttp://www.nie.org/ontology/nietzsche#hasEditedTexthttp://www.nie.org/ontology/nietzsche#wordHasWordPartshttp://www.nie.org/ontology/nietzsche#wordBelongsToLinehttp://www.nie.org/ontology/nietzsche#hasTranskriptionPositionhttp://www.nie.org/ontology/nietzsche#hasFaksimilePositionhttp://www.nie.org/ontology/nietzsche#wordHasStylehttp://www.nie.org/ontology/nietzsche#overwritesWordhttp://www.nie.org/ontology/nietzsche#isTransformationOfWordhttp://www.nie.org/ontology/nietzsche#isExtensionOfWordhttp://www.nie.org/ontology/nietzsche#isDeletionOfWordhttp://www.nie.org/ontology/nietzsche#isClarificationOfWordhttp://www.nie.org/ontology/nietzsche#wordHasEarlierVersionhttp://www.nie.org/ontology/nietzsche#wordHasCorrectionhttp://www.nie.org/ontology/nietzsche#wordIsDeletedByPathhttp://www.nie.org/ontology/nietzsche#WordDeletionPathhttp://www.nie.or
g/ontology/nietzsche#hasDAttributehttp://www.nie.org/ontology/nietzsche#WordInsertionMarkhttp://www.nie.org/ontology/nietzsche#hasHeighthttp://www.nie.org/ontology/nietzsche#hasWidthhttp://www.nie.org/ontology/nietzsche#hasLefthttp://www.nie.org/ontology/nietzsche#hasTophttp://www.nie.org/ontology/nietzsche#hasBottomhttp://www.nie.org/ontology/nietzsche#hasTransformhttp://www.nie.org/ontology/nietzsche#hasMarkTypehttp://www.nie.org/ontology/nietzsche#hasSymbolIdhttp://www.nie.org/ontology/nietzsche#hasNextWordhttp://www.nie.org/ontology/nietzsche#hasPreviousWordhttp://www.nie.org/ontology/nietzsche#wordInsertionMarkBelongsToLine
-
- http://www.nie.org/ontology/nietzsche#WritingProcess
-
- http://www.nie.org/ontology/nietzsche#hasVersion
- http://www.nie.org/ontology/nietzsche#hasDescription
-
- xml-dictionary
- 2020-05-13 11:04:44
+ 2020-06-05 10:25:21
Index: svgscripts/datatypes/standoff_tag.py
===================================================================
--- svgscripts/datatypes/standoff_tag.py (revision 94)
+++ svgscripts/datatypes/standoff_tag.py (revision 95)
@@ -1,138 +1,151 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent the standoff markup of a text.
"""
# Copyright (C) University of Basel 2020 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
import re
import sys
from .attachable_object import AttachableObject
sys.path.append('py2ttl')
from class_spec import SemanticClass
class StandoffTag(AttachableObject,SemanticClass):
    """
    This class represents the standoff markup of a text.
    """
    MARKUP_STYLES = [ 'bold', 'italic' ]
    RDFS_SUBCLASSOF_LIST = ['http://www.nie.org/ontology/standoff#StandoffMarkup']
    RELEVANT_STYLE_KEY = 'font-family'
    RELEVANT_CONTENT_STARTSWITH = 'Frutiger-'
    RELEVANT_PATTERN = re.compile('.*(Italic|Bold)$')
    RELEVANT_SUB_PATTERN = re.compile('Frutiger-(Light)*')
    STOFF_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#hasCSS'
    STOFF_HAS_START_INDEX = 'http://www.nie.org/ontology/standoff#standoffMarkupHasStartIndex'
    STOFF_HAS_END_INDEX = 'http://www.nie.org/ontology/standoff#standoffMarkupHasEndIndex'
    # NOTE(review): the keys of this dictionary arrived corrupted (three identical
    # empty-string keys, collapsing the dict to a single entry). Restored to the
    # HTML tags matching the markup values -- TODO confirm against repo history.
    HTML_TAG_DICTIONARY = { '<i>': 'italic', '<b>': 'bold', '<del>': 'delete' }
    CSS_DICTIONARY = { 'bold': 'font-weight:bold;',
                       'italic': 'font-style: italic;',
                       'delete': 'text-decoration:line-through;' }
    def __init__(self, markup: str, startIndex: int, endIndex: int, id=0):
        """Init a standoff tag with a markup name ('bold'/'italic'/'delete')
        covering the text range [startIndex, endIndex].
        """
        self.id = str(id)
        # None for markups without a CSS mapping
        self.css_string = self.CSS_DICTIONARY.get(markup)
        self.markup = markup
        self.startIndex = startIndex
        self.endIndex = endIndex
    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.

        Reuses an existing node with the same markup/id if present,
        otherwise appends a new subelement.
        """
        if target_tree.__class__.__name__ == '_ElementTree':
            target_tree = target_tree.getroot()
        obj_node = target_tree.xpath('.//' + self.markup + '[@id="%s"]' % self.id)[0] \
                   if(len(target_tree.xpath('.//' + self.markup + '[@id="%s"]' % self.id)) > 0) \
                   else ET.SubElement(target_tree, self.markup)
        obj_node.set('id', self.id)
        obj_node.set('start', str(self.startIndex))
        obj_node.set('end', str(self.endIndex))
    @classmethod
    def create_cls(cls, start_index, end_index, style_string, page=None, style_dict=None):
        """Creates a StandoffTag from a style_string.

        :param page: if given, style_dict is (re)built from page.style_dict
        :return: a list of (datatypes.standoff_tag) StandoffTag
        """
        if page is not None:
            style_dict = cls.create_relevant_style_dictionary(page)
        standoff_tags = []
        # BUG FIX: bail out on a missing/empty style_dict BEFORE using it.
        # Previously style_dict.keys() was accessed first, raising
        # AttributeError when both page and style_dict were None.
        if style_dict is None or len(style_dict) == 0:
            return standoff_tags
        relevant_keys = [ key for key in set(style_string.split(' '))\
                          if key in style_dict.keys() ]
        for relevant_key in relevant_keys:
            font_family = style_dict[relevant_key][cls.RELEVANT_STYLE_KEY]
            if re.match(cls.RELEVANT_PATTERN, font_family):
                # e.g. 'Frutiger-LightItalic' -> 'italic'
                markup = re.sub(cls.RELEVANT_SUB_PATTERN, '', font_family).lower()
                standoff_tags.append(cls(markup, start_index, end_index))
        return standoff_tags
    @classmethod
    def create_cls_from_node(cls, node):
        """Creates a StandoffTag from a node.

        :return: (datatypes.standoff_tag) StandoffTag
        """
        return cls(node.tag, int(node.get('start')), int(node.get('end')), id=node.get('id'))
    @classmethod
    def create_relevant_style_dictionary(cls, page):
        """Return a style dictionary that contains only relevant keys and contents.
        """
        return { key: key_dict for key, key_dict in page.style_dict.items()\
                 if cls.RELEVANT_STYLE_KEY in key_dict.keys()\
                 and key_dict[cls.RELEVANT_STYLE_KEY].startswith(cls.RELEVANT_CONTENT_STARTSWITH) }
    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        properties = {}
        properties.update(cls.create_semantic_property_dictionary('startIndex', int, cardinality=1, subPropertyOf=cls.STOFF_HAS_START_INDEX,\
                name='standoffTagHasStartIndex', label='standoff tag has a start index', comment='Connects a standoff tag with its start index.'))
        properties.update(cls.create_semantic_property_dictionary('endIndex', int, cardinality=1, subPropertyOf=cls.STOFF_HAS_END_INDEX,\
                name='standoffTagHasEndIndex', label='standoff tag has a end index', comment='Connects a standoff tag with its end index.'))
        properties.update(cls.create_semantic_property_dictionary('css_string', str,\
                subPropertyOf=cls.STOFF_HAS_CSS_URL_STRING,\
                name='standoffTagHasCSS', label='standoff tag has css', comment='Connects a standoff tag with CSS style.'))
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)
    def is_joinable(self, other):
        """Return true if self and other have same markup and self.endIndex == other.startIndex.
        """
        return self.markup == other.markup and self.endIndex == other.startIndex
    def join(self, other):
        """Join self with other (extend self to other's end index).
        """
        self.endIndex = other.endIndex
    def join_list(self, others):
        """Join all others that are joinable, return remaining others as a list.
        """
        unjoinable_others = []
        for other in others:
            if self.is_joinable(other):
                self.join(other)
            else:
                unjoinable_others.append(other)
        return unjoinable_others
Index: svgscripts/datatypes/style.py
===================================================================
--- svgscripts/datatypes/style.py (revision 94)
+++ svgscripts/datatypes/style.py (revision 95)
@@ -1,202 +1,205 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent the style of a word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import copy
from lxml import etree as ET
import re
import sys
from .color import Color
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Style(SemanticClass):
"""
This class represents the style of a word.
Args:
manuscript: a ArchivalManuscriptUnity
"""
NIETSCHES_FONTS = { 'german': 'deutsche Schreibschrift', 'latin': 'lateinische Schreibschrift' }
COLOR_KEYS = [ 'black', 'red', 'blue', 'green', 'grey' ]
RELEVANT_STYLE_KEYS = [ 'font-family', 'fill', 'stroke' ]
ADDITIONAL_STYLE_KEYS = [ 'font-size' ]
+ PERCENTS = [ '80%', '70%' ]
WRITING_INSTRUMENTS = { (COLOR_KEYS[0], False): 'schwarze Tinte',\
(COLOR_KEYS[0], True): 'Bleistift',\
(COLOR_KEYS[4], True): 'Bleistift',\
(COLOR_KEYS[4], False): 'Bleistift',\
(COLOR_KEYS[1], False): 'braune Tinte',\
(COLOR_KEYS[1], True): 'Rotstift',\
(COLOR_KEYS[2], False): 'violette Tinte',\
(COLOR_KEYS[2], True): 'Blaustift',\
(COLOR_KEYS[3], False): '„Tinte der letzten Korrektur“',\
(COLOR_KEYS[3], True): '„Tinte der letzten Korrektur“'}
def __init__(self, manuscript=None, writing_process_id=-1, extended_styles=False, deletion_color=None):
self.color = Color.create_cls(manuscript=manuscript)
self.css_styles = []
self.css_string = None
self.deletion_color = deletion_color
self.is_german = True
self.font = self.NIETSCHES_FONTS['german']
self.font_family = 'Weidemann-Book'
self.font_size = ''
self.manuscript = manuscript
self.relevant_key_map = {}
relevant_style_keys = self.RELEVANT_STYLE_KEYS + self.ADDITIONAL_STYLE_KEYS\
if extended_styles else self.RELEVANT_STYLE_KEYS
for key in relevant_style_keys:
if not key.startswith('font'):
self.relevant_key_map.update({key: self.set_color})
elif key == 'font-family':
self.relevant_key_map.update({key: self.set_font})
elif key == 'font-size':
self.relevant_key_map.update({key: self.set_size})
self.writing_instrument = self.WRITING_INSTRUMENTS[(self.color.name, False)]
self.writing_process_id = writing_process_id
def create_a_copy_wo_writing_process_id(self):
new_self = copy.deepcopy(self)
new_self.writing_process_id = -1
return new_self
def create_a_copy(self, reduce_writing_process_id=False):
writing_process_id = self.writing_process_id\
if not reduce_writing_process_id\
else self.writing_process_id-1
copy = Style(manuscript=self.manuscript, writing_process_id=writing_process_id)
copy.color = self.color
copy.font_family = self.font_family
copy.process_style_classes()
if copy.manuscript is not None:
copy.manuscript.update_styles(copy)
return copy
def create_css_styles(self):
"""Create css styles.
"""
if self.deletion_color is not None:
self.css_styles.append('text-decoration:line-through;')
self.css_styles.append(f'text-decoration-color:{self.deletion_color.hex_color};')
self.css_styles.append(f'-webkit-text-decoration-color:{self.deletion_color.hex_color};')
if self.font_family.endswith('Bold'):
self.css_styles.append(f'font-weight:bold;')
- if self.font_size != '':
- self.css_styles.append(f'font-size:{self.font_size};')
+ #if self.font_size != '':
+ # self.css_styles.append(f'font-size:{self.font_size};')
+ if self.writing_process_id > 0:
+ self.css_styles.append(f'font-size:{self.PERCENTS[self.writing_process_id-1]};')
self.css_styles.append(f'color:{self.color.hex_color};')
self.css_string = ''.join(self.css_styles)
@classmethod
- def create_cls(cls, page, style_string, manuscript=None, create_css=False, deletion_color=None):
+ def create_cls(cls, page, style_string, manuscript=None, create_css=False, deletion_color=None, writing_process_id=-1):
"""Creates a Style from a style_string.
:return: (datatypes.style) Style
"""
- style = cls(manuscript=manuscript, extended_styles=create_css, deletion_color=deletion_color)
+ style = cls(manuscript=manuscript, extended_styles=create_css, deletion_color=deletion_color, writing_process_id=writing_process_id)
style_dict = { key: key_dict for key, key_dict in page.style_dict.items()\
if any(relevant_key in key_dict.keys() for relevant_key in style.relevant_key_map.keys()) }
for style_key in style_string.split(' '):
if style_key in style_dict.keys():
dictionary = style_dict[style_key]
for key, set_function in [ (key, func) for key, func in style.relevant_key_map.items() if key in dictionary.keys() ]:
if callable(set_function):
set_function(dictionary[key])
style.process_style_classes()
if create_css:
style.create_css_styles()
return style
@classmethod
def get_semantic_dictionary(cls):
    """Create the semantic dictionary for Style as specified by SemanticClass.

    Declares the font, writing instrument, color and css string properties
    and wraps them together with the class dictionary.
    """
    properties = {}
    # (attribute, type, property name, label, comment) for the simple cardinality-1 properties
    simple_specs = [
        ('font', str, 'styleHasFont', 'style has font',
         'Connects a style with the kind of font Nietzsche used in writing.'),
        ('writing_instrument', str, 'styleHasWritingInstrument', 'style has writing instrument',
         'Connects a style with the description of a writing instrument.'),
        ('color', Color, 'styleHasColor', 'style has color',
         'Connects a style with a color.'),
    ]
    for attribute, attribute_type, prop_name, prop_label, prop_comment in simple_specs:
        properties.update(cls.create_semantic_property_dictionary(
            attribute, attribute_type, cardinality=1,
            name=prop_name, label=prop_label, comment=prop_comment))
    # css string is declared as a subproperty of the standoff css property
    properties.update(cls.create_semantic_property_dictionary(
        'css_string', str, subPropertyOf=cls.STOFF_STYLE_HAS_CSS_URL_STRING,
        name='styleHasCSS', label='style has css',
        comment='Connects a style with CSS style.'))
    dictionary = {cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties}
    return cls.return_dictionary_after_updating_super_classes(dictionary)
def process_style_classes(self):
    """Infer font and writing instrument from font-family and color.

    A font family starting with 'NewsGothic' marks non-german (latin) script;
    a color name listed in COLOR_KEYS selects the writing instrument via the
    (color name, bold?) lookup table WRITING_INSTRUMENTS.
    """
    family = self.font_family
    if family.startswith('NewsGothic'):
        self.is_german = False
        self.font = self.NIETSCHES_FONTS['latin']
    color_name = self.color.name
    if color_name in self.COLOR_KEYS:
        is_bold = family.endswith('Bold')
        self.writing_instrument = self.WRITING_INSTRUMENTS[(color_name, is_bold)]
def set_color(self, hex_color: str):
    """Set self.color from a hex color string; the svg value 'none' is ignored."""
    if hex_color == 'none':
        return
    self.color = Color.create_cls(hex_color=hex_color, manuscript=self.manuscript)
def set_font(self, font_family: str):
    """Record the svg font-family string on this style."""
    self.font_family = font_family
def set_size(self, font_size: str):
    """Record the svg font-size string on this style."""
    self.font_size = font_size
@classmethod
def remove_irrelevant_style_keys(cls, style_string, page, extended_styles=False) -> str:
    """Return style_string reduced (and sorted) to the style keys whose page
    entry contains at least one relevant css key.

    With extended_styles the ADDITIONAL_STYLE_KEYS count as relevant too.
    """
    relevant_style_keys = list(cls.RELEVANT_STYLE_KEYS)
    if extended_styles:
        relevant_style_keys += cls.ADDITIONAL_STYLE_KEYS
    kept_keys = [ style_key for style_key in style_string.split(' ')
                  if any(css_key in relevant_style_keys
                         for css_key in page.style_dict[style_key].keys()) ]
    return ' '.join(sorted(kept_keys))
def __eq__(self, other):
    """Qualitative (value-based) equality.

    Two styles are equal when color, font family, writing process id,
    css styles and font size all match; numerical identity is irrelevant
    for qualities. None never compares equal.
    """
    if other is None:
        return False
    own_key = (self.color, self.font_family, self.writing_process_id,
               self.css_styles, self.font_size)
    other_key = (other.color, other.font_family, other.writing_process_id,
                 other.css_styles, other.font_size)
    return own_key == other_key
def __hash__(self):
    """Return a hash value for self based on color, font family and writing process id.

    BUGFIX: the original passed the bound method ``self.color.__hash__``
    (without calling it) into the tuple, so the hash depended on the method
    object rather than on the color's own hash value; the method is now called.
    """
    return hash((self.color.__hash__(), self.font_family, self.writing_process_id))
Index: svgscripts/datatypes/description.py
===================================================================
--- svgscripts/datatypes/description.py (revision 0)
+++ svgscripts/datatypes/description.py (revision 95)
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a description.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .text import Text
+
+class Description(Text):
+ """
+ This class represents a description.
+ """
+ # ROOT_TAG is the subnode read from inside a <description> node (see manuscript.py)
+ ROOT_TAG = 'currentDescription'
+ XML_TAG = 'description'
+
+ def __init__(self, content: str, standoff_markups=None, id=0):
+ super(Description,self).__init__(content, standoff_markups=standoff_markups, id=id)
+
+ @classmethod
+ def create_cls_from_node(cls, node):
+ """Initialize a cls from node.
+
+ # uses create_cls_from_html inherited via Text on the node's raw text
+ [:return:] cls
+ """
+ return cls.create_cls_from_html(node.text)
+
Index: svgscripts/datatypes/manuscript.py
===================================================================
--- svgscripts/datatypes/manuscript.py (revision 94)
+++ svgscripts/datatypes/manuscript.py (revision 95)
@@ -1,142 +1,159 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent an archival unity of manuscript pages, i.e. workbooks, notebooks, folders of handwritten pages.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
-from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION
from .color import Color
+from .description import Description
+from .earlier_description import EarlierDescription
+from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION
sys.path.append('py2ttl')
from class_spec import SemanticClass
sys.path.append('shared_util')
from myxmlwriter import parse_xml_of_type, write_pretty, xml_has_type
class ArchivalManuscriptUnity(SemanticClass):
"""
This class represents an archival unity of manuscript pages (workbooks, notebooks and portfolios of handwritten pages).
@label archival unity of manuscript pages
Args:
title title of archival unity
manuscript_type type of manuscript: 'Arbeitsheft', 'Notizheft', 'Mappe'
manuscript_tree lxml.ElementTree
"""
XML_TAG = 'manuscript'
XML_COLORS_TAG = 'colors'
TYPE_DICTIONARY = { 'Mp': 'Mappe', 'N': 'Notizheft', 'W': 'Arbeitsheft' }
UNITTESTING = False
def __init__(self, title='', manuscript_type='', manuscript_tree=None):
# Color objects used in this manuscript
self.colors = []
+ # r95: current description and descriptions by earlier authors
+ self.earlier_descriptions = []
+ self.description = None
self.manuscript_tree = manuscript_tree
self.manuscript_type = manuscript_type
self.pages = []
self.styles = []
self.title = title
# derive the manuscript type from the title prefix (e.g. 'Mp' -> 'Mappe') when not given
if self.manuscript_type == '' and self.title != ''\
and self.title.split(' ')[0] in self.TYPE_DICTIONARY.keys():
self.manuscript_type = self.TYPE_DICTIONARY[self.title.split(' ')[0]]
def get_name_and_id(self):
    """Return an identification for object as 2-tuple (name, id).

    The name part is empty; the id is the title with spaces replaced by underscores.
    """
    identifier = self.title.replace(' ', '_')
    return '', identifier
@classmethod
def create_cls(cls, xml_manuscript_file, page_status_list=None, page_xpath='', update_page_styles=False):
"""Create an instance of ArchivalManuscriptUnity from a xml file of type FILE_TYPE_XML_MANUSCRIPT.

:param xml_manuscript_file: source xml file of type FILE_TYPE_XML_MANUSCRIPT
:param page_status_list: optional list of status strings a page's @status must all contain
:param page_xpath: optional xpath selecting page output files; built from page_status_list if empty
:param update_page_styles: if True, update the styles of every instantiated page
:return: ArchivalManuscriptUnity
"""
manuscript_tree = parse_xml_of_type(xml_manuscript_file, FILE_TYPE_XML_MANUSCRIPT)
title = manuscript_tree.getroot().get('title') if bool(manuscript_tree.getroot().get('title')) else ''
manuscript_type = manuscript_tree.getroot().get('type') if bool(manuscript_tree.getroot().get('type')) else ''
manuscript = cls(title=title, manuscript_type=manuscript_type, manuscript_tree=manuscript_tree)
manuscript.colors = [ Color.create_cls(node=color_node) for color_node in manuscript_tree.xpath('.//' + cls.XML_COLORS_TAG + '/' + Color.XML_TAG) ]
if page_xpath == '':
page_status = ''
if page_status_list is not None\
and type(page_status_list) is list\
and len(page_status_list) > 0:
# require every given status to be contained in @status
page_status = '[' + ' and '.join([ f'contains(@status, "{status}")' for status in page_status_list ]) + ']'
page_xpath = f'//pages/page{page_status}/@output'
# only instantiate pages whose output file exists and has the expected type
manuscript.pages = [ Page(page_source)\
for page_source in manuscript_tree.xpath(page_xpath)\
if isfile(page_source) and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source) ]
if update_page_styles:
for page in manuscript.pages: page.update_styles(manuscript=manuscript, add_to_parents=True, create_css=True)
+ # r95: read the current description and any earlier descriptions from the <description> node
+ description_node = manuscript_tree.xpath(Description.XML_TAG)[0]\
+ if len(manuscript_tree.xpath(Description.XML_TAG)) > 0\
+ else None
+ if description_node is not None:
+ manuscript.description = Description.create_cls_from_node(description_node.xpath(Description.ROOT_TAG)[0])\
+ if len(description_node.xpath(Description.ROOT_TAG)) > 0\
+ else None
+ for earlier_description_node in description_node.xpath(EarlierDescription.ROOT_TAG):
+ earlier_description = EarlierDescription.create_cls_from_node(earlier_description_node)
+ if earlier_description is not None:
+ manuscript.earlier_descriptions.append(earlier_description)
return manuscript
def get_color(self, hex_color) -> Color:
"""Return color if it exists or None.
"""
if hex_color in [ color.hex_color for color in self.colors ]:
return [ color for color in self.colors if color.hex_color == hex_color ][0]
return None
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {}
properties.update(cls.create_semantic_property_dictionary('title', str, 1))
properties.update(cls.create_semantic_property_dictionary('manuscript_type', str, 1))
properties.update(cls.create_semantic_property_dictionary('styles', list))
properties.update(cls.create_semantic_property_dictionary('pages', list))
+ properties.update(cls.create_semantic_property_dictionary('description', Description))
+ # NOTE(review): 'earlier_descriptions' holds a list (see __init__) but is declared
+ # with the item class EarlierDescription, unlike 'styles'/'pages' which use list --
+ # confirm this is the intended way to declare a typed list-valued property.
+ properties.update(cls.create_semantic_property_dictionary('earlier_descriptions', EarlierDescription))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def update_colors(self, color):
"""Update manuscript colors if color is not contained.

Rebuilds the complete colors node of manuscript_tree from self.colors and --
unless UNITTESTING -- writes the tree back to its source file (with backup).
"""
if self.get_color(color.hex_color) is None:
self.colors.append(color)
if self.manuscript_tree is not None:
# remove an existing colors node entirely before re-attaching all colors
if len(self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)) > 0:
self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)[0].getparent().remove(self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)[0])
colors_node = ET.SubElement(self.manuscript_tree.getroot(), self.XML_COLORS_TAG)
for color in self.colors:
color.attach_object_to_tree(colors_node)
if not self.UNITTESTING:
write_pretty(xml_element_tree=self.manuscript_tree, file_name=self.manuscript_tree.docinfo.URL,\
script_name=__file__, backup=True,\
file_type=FILE_TYPE_XML_MANUSCRIPT)
def update_styles(self, *styles):
    """Add each given style to self.styles unless an equal style is already present."""
    for candidate in styles:
        if candidate in self.styles:
            continue
        self.styles.append(candidate)
Index: svgscripts/datatypes/earlier_description.py
===================================================================
--- svgscripts/datatypes/earlier_description.py (revision 0)
+++ svgscripts/datatypes/earlier_description.py (revision 95)
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a description.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .description import Description
+
+class EarlierDescription(Description):
+ """
+ This class represents a description by another author.
+ """
+ # subnode tags read by create_cls_from_node
+ AUTHOR_TAG = 'author'
+ CITATION_TAG = 'citation'
+ DESCRIPTION_TAG = 'manuscriptDescription'
+ ROOT_TAG = 'earlierDescription'
+
+
+ def __init__(self, content: str, standoff_markups=None, id=0, author=None, citation=None ):
+ super(EarlierDescription,self).__init__(content, standoff_markups=standoff_markups, id=id)
+ # optional name of the earlier author and bibliographic citation
+ self.author = author
+ self.citation = citation
+
+ @classmethod
+ def create_cls_from_node(cls, node):
+ """Initialize a cls from node.
+
+ Returns None when node has no manuscriptDescription subnode; otherwise the
+ instance built from that subnode's text, with author/citation taken from
+ the corresponding subnodes when present.
+
+ [:return:] cls or None
+ """
+ if len(node.xpath(cls.DESCRIPTION_TAG)) == 0:
+ return None
+ earlier_description = cls.create_cls_from_html(node.xpath(cls.DESCRIPTION_TAG)[0].text)
+ earlier_description.author = node.xpath(cls.AUTHOR_TAG)[0].text if len(node.xpath(cls.AUTHOR_TAG)) > 0 else None
+ earlier_description.citation = node.xpath(cls.CITATION_TAG)[0].text if len(node.xpath(cls.CITATION_TAG)) > 0 else None
+ return earlier_description
+
+ @classmethod
+ def get_semantic_dictionary(cls):
+ """ Creates a semantic dictionary as specified by SemanticClass.
+ """
+ dictionary = super(EarlierDescription,cls).get_semantic_dictionary()
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('author', str))
+ dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('citation', str))
+ return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: svgscripts/datatypes/page.py
===================================================================
--- svgscripts/datatypes/page.py (revision 94)
+++ svgscripts/datatypes/page.py (revision 95)
@@ -1,296 +1,296 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a page.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
from progress.bar import Bar
from svgpathtools import svg2paths2, svg_to_paths
from svgpathtools.parser import parse_path
import sys
import warnings
from .box import Box
from .color import Color
from .image import Image, SVGImage
from .faksimile_image import FaksimileImage
from .faksimile_position import FaksimilePosition
from .lineNumber import LineNumber
from .line import Line
from .mark_foreign_hands import MarkForeignHands
from .matrix import Matrix
from .path import Path
from .positional_word_part import PositionalWordPart
from .super_page import SuperPage
from .style import Style
from .text_connection_mark import TextConnectionMark
from .text_field import TextField
from .transkriptionField import TranskriptionField
from .writing_process import WritingProcess
from .word import Word
from .word_deletion_path import WordDeletionPath
from .word_insertion_mark import WordInsertionMark
sys.path.append('py2ttl')
from class_spec import SemanticClass
FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION
FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT
STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK
STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK
class Page(SemanticClass,SuperPage):
"""
This class represents a page.
Args:
xml_source_file (str): name of the xml file to be instantiated.
xml_target_file (str): name of the xml file to which page info will be written.
"""
UNITTESTING = False
def __init__(self, xml_source_file, faksimile_image=None, faksimile_svgFile=None, add_deletion_paths_to_words=True):
"""Instantiate a page from xml_source_file; optionally record faksimile data
and add deletion paths to the deleted words.
"""
super(Page,self).__init__(xml_source_file)
self.update_property_dictionary('faksimile_image', faksimile_image)
self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile)
self.init_all_properties()
# register the page's style node (if present)
self.add_style(style_node=self.page_tree.getroot().find('.//style'))
self.init_node_objects()
if add_deletion_paths_to_words:
self.add_deletion_paths_to_words()
def add_deletion_paths_to_words(self):
    """Add deletion paths to deleted words.

    Requires an svg file (self.svg_file, falling back to self.source) in order
    to compute the transkription field offsets; does nothing when neither file
    exists. A word is relevant when it is deleted itself or any of its parts is.
    """
    has_svg = self.svg_file is not None and isfile(self.svg_file)
    has_source = self.source is not None and isfile(self.source)
    if not (has_svg or has_source):
        return
    svg_file = self.svg_file if self.svg_file is not None else self.source
    transkription_field = TranskriptionField(svg_file)
    # IDIOM: any(...) replaces the original "True in [ part.deleted for ... ]"
    words = [ word for word in self.words
              if word.deleted or any(part.deleted for part in word.word_parts) ]
    for word in words:
        word.add_deletion_paths(self.word_deletion_paths,
                tr_xmin=transkription_field.xmin, tr_ymin=transkription_field.ymin)
@classmethod
def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None):
    """Return a list of Page instances for xml_file.

    A FILE_TYPE_SVG_WORD_POSITION file yields [page] (subject to the optional
    word_selection_function, which must select at least one word); a
    FILE_TYPE_XML_MANUSCRIPT file is expanded recursively over its
    page/@output entries, optionally filtered by status_contains /
    status_not_contain; any other file type yields [].
    """
    source_tree = ET.parse(xml_file)
    file_type = source_tree.getroot().find('metadata/type').text
    if file_type == FILE_TYPE_SVG_WORD_POSITION:
        page = cls(xml_file)
        selected = word_selection_function is None or len(word_selection_function(page.words)) > 0
        return [ page ] if selected else []
    if file_type != FILE_TYPE_XML_MANUSCRIPT:
        return []
    # build the page selection xpath from the status filters
    if status_contains != '' and status_not_contain != '':
        xpath = f'//page[contains(@status, "{status_contains}") and not(contains(@status, "{status_not_contain}"))]/@output'
    elif status_contains != '':
        xpath = f'//page[contains(@status, "{status_contains}")]/@output'
    elif status_not_contain != '':
        xpath = f'//page[not(contains(@status, "{status_not_contain}"))]/@output'
    else:
        xpath = '//page/@output'
    pages = []
    for xml_source_file in source_tree.xpath(xpath):
        if isfile(xml_source_file):
            # note: the status filters are not propagated into the recursion (as before)
            pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function)
    return pages
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = { 'number': { 'class': str, 'cardinality': 1},\
'faksimile_image': { 'class': FaksimileImage, 'cardinality': 1},\
'orientation': { 'class': str, 'cardinality': 1},\
'svg_image': { 'class': SVGImage, 'cardinality': 1}}
properties.update(cls.create_semantic_property_dictionary('text_field', TextField,\
cardinality=1, name='pageIsOnTextField', label='page is on text field',\
comment='Relates a page to the text field on a faksimile image.'))
- for key in [ 'lines', 'words', 'writing_processes', 'word_deletion_paths', 'word_insertion_marks']:
+ # r95: 'writing_processes' removed from the semantic properties
+ # (WritingProcess no longer inherits SemanticClass in this revision)
+ for key in [ 'lines', 'words', 'word_deletion_paths', 'word_insertion_marks']:
properties.update(cls.create_semantic_property_dictionary(key, list))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def init_node_objects(self):
"""Initialize all node objects from page_tree: word insertion marks, words,
marks of foreign hands, text connection marks, line numbers, lines,
writing processes and word deletion paths.
"""
self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG) ]
self.words = [ Word.create_cls(word_node) for word_node in self.page_tree.getroot().xpath('./word') ]
self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in self.page_tree.getroot().xpath('//' + MarkForeignHands.XML_TAG) ]
self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ]
self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ]
self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ]
self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ]
self.word_deletion_paths = [ WordDeletionPath.create_cls(node, self) for node in self.page_tree.xpath('//' + WordDeletionPath.XML_TAG) ]
# words and special words are only fully initialized when faksimile data is available
if self.faksimile_image is not None and self.text_field is not None:
for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks:
simple_word.init_word(self)
for wim in self.word_insertion_marks:
# link each word insertion mark to its Line object via the line number
if wim.line_number > -1:
wim.line = [ line for line in self.lines if line.id == wim.line_number ][0]
def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]):
    """Update word ids and attach them to page.page_tree.

    Removes all word / mark-foreign-hands / text-connection-mark nodes from
    the tree, renumbers the corresponding objects and re-attaches them.

    Args:
        update_function_on_word: a callable or list of callables applied to
            every word -- and to special words whose type is listed in
            include_special_words_of_type -- before attaching it.
        include_special_words_of_type: list of special word types
            (MarkForeignHands, TextConnectionMark) to which the update
            functions are also applied.

    Does nothing but print 'locked' when the page is locked.
    """
    if self.is_locked():
        print('locked')
        return
    update_functions = [ update_function_on_word ]\
            if type(update_function_on_word) != list\
            else update_function_on_word
    for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG):
        node.getparent().remove(node)
    for index, word in enumerate(self.words):
        word.id = index
        for func in update_functions:
            if callable(func):
                func(word)
        word.attach_word_to_tree(self.page_tree)
    for index, mark_foreign_hands in enumerate(self.mark_foreign_hands):
        mark_foreign_hands.id = index
        if MarkForeignHands in include_special_words_of_type:
            for func in update_functions:
                # BUGFIX: the original tested callable(update_function_on_word)
                # (the list, always False) so the functions were never applied
                # to special words; test each func as in the words loop above.
                if callable(func):
                    func(mark_foreign_hands)
        mark_foreign_hands.attach_word_to_tree(self.page_tree)
    for index, text_connection_mark in enumerate(self.text_connection_marks):
        text_connection_mark.id = index
        if TextConnectionMark in include_special_words_of_type:
            for func in update_functions:
                if callable(func):  # BUGFIX: was callable(update_function_on_word)
                    func(text_connection_mark)
        text_connection_mark.attach_word_to_tree(self.page_tree)
def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None):
    """Update the data source of page.

    Records faksimile_svgFile on a 'data-source' node of page_tree (creating
    the node if absent) and optionally notes the xml correction file on it.
    """
    if faksimile_svgFile is None:
        return
    self.faksimile_svgFile = faksimile_svgFile
    existing_nodes = self.page_tree.xpath('.//data-source')
    if len(existing_nodes) > 0:
        data_node = existing_nodes[0]
    else:
        data_node = ET.SubElement(self.page_tree.getroot(), 'data-source')
    data_node.set('file', self.faksimile_svgFile)
    if xml_correction_file is not None:
        data_node.set('xml-corrected-words', xml_correction_file)
def update_line_number_area(self, transkription_field, svg_tree=None):
    """Determine the width of the area where the line numbers are written in the page.source file.

    Args:
        transkription_field: TranskriptionField whose line number area width is updated.
        svg_tree: parsed tree of self.source (parsed here when None).
    """
    THRESHOLD = 0.4  # positional tolerance when matching glyph nodes in the svg file
    if svg_tree is None:
        svg_tree = ET.parse(self.source)
    if len(self.line_numbers) > 1:
        # reference line number: on verso pages the tenth entry is used when
        # available (presumably to skip marginal entries -- TODO confirm)
        # BUGFIX: the guard was len(self.line_numbers) > 8, which allows index 9
        # to raise IndexError when exactly 9 line numbers exist; must be > 9.
        line_number = self.line_numbers[9]\
                if transkription_field.is_page_verso() and len(self.line_numbers) > 9\
                else self.line_numbers[1]
        ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)\
                if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)\
                and LineNumber.IS_A_LINE_NUMBER(item)\
                and LineNumber(raw_text_node=item, transkription_field=transkription_field).id == line_number.id ]
        if len(ln_nodes) > 0:
            matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform'))
            if transkription_field.is_page_verso():
                # verso: the x position of the line number bounds the area
                transkription_field.add_line_number_area_width(matrix.getX())
            elif self.svg_file is not None and isfile(self.svg_file):
                # recto: additionally add the glyph width of the line number symbol,
                # taken from the path bounding box in the svg file
                svg_path_tree = ET.parse(self.svg_file)
                namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
                svg_x = matrix.getX()
                svg_y = self.line_numbers[1].bottom + transkription_field.ymin
                # glyph reference nodes within THRESHOLD of the expected position
                use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
                        .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces)
                if len(use_nodes) > 0:
                    symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '')
                    d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
                    if len(d_strings) > 0 and d_strings[0] != '':
                        path = parse_path(d_strings[0])
                        xmin, xmax, ymin, ymax = path.bbox()
                        width = xmax - xmin
                        transkription_field.add_line_number_area_width(matrix.getX() + width)
def update_page_type(self, transkription_field=None):
    """Determine the page type (verso/recto) and store it on page_tree.

    Raises:
        FileNotFoundError: when no transkription_field is given and
            self.source is missing.
    """
    if transkription_field is None:
        if self.source is None or not isfile(self.source):
            raise FileNotFoundError('Page does not have a source!')
        transkription_field = TranskriptionField(self.source)
    if transkription_field.is_page_verso():
        self.page_type = Page.PAGE_VERSO
    else:
        self.page_type = Page.PAGE_RECTO
    self.page_tree.getroot().set('pageType', self.page_type)
def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False):
"""Update styles of words and add them to their transkription_positions.

Args:
add_to_parents: Add styles also to word (and if not None to manuscript).
partition_according_to_styles: Partition word if its transkription_positions have different styles.
create_css: Also create css styles; styles are then cached per (style key, deletion status).
"""
style_dictionary = {}
if words is None:
words = self.words
for word in words:
if len(word.word_parts) > 0:
# recurse into the parts of a composite word first
self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\
add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles)
for transkription_position in word.transkription_positions:
if len(transkription_position.positional_word_parts) > 0:
style_class = transkription_position.positional_word_parts[0].style_class
writing_process_id = -1
# a font size key of the style class determines the writing stage
for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]:
writing_process_id = self.fontsizekey2stage_mapping.get(font_key)
style_class_key = (Style.remove_irrelevant_style_keys(style_class, self, extended_styles=create_css), writing_process_id)
if create_css:
# deleted words take their deletion color from their first deletion path
if style_dictionary.get((style_class_key, word.deleted)) is None:
color = word.deletion_paths[0].style.color\
if len(word.deletion_paths) > 0 else None
style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\
- create_css=create_css, deletion_color=color )
+ create_css=create_css, deletion_color=color, writing_process_id=style_class_key[1] )
transkription_position.style = style_dictionary[(style_class_key, word.deleted)]
#print(style_dictionary[(style_class_key, word.deleted)])
else:
if style_dictionary.get(style_class_key) is None:
style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css)
style_dictionary[style_class_key].writing_process_id = style_class_key[1]
transkription_position.style = style_dictionary[style_class_key]
if add_to_parents and transkription_position.style not in word.styles:
word.styles.append(transkription_position.style)
if partition_according_to_styles:
word.split_according_to_status('style', splits_are_parts=True)
if manuscript is not None\
and add_to_parents:
manuscript.update_styles(*style_dictionary.values())
Index: svgscripts/datatypes/writing_process.py
===================================================================
--- svgscripts/datatypes/writing_process.py (revision 94)
+++ svgscripts/datatypes/writing_process.py (revision 95)
@@ -1,88 +1,88 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a text version.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
import sys
from .attachable_object import AttachableObject
sys.path.append('py2ttl')
from class_spec import SemanticClass
-class WritingProcess(AttachableObject,SemanticClass):
+class WritingProcess(AttachableObject):
"""
This class represents a stage in Nietzsche's process of writing. Each stage of writing is relative to its surrounding words.
Args:
version (int): stage in the writing process
words (list of datatypes.word.Word): all words that belong to this stage
"""
XML_TAG = 'writing-process'
FIRST_VERSION = 0
INSERTION_AND_ADDITION = 1
LATER_INSERTION_AND_ADDITION = 2
VERSION_DESCRIPTION = [ 'first version', 'insertion and addition', 'later insertion and addition' ]
def __init__(self, version=FIRST_VERSION):
    """Create a writing process stage.

    id and version are the given stage number; description is looked up in
    VERSION_DESCRIPTION and left empty when the version has no entry there.
    """
    self.id = version
    self.version = version
    known_descriptions = WritingProcess.VERSION_DESCRIPTION
    if self.version < len(known_descriptions):
        self.description = known_descriptions[self.version]
    else:
        self.description = ''
@classmethod
def create_writing_process_from_xml(cls, node, all_words=None):
    """Creates a WritingProcess from a writing-process node.

    Args:
        node: xml node carrying an optional 'version' attribute
            (missing/empty -> FIRST_VERSION).
        all_words: unused; kept for caller compatibility. The default used
            to be a shared mutable list literal ([]), replaced by None per
            best practice.

    [:return:] (datatypes.writing_process) WritingProcess
    """
    version = int(node.get('version'))\
            if bool(node.get('version')) else cls.FIRST_VERSION
    return cls(version=version)
@classmethod
def get_semantic_dictionary(cls):
""" Creates and returns a semantic dictionary as specified by SemanticClass.

NOTE(review): r95 removed SemanticClass from WritingProcess's base classes
(see the class statement) while this method still relies on the SemanticClass
helpers (get_class_dictionary, create_semantic_property_dictionary,
return_dictionary_after_updating_super_classes, CLASS_KEY, PROPERTIES_KEY) --
confirm these names are still available to this class or remove the method.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {}
properties.update(cls.create_semantic_property_dictionary('version', int, cardinality=1))
properties.update(cls.create_semantic_property_dictionary('description', str, cardinality=1))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def attach_object_to_tree(self, target_tree):
    """Attach object to tree.

    Reuses an existing writing-process node with this version if present,
    otherwise creates one; sets the version and (if non-empty) description
    attributes on it.
    """
    query = '//' + WritingProcess.XML_TAG + '[@version="%s"]' % self.version
    matches = target_tree.getroot().xpath(query)
    if len(matches) > 0:
        obj_node = matches[0]
    else:
        obj_node = ET.SubElement(target_tree.getroot(), WritingProcess.XML_TAG)
    obj_node.set('version', str(self.version))
    if self.description != '':
        obj_node.set('description', self.description)
Index: svgscripts/datatypes/text.py
===================================================================
--- svgscripts/datatypes/text.py (revision 94)
+++ svgscripts/datatypes/text.py (revision 95)
@@ -1,114 +1,144 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a text that may have standoff markup.
"""
# Copyright (C) University of Basel 2020 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
import re
import sys
from .attachable_object import AttachableObject
from .standoff_tag import StandoffTag
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Text(AttachableObject,SemanticClass):
"""
This class represents a text that may have standoff markup.
"""
+ TAG_PATTERN = re.compile(r'([^<]*)(<[^/]+>)')
XML_TAG = 'text-with-markup'
XML_SUB_TAG = 'text'
def __init__(self, content: str, standoff_markups=None, id=0, tag=XML_TAG):
self.id = str(id)
self.tag = tag
self.content = content
self.standoff_markups = standoff_markups\
if standoff_markups is not None\
else []
def append(self, content: str) -> int:
"""Extend text with content.
[:return:] startIndex of appended content
"""
startIndex = len(self.content)
self.content += content
return startIndex
def attach_object_to_tree(self, target_tree):
"""Attach object to tree.
"""
if target_tree.__class__.__name__ == '_ElementTree':
target_tree = target_tree.getroot()
obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \
if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \
else ET.SubElement(target_tree, self.tag)
obj_node.set('id', self.id)
text_node = ET.SubElement(obj_node, self.XML_SUB_TAG)
text_node.text = self.content
for index, markup in enumerate(self.standoff_markups):
markup.id = str(index)
markup.attach_object_to_tree(obj_node)
def join(self, other):
"""Join self and other.
"""
correction = self.append(' ' + other.content) + 1
for standoff_markup in other.standoff_markups:
standoff_markup.startIndex += correction
standoff_markup.endIndex += correction
self.standoff_markups += other.standoff_markups
del other
@classmethod
def create_cls_from_node(cls, node):
"""Initialize a cls from node.
[:return:] cls
"""
standoff_markups = [ StandoffTag.create_cls_from_node(item) for item in\
node.xpath('./' + '|./'.join(StandoffTag.MARKUP_STYLES)) ]
text = node.xpath('./' + cls.XML_SUB_TAG + '/text()')[0]\
if len(node.xpath('./' + cls.XML_SUB_TAG + '/text()')) > 0\
else ''
return cls(text, standoff_markups=standoff_markups, id=node.get('id'), tag=node.tag)
@classmethod
+ def create_cls_from_html(cls, html):
+ """Creates a Text from a html string.
+
+ :return: a (datatypes.text) Text
+ """
+ standoff_markups = []
+ tag_matched = re.match(cls.TAG_PATTERN, html)
+ while tag_matched is not None:
+ tag = tag_matched.group(2)
+ tags = [ t for t in tag.split('<') if t != '']
+ tags.reverse()
+ endTag = ''.join([ '</' + t for t in tags])
+ startIndex = tag_matched.end() - len(tag)
+ inner_tag_matched = re.match(cls.TAG_PATTERN, html[0:startIndex])
+ html = html[0:startIndex] + html[tag_matched.end():]
+ endTag_matched = re.match(rf'(.*)({endTag})', html)
+ if endTag_matched is not None:
+ endIndex = endTag_matched.end() - len(endTag)
+ html = html[0:endIndex] + html[endTag_matched.end():]
+ for markup in [ StandoffTag.HTML_TAG_DICTIONARY['<'+tag] for tag in tags\
+ if bool(StandoffTag.HTML_TAG_DICTIONARY.get('<'+tag)) ]:
+ standoff_markups.append(StandoffTag(markup, startIndex, endIndex))
+ else:
+ msg = f'HTML string contains no ending tag for {tag}!'
+ raise Exception(msg)
+ tag_matched = re.match(cls.TAG_PATTERN, html)
+ return cls(html, standoff_markups=standoff_markups)
+
+ @classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
properties = {}
properties.update(cls.create_semantic_property_dictionary('content', str, cardinality=1,\
name='textHasContent', label='content of text', comment='Connects a text with its content.'))
- properties.update(cls.create_semantic_property_dictionary('standoff_markups', list,\
- name='textHasMarkup', label='standoff tag of text', comment='Connects a text with a list of standoff tags.'))
+ properties.update(cls.create_semantic_property_dictionary('standoff_markups', StandoffTag,\
+ name='textHasMarkup', label='standoff markup of text', comment='Connects a text with a list of standoff tags.'))
dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
return cls.return_dictionary_after_updating_super_classes(dictionary)
Index: tests_svgscripts/test_word.py
===================================================================
--- tests_svgscripts/test_word.py (revision 94)
+++ tests_svgscripts/test_word.py (revision 95)
@@ -1,469 +1,469 @@
import unittest
from os import sep, path
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from process_words_post_merging import reset_page, update_writing_process_ids
from datatypes.box import Box
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.matrix import Matrix
import datatypes.page
from datatypes.path import Path
from datatypes.positional_word_part import PositionalWordPart
from datatypes.style import Style
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.word import Word, execute_function_on_parts, update_transkription_position_ids
from datatypes.word_deletion_path import WordDeletionPath
from datatypes.word_position import WordPosition
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Page:
def __init__(self):
self.svg_file = None
def get_line_number(self, input=0):
return -1
def get_biggest_fontSize4styles(self, style_set={}):
return 7
class TestWord(unittest.TestCase):
TESTCASE = None
def setUp(self):
DATADIR = path.dirname(__file__) + sep + 'test_data'
self.test_file = DATADIR + sep + 'N_VII_1_page009.xml'
self.word_deletion_path_file = DATADIR + sep + 'N_VII_1_page138.xml'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.word_part_objs = [{'text': 'a' }, {'text': 'b' }, {'text': 'c' }]
x = 0
for dict in self.word_part_objs:
dict['class'] = 'st22'
dict['x'] = x
dict['y'] = 11
x += 1
mylist = {'text': 'abc', 'id': '0', 'line-number': '2', 'deleted': 'true' }
word_position = TranskriptionPosition(x=0, y=1, height=10, width=10, matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)'))
self.transkription_positions = [ word_position ]
self.word_node = ET.Element('word', attrib=mylist)
word_position.attach_object_to_tree(self.word_node)
x = 0
for char in mylist['text']:
ET.SubElement(self.word_node, 'part', attrib={'text': char, 'x': str(x), 'y': '11', 'class': 'st22' })
x += 1
def test_add_deletion_paths(self):
page = datatypes.page.Page(self.word_deletion_path_file, add_deletion_paths_to_words=False)
word = [ word for word in page.words if word.text == 'AufBau'][0]
#self.assertTrue(word.deleted)
self.assertTrue(len(word.word_parts) > 0)
self.assertTrue(word.word_parts[0].deleted)
word.add_deletion_paths(page.word_deletion_paths, tr_xmin=28.347656, tr_ymin=49.921875)
self.assertTrue(len(word.word_parts[0].deletion_paths) > 0)
#print(word.deletion_paths)
def test_Word_with_word_part_objs(self):
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
self.assertEqual(word.id, 0)
self.assertEqual(word.transkription_positions[0].bottom, 13)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 3)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
def test_Word_with_word_node(self):
word = Word.create_cls(self.word_node)
self.assertEqual(word.id, 0)
self.assertEqual(word.deleted, True)
self.assertEqual(word.transkription_positions[0].bottom, 11)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 1)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
self.assertEqual(word.line_number, 2)
self.assertEqual(word.transkription_positions[0].transform.isRotationMatrix(), True)
def test_attach_word_to_tree(self):
newWord = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
empty_tree = ET.ElementTree(ET.Element('page'))
newWord.attach_word_to_tree(empty_tree)
for word_node in empty_tree.getroot().xpath('//word'):
word = Word.CREATE_WORD(word_node=word_node)
self.assertEqual(word.id, 0)
self.assertEqual(word.deleted, False)
self.assertEqual(word.transkription_positions[0].bottom, 13)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 3)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
@unittest.skipUnless(TESTCASE is None or TESTCASE == 0, 'Not testing this case')
def test_create_correction_history_case0(self):
# Case 1: whole word over box
box = Box(earlier_text='XYX')
word = Word(text='ASDF', transkription_positions=[TranskriptionPosition()])
word.word_box = box
word.create_correction_history()
self.assertEqual(word.earlier_version is None, True)
self.assertEqual(word.overwrites_word is not None, True)
@unittest.skipUnless(TESTCASE is None or TESTCASE == 1, 'Not testing this case')
def test_create_correction_history_case1(self):
# Case 2: part of word over box
box = Box(earlier_text='XYX')
partA = Word(text='A', transkription_positions=[TranskriptionPosition()])
partA.word_box = box
partB = Word(text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
word.create_correction_history()
self.assertEqual(word.earlier_version is None, True)
self.assertEqual(word.word_parts[0].overwrites_word is not None, True)
@unittest.skipUnless(TESTCASE is None or TESTCASE == 2, 'Not testing this case')
def test_create_correction_history_case3(self):
# Case 3: part of word over box, word under box is part of earlier version
box = Box(earlier_text='XYX')
tp0 = TranskriptionPosition()
tp0.style = Style(writing_process_id=0)
tp1 = TranskriptionPosition()
tp1.style = Style(writing_process_id=1)
partA = Word(id=0, text='Test', transkription_positions=[ tp0])
partB = Word(id=1, text='er', transkription_positions=[ tp1])
partB.word_box = box
word = Word(text='Tester', writing_process_id=1, word_parts=[ partA, partB ] )
word.create_correction_history(box_style=tp0.style)
self.assertEqual(word.text, 'Tester')
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.earlier_version.text, 'TestXYX')
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
@unittest.skipUnless(TESTCASE is None or TESTCASE == 3, 'Not testing this case')
def test_create_correction_history_case4(self):
# Case 4: part of word is deleted
partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
word.create_correction_history()
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.edited_text, 'SDF')
@unittest.skipUnless(TESTCASE is None or TESTCASE == 4, 'Not testing this case')
def test_create_correction_history_case5(self):
tp0 = TranskriptionPosition()
tp0.style = Style(writing_process_id=0)
tp1 = TranskriptionPosition()
tp1.style = Style(writing_process_id=1)
partA = Word(id=0, text='Test', transkription_positions=[ tp0])
partB = Word(id=1, text='er', transkription_positions=[ tp1])
word = Word(text='Tester', word_parts=[ partA, partB ] )
word.create_correction_history()
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.word_parts[1].extendsEarlierVersion, True)
self.assertEqual(word.word_parts[1].isExtensionOfWord, word.earlier_version)
#@unittest.skipUnless(TESTCASE is None or TESTCASE == 5, 'Not testing this case')
#@unittest.skip('case tested, relies on a local xml file')
def test_create_correction_history_case_full(self):
page = datatypes.page.Page('xml/N_VII_1_page138.xml')
manuscript = ArchivalManuscriptUnity()
reset_page(page)
update_writing_process_ids(page)
word = [ word for word in page.words if word.text == 'Verschiedenes' and word.line_number == 4 ][0]
wordAufBau = [ word for word in page.words if word.text == 'AufBau' ][0]
#page.words = [ word ]
page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
word.word_parts[0].transkription_positions[0].has_box = Box(earlier_text='v')
self.assertEqual(len(word.word_parts), 2)
word_over_box = word._get_partial_word_over_box()
update_transkription_position_ids(word)
word.create_correction_history(page)
self.assertEqual(word.writing_process_id, 1)
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.earlier_version.text, 'verschiedenes')
#print(word.earlier_version.id, [ (w.id, w.text) for w in word.earlier_version.word_parts ])
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
#print(ET.dump(word_node))
"""
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
"""
word = wordAufBau
page.words = [ word ]
page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
word.word_parts[0].deleted = True
word.word_parts[1].transkription_positions[0].has_box = Box(earlier_text='b')
self.assertEqual(len(word.word_parts), 3)
word_over_box = word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 3)
update_transkription_position_ids(word)
word.create_correction_history(page)
self.assertEqual(word.writing_process_id, 2)
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.text, 'AufBau')
self.assertEqual(word.edited_text, 'Bau')
self.assertEqual(word.earlier_version.text, 'Aufbau')
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
#print(ET.dump(word_node))
newWord = Word.create_cls(word_node)
#@unittest.skip('')
def test_earlier_version(self):
partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
earlier_version = word.create_earlier_version()
self.assertEqual(earlier_version is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord, earlier_version.word_parts[0])
def test_undo_partitioning(self):
tps = []
for i, xy in enumerate([ 3, 4, 5 ]):
tps.append(TranskriptionPosition(id=i, x=xy, y=xy, height=10, width=10))
partA = Word(id=0, text='Auf', writing_process_id=1, deleted=True, transkription_positions=[ tps[0]])
partB = Word(id=1, text='B', writing_process_id=2, transkription_positions=[tps[1]])
partC = Word(id=2, text='au', writing_process_id=1,transkription_positions=[tps[2]])
word = Word(text='Aufbau', writing_process_id=2, word_parts=[ partA, partB, partC ] )
word.undo_partitioning()
self.assertEqual(len(word.transkription_positions), len(tps))
self.assertEqual(len(word.word_parts), 0)
"""
page = datatypes.page.Page('xml/N_VII_1_page138.xml')
word = page.words[77]
word.undo_partitioning()
self.assertEqual(len(word.word_parts), 0)
self.assertEqual(len(word.transkription_positions), 3)
update_transkription_position_ids(word)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
print(ET.dump(word_node))
"""
def test_split(self):
page = Page()
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, self.word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('b')
self.assertEqual(previousWord.id, 0)
self.assertEqual(previousWord.text, 'a')
self.assertEqual(currentWord.id, 1)
self.assertEqual(nextWord.id, 2)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('bc')
self.assertEqual(previousWord.id, 0)
self.assertEqual(previousWord.text, 'a')
self.assertEqual(currentWord.id, 1)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('ab', start_id=10)
self.assertEqual(currentWord.id, 10)
self.assertEqual(currentWord.text, 'ab')
self.assertEqual(currentWord.transkription_positions[0].width, 2.1)
self.assertEqual(nextWord.id, 11)
self.assertEqual(nextWord.transkription_positions[0].width, 5.2)
word_part_objs=[{'text': 'x', 'class':'st22', 'x': 0, 'y': 0},\
{'text': 'Insofern', 'class':'st22', 'x': 1, 'y': 0},\
{'text': 'x', 'class':'st22', 'x': 10, 'y': 0}]
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
with self.assertWarns(Warning):
previousWord, currentWord, nextWord = word.split('Insofer')
word_part_objs=[{'text': 'xInsofern', 'class':'st22', 'x': 0, 'y': 0}]
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
with self.assertWarns(Warning):
previousWord, currentWord, nextWord = word.split('Insofern')
def test_join(self):
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word)
self.assertEqual(word.text, 'abc.')
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word, append_at_end_of_new_word=False)
self.assertEqual(word.text, '.abc.')
"""
tree = ET.ElementTree(ET.Element('page'))
word.attach_word_to_tree(tree)
print(ET.dump(tree.getroot()))
"""
def test_get_semanticAndDataDict(self):
dictionary = Word.get_semantic_dictionary()
- print(dictionary)
+ #print(dictionary)
info_dict = dictionary['properties'].get('isDeletionOfWord')
self.assertEqual(SemanticClass.SUPER_PROPERTY in info_dict.keys(), True)
super_info_dict = info_dict[SemanticClass.SUPER_PROPERTY]
#print(info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME))
def test_simplify_transkription_positions(self):
node_string = """ """
nodeA = ET.fromstring(node_string)
node_string = """
"""
nodeB = ET.fromstring(node_string)
word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
self.assertEqual(len(word.transkription_positions), 2)
word.simplify_transkription_positions()
self.assertEqual(len(word.transkription_positions), 1)
word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
word.transkription_positions[1].writing_process_id = -1
word.simplify_transkription_positions()
self.assertEqual(len(word.transkription_positions), 1)
self.assertEqual(word.transkription_positions[0].writing_process_id, 0)
"""
tree = ET.ElementTree(ET.Element('page'))
word.attach_word_to_tree(tree)
print(ET.dump(tree.getroot()))
"""
def test_partition(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
self.assertEqual(word.belongs_to_multiple_writing_processes(), True)
word.partition_according_to_writing_process_id()
self.assertEqual(len(word.word_parts), 3)
self.assertEqual(word.belongs_to_multiple_writing_processes(), False)
self.assertEqual(word.belongs_to_multiple_writing_processes(include_parts=True), True)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
newWord = Word.create_cls(word_node)
self.assertEqual(len(newWord.word_parts), 3)
#print(ET.dump(empty_tree.getroot()))
def test_partition_deletion(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
for transkription_position in word.transkription_positions:
transkription_position.deleted = transkription_position.writing_process_id == 1
self.assertEqual(word.has_mixed_status('deleted'), True)
word.partition_according_to_deletion()
self.assertEqual(len(word.word_parts), 3)
self.assertEqual(word.has_mixed_status('deleted'), False)
self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
page = datatypes.page.Page(self.test_file)
word = page.words[67]
word.partition_according_to_writing_process_id()
#print([(word.text, word.deleted) for word in word.word_parts])
word.word_parts[1].transkription_positions[1].deleted = True
word.partition_according_to_deletion()
self.assertEqual(len(word.word_parts), 4)
#print([(word.text, word.deleted) for word in word.word_parts])
partA = Word(text='A', deleted=True)
partB = Word(text='SDF', deleted=False)
word = Word(text='ASDF', word_parts=[ partA, partB])
self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
def test_execute_function_on_parts(self):
page = datatypes.page.Page(self.test_file)
word_parts = [ page.words[67], page.words[68] ]
word_parts, none = execute_function_on_parts(word_parts, 'partition_according_to_writing_process_id')
self.assertEqual(len(word_parts) == 4, True)
def test_process_word_boxes(self):
page = datatypes.page.Page(self.pdf_xml)
page.source = self.pdf_xml_source
page.update_styles(partition_according_to_styles=True)
tr = TranskriptionField(page.source)
box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
indices = [30, 277, 288, 297, 321]
for word_id, index in enumerate(indices):
word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
self.assertEqual(word_over_box is not None, True)
self.assertEqual(word_over_box == page.words[index] or word_over_box in page.words[index].word_parts, True)
#self.assertEqual(word_over_box in page.words[index].word_parts, True)
def test_process_word_several_boxesOn1LIne(self):
page = datatypes.page.Page(self.pdf_xml)
page.source = self.pdf_xml_source
for word in page.words:
word.set_writing_process_id_to_transkription_positions(page)
word.partition_according_to_writing_process_id()
tr = TranskriptionField(page.source)
box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
indices = [30, 277, 288, 297, 321]
empty_tree = ET.ElementTree(ET.Element('page'))
for word_id, index in enumerate(indices):
word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
self.assertEqual(word_over_box is not None, True)
def test_split_according_to_status(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
for transkription_position in word.transkription_positions:
transkription_position.text = 'asdf'\
if transkription_position.writing_process_id == 1\
else word.text
self.assertEqual(word.has_mixed_status('text'), True)
new_words = word.split_according_to_status('text')
#print([word.text for word in new_words ])
self.assertEqual(len(new_words) > 1, True)
self.assertEqual(new_words[0].id, word.id)
self.assertEqual(new_words[0].deleted, word.deleted)
self.assertEqual(new_words[1].id, word.id+1)
manuscript = ArchivalManuscriptUnity()
page = datatypes.page.Page(self.test_file)
word = page.words[67]
page.words = [ word ]
page.update_styles(manuscript=manuscript)
new_words = word.split_according_to_status('style', splits_are_parts=True)
self.assertEqual(len(word.word_parts), 3)
def test__create_new_word(self):
manuscript = ArchivalManuscriptUnity()
page = datatypes.page.Page(self.test_file)
word = page.words[67]
page.words = [ word ]
page.update_styles(manuscript=manuscript)
newWord = word._create_new_word([ word.transkription_positions[0] ], 'style')
for key in Word.COPY_PROPERTY_KEY:
self.assertEqual(newWord.__dict__[key], word.__dict__[key])
self.assertEqual(len(newWord.styles), 1)
def test__get_partial_word_over_box(self):
word = Word(text='test', transkription_positions=[ TranskriptionPosition(id=0), TranskriptionPosition(id=1) ])
word.transkription_positions[0].has_box = Box(earlier_text='asdf')
word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 2)
partA = Word(id=0, text='A', transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition(), TranskriptionPosition(id=1)])
partB.transkription_positions[0].has_box = Box(earlier_text='asdf')
word = Word(text='ASDF', word_parts=[ partA, partB])
word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 2)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_footnotes.py
===================================================================
--- tests_svgscripts/test_footnotes.py (revision 94)
+++ tests_svgscripts/test_footnotes.py (revision 95)
@@ -1,53 +1,53 @@
import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
from datatypes.footnotes import FootnoteColumns, extract_footnotes, extract_footnotes_as_strings, UNITTESTING
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkriptionField import TranskriptionField
class TestExtractFootnotes(unittest.TestCase):
def setUp(self):
UNITTESTING = True
DATADIR = path.dirname(__file__) + sep + 'test_data'
self.test_footnote = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_footnote_verso = DATADIR + sep + 'N_VII_1_xp5_4_page5.svg'
self.test_footnote_recto = DATADIR + sep + 'N_VII_1_xp5_4_page6.svg'
self.test_footnote_multi = DATADIR + sep + 'N_VII_1_xp5_4_page13.svg'
self.test_footnote_multi_xml = DATADIR + sep + 'N_VII_1_page013.xml'
def test_extract_footnotes(self):
footnotes = extract_footnotes_as_strings(svg_file=self.test_footnote_multi, contains_string='Anschlußzeichen')
self.assertEqual(len(footnotes), 4)
page = Page(self.test_footnote_multi_xml)
footnotes = extract_footnotes(page, svg_file=self.test_footnote_multi, contains_string='Anschlußzeichen')
self.assertEqual(len(footnotes), 4)
footnotes = extract_footnotes(page, svg_file=self.test_footnote_multi)
self.assertEqual(len(footnotes), 7)
- print([ footnote for footnote in footnotes if footnote.content.startswith('23: Philosophen')])
+ #print([ footnote for footnote in footnotes if footnote.content.startswith('23: Philosophen')])
def test_columns(self):
svg_tree = ET.parse(self.test_footnote_multi)
transkription_field = TranskriptionField(self.test_footnote_multi)
nodes_in_footnote_area = [ item for item in filter(lambda x: Matrix.IS_IN_FOOTNOTE_AREA(x.get('transform'), transkription_field),\
svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))]
bottom_values = sorted([ bottom_value for bottom_value in set(Matrix(transform_matrix_string=item.get('transform')).getY() for item in nodes_in_footnote_area) ])
footnote_columns = FootnoteColumns(svg_tree.getroot().nsmap, nodes_in_footnote_area, bottom_values, None)
self.assertEqual(len(footnote_columns.footnote_columns), 2)
footnote_columns.register_index(184)
footnote_columns.append('asdf')
self.assertEqual(len(footnote_columns.footnote_columns[0]), 1)
#print(footnote_columns.footnote_columns[0])
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_description.py
===================================================================
--- tests_svgscripts/test_description.py (revision 0)
+++ tests_svgscripts/test_description.py (revision 95)
@@ -0,0 +1,40 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.standoff_tag import StandoffTag
+from datatypes.text import Text
+from datatypes.description import Description
+
+class TestText(unittest.TestCase):
+ def setUp(self):
+ DATADIR = dirname(__file__) + sep + 'test_data'
+ if not isdir(DATADIR):
+ DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
+ self.test_file = DATADIR + sep + 'test.xml'
+ self.test_svg_file = DATADIR + sep + 'test421.svg'
+ self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
+ self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
+ self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
+ self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
+ self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
+ self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
+
+ def test_semantic(self):
+ pass
+ #print(Text.get_semantic_dictionary())
+
+ def test_create_cls_from_node(self):
+ tree = ET.parse(self.test_manuscript)
+ node = tree.xpath('description/earlierDescription[@id="1"]/manuscriptDescription')[0]
+ description = Description.create_cls_from_node(node)
+ #print(description.content)
+ self.assertTrue(len(description.standoff_markups) > 0)
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_text.py
===================================================================
--- tests_svgscripts/test_text.py (revision 94)
+++ tests_svgscripts/test_text.py (revision 95)
@@ -1,55 +1,65 @@
import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.page import Page
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text
class TestText(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_file = DATADIR + sep + 'test.xml'
self.test_svg_file = DATADIR + sep + 'test421.svg'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
def test_semantic(self):
pass
#print(Text.get_semantic_dictionary())
def test_attach_to_tree(self):
empty_tree = ET.ElementTree(ET.Element('page'))
content = 'asdf'
standoff_tag = StandoffTag('bold', 0, len(content)-1)
standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content),id='1')
text = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
text.attach_object_to_tree(empty_tree)
text = Text.create_cls_from_node(empty_tree.xpath('//' + Text.XML_TAG)[0])
self.assertEqual(text.content, content)
self.assertEqual(text.id, '0')
self.assertEqual(len(text.standoff_markups), 2)
#print(ET.dump(empty_tree.getroot()))
def test_join(self):
content = 'asdfa'
standoff_tag = StandoffTag('bold', 0, len(content)-2)
standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
standoff_tag = StandoffTag('bold', 0, len(content)-2)
standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
textB = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
textA.join(textB)
self.assertEqual(textA.content, content + ' ' + content)
+ def test_create_from_html(self):
+ html = 'asdf test the best'
+ text = Text.create_cls_from_html(html)
+ self.assertEqual(len(text.standoff_markups), 3)
+ self.assertEqual(text.standoff_markups[0].startIndex, text.standoff_markups[1].startIndex)
+ self.assertEqual(text.standoff_markups[0].endIndex, text.standoff_markups[1].endIndex)
+ html = 'asdf test'
+ text = Text.create_cls_from_html(html)
+ self.assertEqual(len(text.standoff_markups), 1)
+
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_earlier_description.py
===================================================================
--- tests_svgscripts/test_earlier_description.py (revision 0)
+++ tests_svgscripts/test_earlier_description.py (revision 95)
@@ -0,0 +1,39 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.earlier_description import EarlierDescription
+
+class TestEarlierDescription(unittest.TestCase):
+ def setUp(self):
+ DATADIR = dirname(__file__) + sep + 'test_data'
+ if not isdir(DATADIR):
+ DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
+ self.test_file = DATADIR + sep + 'test.xml'
+ self.test_svg_file = DATADIR + sep + 'test421.svg'
+ self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
+ self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
+ self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
+ self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
+ self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
+ self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
+
+ def test_semantic(self):
+ pass
+ #print(EarlierDescription.get_semantic_dictionary())
+
+ def test_create_cls_from_node(self):
+ tree = ET.parse(self.test_manuscript)
+ node = tree.xpath('description/earlierDescription[@id="1"]')[0]
+ description = EarlierDescription.create_cls_from_node(node)
+ self.assertTrue(description.author is not None)
+ self.assertTrue(description.citation is not None)
+ self.assertTrue(len(description.standoff_markups) > 0)
+
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_word_deletion_path.py
===================================================================
--- tests_svgscripts/test_word_deletion_path.py (revision 94)
+++ tests_svgscripts/test_word_deletion_path.py (revision 95)
@@ -1,40 +1,40 @@
import unittest
from os import sep, path
from os.path import isdir, dirname, basename
import lxml.etree as ET
from svgpathtools.parser import parse_path
from svgpathtools.path import Line
from svgpathtools.path import Path as SVGPath
import sys
import sys
sys.path.append('svgscripts')
from datatypes.page import Page
from datatypes.word_deletion_path import WordDeletionPath
from datatypes.transkription_position import TranskriptionPosition
from datatypes.positional_word_part import PositionalWordPart
class TestPath(unittest.TestCase):
def setUp(self):
DATADIR = path.dirname(__file__) + sep + 'test_data'
self.xml_file = DATADIR + sep + 'N_VII_1_page138.xml'
def test_init(self):
page = Page(self.xml_file)
node = page.page_tree.xpath(WordDeletionPath.XML_TAG)[0]
path = WordDeletionPath.create_cls(node, page)
- print(path)
+ #print(path)
def test_get_semantic_dict(self):
#print(WordDeletionPath.get_semantic_dictionary())
pass
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_writing_process.py
===================================================================
--- tests_svgscripts/test_writing_process.py (revision 94)
+++ tests_svgscripts/test_writing_process.py (revision 95)
@@ -1,48 +1,45 @@
import unittest
from os import sep, path
from os.path import isdir, dirname
import lxml.etree as ET
import sys
import sys
sys.path.append('svgscripts')
dir_changed = False
if not isdir('datatypes'):
sys.path.append(dirname(sys.path[0]))
dir_changed = True
from datatypes.writing_process import WritingProcess
class TestWritingProcess(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_target_file = DATADIR + sep + 'test.xml'
def test_init(self):
wp = WritingProcess()
self.assertEqual(wp.version, 0)
self.assertEqual(wp.description, WritingProcess.VERSION_DESCRIPTION[0])
def test_attachable(self):
empty_tree = ET.ElementTree(ET.Element('page'))
wp = WritingProcess()
wp.attach_object_to_tree(empty_tree)
wp_node = empty_tree.xpath('//' + WritingProcess.XML_TAG)
self.assertEqual(len(wp_node), 1)
self.assertEqual(wp_node[0].get('version'), str(wp.version))
def test_create_writing_process_from_xml(self):
empty_tree = ET.ElementTree(ET.Element('page'))
wp = WritingProcess()
wp.attach_object_to_tree(empty_tree)
wp_node = empty_tree.xpath('//' + WritingProcess.XML_TAG)
wp2 = WritingProcess.create_writing_process_from_xml(wp_node[0])
self.assertEqual(wp2.version, wp.version)
- def test_semantics(self):
- dict = WritingProcess.get_semantic_dictionary()
- #print(dict)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_data/N_VII_1_page006.xml
===================================================================
--- tests_svgscripts/test_data/N_VII_1_page006.xml (revision 94)
+++ tests_svgscripts/test_data/N_VII_1_page006.xml (revision 95)
@@ -1,1276 +1,1276 @@
svgWordPosition2019-08-02 15:17:372019-08-02 15:17:372019-08-02 15:30:592019-08-02 15:30:59
- 2020-05-13 17:06:34
+ 2020-06-05 11:30:54
Index: tests_svgscripts/test_manuscript.py
===================================================================
--- tests_svgscripts/test_manuscript.py (revision 94)
+++ tests_svgscripts/test_manuscript.py (revision 95)
@@ -1,54 +1,56 @@
import unittest
from os import sep, path
from os.path import basename, dirname, isfile
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.color import Color
class TestArchivalManuscriptUnity(unittest.TestCase):
def setUp(self):
ArchivalManuscriptUnity.UNITTESTING = True
DATADIR = dirname(__file__) + sep + 'test_data'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
def test_init(self):
title = 'Test I 1'
manuscript = ArchivalManuscriptUnity(title=title)
self.assertEqual(manuscript.title, title)
def test_get_semanticAndDataDict(self):
semantic_dict = ArchivalManuscriptUnity.get_semantic_dictionary()
#print(semantic_dict)
def test_create_cls(self):
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
+ self.assertTrue(manuscript.description is not None)
+ self.assertEqual(len(manuscript.earlier_descriptions), 2)
self.assertEqual(manuscript.title, basename(self.test_manuscript).replace('.xml','').replace('_', ' '))
self.assertEqual(manuscript.manuscript_type, 'Notizheft')
self.assertEqual(len(manuscript.pages), 4)
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript, page_status_list=['faksimile merged'])
self.assertEqual(len(manuscript.pages), 2)
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript, page_status_list=['faksimile merged', 'words processed'])
self.assertEqual(len(manuscript.pages), 1)
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript, page_xpath='//pages/page/@output')
self.assertEqual(len(manuscript.pages), 4)
def test_get_color(self):
color = Color()
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
self.assertEqual(manuscript.get_color(color.hex_color) is not None, True)
self.assertEqual(manuscript.get_color("#F7F6F5") is None, True)
def test_update_colors(self):
color = Color()
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
manuscript.update_colors(color)
self.assertEqual(len(manuscript.colors), 2)
#print(ET.dump(manuscript.manuscript_tree.getroot()))
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_style.py
===================================================================
--- tests_svgscripts/test_style.py (revision 94)
+++ tests_svgscripts/test_style.py (revision 95)
@@ -1,96 +1,98 @@
import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.color import Color
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.page import Page
from datatypes.style import Style
class TestStyle(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_file = DATADIR + sep + 'test.xml'
self.test_svg_file = DATADIR + sep + 'test421.svg'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
def test_create_cls(self):
page = Page(self.test_page)
style_string = "st11 st10 st5"
style = Style.create_cls(page, style_string)
self.assertEqual(style.font_family, 'Weidemann-Book')
self.assertEqual(style.color.hex_color, "#DADADA")
self.assertEqual(style.writing_instrument, 'schwarze Tinte')
style_string = "st11 st10"
style = Style.create_cls(page, style_string)
self.assertEqual(style.font_family, 'Weidemann-Book')
self.assertEqual(style.color.name, "black")
self.assertEqual(style.writing_instrument, 'schwarze Tinte')
style_string = "st11 st3"
style = Style.create_cls(page, style_string, create_css=True)
+ #style.writing_process_id = 1
+ #style.create_css_styles()
self.assertEqual(style.font_family, 'Weidemann-Book')
self.assertEqual(style.font_size, '9px')
style_string = "st18"
page = Page(self.test_page)
style = Style.create_cls(page, style_string)
self.assertEqual(style.color.name, 'black')
def test_remove_irrelevant_style_keys(self):
page = Page(self.test_page)
style_string = "st11 st10 st9 st5 st0"
self.assertEqual(Style.remove_irrelevant_style_keys(style_string, page), "st11 st5 st9")
def test_process_style_classes(self):
style = Style()
style.color = Color.create_cls(hex_color='#009CDE')
style.process_style_classes()
self.assertEqual(style.writing_instrument, 'violette Tinte')
self.assertEqual(style.font, 'deutsche Schreibschrift')
style.font_family = "NewsGothicBT-Bold"
style.process_style_classes()
self.assertEqual(style.writing_instrument, 'Blaustift')
self.assertEqual(style.font, 'lateinische Schreibschrift')
style = Style()
style.font_family = "NewsGothicBT-Bold"
style.process_style_classes()
#print(style.css_styles)
def test_get_semantic_dictionary(self):
dictionary = Style.get_semantic_dictionary()
#print(dictionary)
def test_copy(self):
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
page = Page(self.test_page)
page.words = [ page.words[0] ]
page.update_styles(manuscript=manuscript, add_to_parents=True)
self.assertEqual(len(manuscript.styles), 1)
styleA = page.words[0].transkription_positions[0].style
styleB = styleA.create_a_copy()
self.assertEqual(styleA == styleB, True)
styleB = styleA.create_a_copy(reduce_writing_process_id=True)
self.assertEqual(styleA != styleB, True)
def test_eq(self):
page = Page(self.test_page)
style_string = "st11 st10 st5"
styleA = Style.create_cls(page, style_string)
styleB = Style.create_cls(page, style_string)
self.assertEqual(styleA == styleB, True)
style_string = "st11 st10"
styleC = Style.create_cls(page, style_string)
self.assertEqual(styleA != styleC, True)
if __name__ == "__main__":
unittest.main()