Index: py2ttl/class_spec.py
===================================================================
--- py2ttl/class_spec.py (revision 91)
+++ py2ttl/class_spec.py (revision 92)
@@ -1,218 +1,227 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This is an abstract class for all classes that are semantically relevant.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import abc
import inspect
import warnings
class UnSemanticClass:
    """Marker class: subclasses of this class are not semantically relevant,
    even if their superclasses are. Checked via issubclass when collecting
    semantic classes for ontology generation.
    """
    pass
class SemanticClass(metaclass=abc.ABCMeta):
    """
    This is an abstract class for all classes that are semantically relevant.
    """
    # dictionary keys used when building semantic/property dictionaries
    HAS_PART = 'has_part'
    HAS_SEQNUM = 'has_seqnum'
    SINGLE_VALUE = 1
    LIST = -99
    CLASS_KEY = 'class'
    CARDINALITY = "cardinality"
    CARDINALITY_RESTRICTION = "cardinality_restriction"
    # well-known external property IRIs
    HAS_HOMOTYPIC_PARTS_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasHomotypicParts'
    HOMOTYPIC_HAS_TEXT_URL_STRING = 'http://www.nie.org/ontology/homotypic#hasText'
    STOFF_STYLE_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#styleHasCSS'
    PROPERTY_NAME = "name"
    PROPERTY_LABEL = "label"
    PROPERTY_COMMENT = "comment"
    PROPERTIES_KEY = "properties"
    SUBCLASS_OF = "rdfs:subClassOf"
    SUBPROPERTYOF = "subPropertyOf"
    # base_uri -> super class identifier; consulted by
    # return_dictionary_after_updating_super_classes when a subPropertyOf IRI
    # with that base_uri is used
    SUPER_CLASSES_DICT = { 'http://www.nie.org/ontology/homotypic': 'HomotypicEntity', 'http://www.nie.org/ontology/standoff': 'Style' }
    SUPER_PROPERTY = "super_property"
    THIS = "this"
    TYPE = "type"
@classmethod
def create_semantic_property_dictionary(cls, property_key, class_type, cardinality=0, cardinality_restriction='cardinality', name='', label='', comment='', subPropertyOf='') -> dict:
    """Create a semantic property dictionary.

    How to make a subproperty: pass the IRI of the super property as
    subPropertyOf=IRI, make sure that the base_uri of IRI (as key) and the
    class identifier of the super class (as value) are in
    cls.SUPER_CLASSES_DICT, then call
    cls.return_dictionary_after_updating_super_classes -> it will subclass
    the class that owns the subproperty to the super class.

    :param property_key: key under which the property dictionary is stored
    :param class_type: python class (or builtin type) of the property value
    :param cardinality: if > 0, a cardinality restriction is recorded
    :return: semantic property dictionary (dict)
    """
    property_content = { SemanticClass.CLASS_KEY: class_type }
    if cardinality > 0:
        property_content.update({ SemanticClass.CARDINALITY: cardinality})
        property_content.update({ SemanticClass.CARDINALITY_RESTRICTION: cardinality_restriction})
    if name != '':
        property_content.update({ SemanticClass.PROPERTY_NAME: name})
    if label != '':
        property_content.update({ SemanticClass.PROPERTY_LABEL: label})
    if comment != '':
        property_content.update({ SemanticClass.PROPERTY_COMMENT: comment})
    if subPropertyOf != '':
        property_content.update({ SemanticClass.SUBPROPERTYOF: subPropertyOf})
    return { property_key: property_content }
@classmethod
def get_class_dictionary(cls):
    """Creates and returns a class_dictionary with the keys cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].

    If the class declares RDFS_SUBCLASSOF_LIST, that list is used for the
    subclass relation; otherwise the direct python super class is recorded
    under cls.TYPE (when it is itself a SemanticClass).
    """
    class_dict = {cls.THIS: cls }
    # optional class attribute: owl:equivalentClass IRIs
    if cls.__dict__.get('OWL_EQUIVALENTCLASSES') and len(cls.OWL_EQUIVALENTCLASSES) > 0:
        class_dict.update({'owl:equivalentClass': cls.OWL_EQUIVALENTCLASSES })
    # optional class attribute: explicit rdfs:subClassOf IRIs
    if cls.__dict__.get('RDFS_SUBCLASSOF_LIST') and len(cls.RDFS_SUBCLASSOF_LIST) > 0:
        class_dict.update({cls.SUBCLASS_OF: cls.RDFS_SUBCLASSOF_LIST })
    else:
        # fall back to the direct python superclass
        direct_super_class = inspect.getclasstree([cls],unique=True)[0][0]
        if issubclass(direct_super_class, SemanticClass) and direct_super_class != SemanticClass:
            class_dict.update({cls.TYPE: direct_super_class})
    return class_dict
def get_name_and_id(self):
    """Return an identification for object as 2-tuple (class name, id).

    The id is taken from the first available of the instance attributes
    'id', 'number' or 'title' (spaces replaced by underscores); 0 otherwise.
    """
    attributes = self.__dict__
    identifier = 0
    if 'id' in attributes:
        identifier = self.id
    elif 'number' in attributes:
        identifier = self.number
    elif 'title' in attributes:
        identifier = self.title.replace(' ', '_')
    return type(self).__name__, identifier
def _get_list_of_type(self, list_type):
    """Return the first instance-attribute list whose first element has type
    list_type; an empty list if no such attribute exists.
    """
    for candidate in self.__dict__.values():
        if type(candidate) == list and len(candidate) > 0 and type(candidate[0]) == list_type:
            return candidate
    return []
def get_object_from_list_with_id(self, object_type, object_id):
    """Return the first object of type object_type whose id equals object_id,
    None if not found.
    """
    for item in self._get_list_of_type(object_type):
        if item.id == object_id:
            return item
    return None
@classmethod
def get_cls_hasPart_objectCls_dictionaries(cls, object_cls, xpath, object_seqnum_xpath=None, cardinality=0, cardinality_restriction='minCardinality'):
    """Return a dictionary containing the information for creating a class that can act
    as an intermediary between cls and a number of object_cls if object_cls has
    a position in a sequence of object_classes that belong to cls.

    :param object_cls: class of the sequence members
    :param xpath: xpath selecting the object_cls instances
    :param object_seqnum_xpath: xpath of the sequence number (defaults to xpath + '/@id')
    :param cardinality: cardinality of the generated property (0 = unrestricted)
    :param cardinality_restriction: owl restriction name used when cardinality > 0
    """
    part_name = object_cls.__name__ + 'Part'
    has_part_name = object_cls.__name__.lower() + 'PartHas' + object_cls.__name__
    has_seqnum_name = object_cls.__name__.lower() + 'HasSeqNum'
    if object_seqnum_xpath is None:
        object_seqnum_xpath = xpath + '/@id'
    # property linking the intermediary part class to the actual object_cls
    object_part_dictionary = { 'class': object_cls, 'cardinality': 1, 'xpath': xpath,\
            'name': has_part_name, 'label': '{0} has a {1}'.format(part_name, object_cls.__name__),\
            'comment': '{0} has a part, that is a {1}'.format(part_name, object_cls.__name__)}
    # property recording the position of the part in the sequence
    # NOTE(review): the second .format argument below is unused by its template
    object_seqnum_dictionary = { 'class': int, 'cardinality': 1, 'xpath': object_seqnum_xpath,\
            'name': has_seqnum_name, 'label': '{0} has a sequence number'.format(part_name),\
            'comment': '{0} has a part, that stands in a sequence with this number'.format(part_name, object_cls.__name__)}
    # description of the intermediary class itself
    object_dictionary = { 'class_name': part_name, SemanticClass.HAS_PART: object_part_dictionary, SemanticClass.HAS_SEQNUM: object_seqnum_dictionary,\
            'label': '{0} part'.format(object_cls.__name__.lower()),\
            'comment': 'This class servers as a intermediary between {0} and {1}. {0} has some {1} in a specific sequence.'.format(cls.__name__, object_cls.__name__)}
    # top-level property dictionary flagged as an ordered list
    dictionary = { 'flag': 'ordered_list' , 'class': object_dictionary, 'cardinality': cardinality, 'cardinality_restriction': cardinality_restriction, 'xpath': xpath,\
            'name': cls.__name__.lower() + 'Has' + part_name, 'label': '{0} has a part that connects it with a {1}'.format(cls.__name__, object_cls.__name__),\
            'comment': '{0} has a part that connects it with a {1}, that has a position in a sequence of {1}'.format(cls.__name__, object_cls.__name__)}
    return dictionary
@classmethod
@abc.abstractmethod
def get_semantic_dictionary(cls):
    """Creates a semantic dictionary with cls.CLASS_KEY and cls.PROPERTIES_KEY as its keys.

    The class-key points to a class_dictionary with the keys: cls.THIS [, cls.SUBCLASS_OF, cls.TYPE].
    Create initial dictionary using cls.get_class_dictionary():
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: {} }
    The properties_key points to a properties_dictionary with semantically relevant keys
    of self.__dict__ as keys. Use cls.create_semantic_property_dictionary(...) in order to
    add a property dictionary for each property as follows:
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary(property_key, ...))
    Return dictionary by using:
        cls.return_dictionary_after_updating_super_classes(dictionary)
    """
    pass
@classmethod
def return_dictionary_after_updating_super_classes(cls, dictionary):
    """Return semantic dictionary after updating super classes if necessary.

    For every subPropertyOf IRI whose base uri is known in
    cls.SUPER_CLASSES_DICT, the corresponding super class IRI is appended to
    the class dictionary's rdfs:subClassOf list (unless already present).
    """
    if cls.PROPERTIES_KEY not in dictionary:
        return dictionary
    base_uris = { prop_dict[cls.SUBPROPERTYOF].split('#')[0]
                  for prop_dict in dictionary[cls.PROPERTIES_KEY].values()
                  if prop_dict.get(cls.SUBPROPERTYOF) }
    for base_uri in base_uris:
        super_class_name = cls.SUPER_CLASSES_DICT.get(base_uri)
        if not super_class_name:
            continue
        super_class_uri = base_uri + '#' + super_class_name
        class_dict = dictionary[cls.CLASS_KEY]
        current_list = class_dict.get(cls.SUBCLASS_OF)
        if current_list and super_class_uri in current_list:
            continue
        subclass_list = current_list if current_list else []
        subclass_list.append(super_class_uri)
        class_dict.update({cls.SUBCLASS_OF: subclass_list})
    return dictionary
def __repr__(self) -> str:
    """Return a representation of all semantically relevant properties,
    wrapped in angle brackets."""
    return '<' + self.__str__() + '>'
def __str__(self) -> str:
    """Return a str of all semantically relevant properties.

    Lists 'key: value' pairs for every semantic property that is set on the
    instance, prefixed by the class name.
    """
    class_name = type(self).__name__
    parts = []
    for key in self.get_semantic_dictionary()[self.PROPERTIES_KEY].keys():
        if key not in self.__dict__:
            continue
        value = self.__dict__[key]
        if value != None or (type(value) == list and len(value) > 0):
            parts.append(f'{key}: {value}')
    return f'{class_name} ' + ', '.join(parts)
Index: py2ttl/data_handler.py
===================================================================
--- py2ttl/data_handler.py (revision 91)
+++ py2ttl/data_handler.py (revision 92)
@@ -1,190 +1,195 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to add data to a rdf graph.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>. 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
from rdflib import RDF as ns_rdf
from os.path import isfile
import random
import warnings
from class_spec import SemanticClass
from config import DATA_URL
class RDFDataHandler:
    """
    This class can be used to add data to a rdf graph.
    """
    # set to True in unit tests (silences the disabled debug print hook)
    UNITTESTING = False
    # maps python builtin types to RDF/XSD literal datatypes
    SIMPLE_DATA_TYPE_MAPPING = { int: XSD.integer, float: XSD.float, str: XSD.string, bool: XSD.boolean, list: RDF.List }
def __init__(self, target_file, mapping_dictionary):
    """Initialize the handler.

    :param target_file: path the data graph will be serialized to
    :param mapping_dictionary: mapping produced by py2ttl_ontology; must
        contain an 'ontology' entry with project_name/project_uri
    :raises Exception: if mapping_dictionary has no 'ontology' entry
    """
    self.target_file = target_file
    self.mapping_dictionary = mapping_dictionary
    self.ontology_graph = Graph()
    self.data_graph = Graph()
    # maps already-added data instances to their URIRef so each instance
    # is only serialized once
    self.data_identifier_mapping = {}
    if bool(self.mapping_dictionary.get('ontology')):
        self.project_name = self.mapping_dictionary['ontology'].get('project_name')
        self.project_uri = URIRef(self.mapping_dictionary['ontology'].get('project_uri'))
        ontology_file = self.mapping_dictionary['ontology'].get('ontology_file')
        if bool(ontology_file) and isfile(ontology_file):
            # ontology graph is consulted for rdfs:range datatypes in add_data
            self.ontology_graph.parse(ontology_file, format="turtle")
        self.ns = { uriref: ns for ns, uriref in self.data_graph.namespace_manager.namespaces() }
        self.data_graph.bind(self.project_name, self.project_uri)
        self.data_graph.bind('data', DATA_URL + '#')
    else:
        raise Exception('Error: mapping_dictionary does not contain key "ontology"!')
def add_data(self, data_instance, identifier_prefix, parent_data_instance=None):
    """Add a data rdf instance of data_instance to the data_graph.

    Recursively adds semantic properties: ordered lists become RDF
    collections, SemanticClass values become linked resources (added on
    demand), everything else becomes a typed literal. Since revision 92 a
    list of simple values is emitted as one literal triple per item.

    :param data_instance: instance of a SemanticClass subclass
    :param identifier_prefix: prefix for the generated identifier uri
    :param parent_data_instance: used to resolve children referenced by id
    :raises Exception: if the mapping dictionary has no entry for the
        instance's class or one of its property keys
    :return: (rdflib.URIRef) subject_uri of data instance
    """
    identifier_uri = self.create_identifier_uri(data_instance, identifier_prefix)
    if bool(self.mapping_dictionary['classes'].get(type(data_instance).__name__)):
        class_uri = self.mapping_dictionary['classes'][type(data_instance).__name__]['class_uri']
        self.data_identifier_mapping.update({data_instance: identifier_uri})
        self.data_graph_add((identifier_uri, RDF.type, class_uri))
        semantic_dict = data_instance.get_semantic_dictionary()
        for key, content in semantic_dict['properties'].items():
            if bool(self.mapping_dictionary['classes'][type(data_instance).__name__]['properties'].get(key)):
                datatype = content.get('class')
                cardinality = content.get('cardinality')\
                        if bool(content.get('cardinality')) else 0
                # skip unset values; -1 is the "unset" marker for ints
                if data_instance.__dict__.get(key) is not None\
                   and (type(data_instance.__dict__.get(key)) != int or data_instance.__dict__.get(key) != -1):
                    predicate_uri = self.mapping_dictionary['classes'][type(data_instance).__name__]['properties'][key]
                    child_data_instance = data_instance.__dict__.get(key)
                    new_identifier_prefix = identifier_uri[identifier_uri.index('#')+1:]
                    if datatype is list:
                        # ordered list -> rdf collection
                        self.add_ordered_list(child_data_instance, identifier_uri, predicate_uri,\
                                new_identifier_prefix, data_instance)
                    elif issubclass(datatype, SemanticClass):
                        if type(child_data_instance) is not list:
                            if type(child_data_instance) != datatype:
                                # value is an id -> resolve it via the parent instance
                                child_id = child_data_instance
                                child_data_instance = parent_data_instance.get_object_from_list_with_id(datatype,\
                                        child_id)
                                if child_data_instance is None:
                                    print(key, content)
                                    msg = 'No child_data_instance found for data_instance {0}: looking for {1} with id {2}'.format(\
                                            type(parent_data_instance), datatype, child_id)
                                    raise Exception(msg)
                                else:
                                    # cache the resolved child on the instance
                                    new_list_name = 'list_of_' + datatype.__name__ + 's'
                                    if new_list_name in data_instance.__dict__.keys():
                                        data_instance.__dict__[new_list_name].append(child_data_instance)
                                    else:
                                        data_instance.__dict__.update({ new_list_name: [ child_data_instance ]})
                            if child_data_instance not in self.data_identifier_mapping.keys():
                                child_identifier_uri = self.add_data(child_data_instance, new_identifier_prefix,\
                                        parent_data_instance=data_instance)
                            else:
                                child_identifier_uri = self.data_identifier_mapping[child_data_instance]
                            self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
                        else:
                            for child_item in child_data_instance:
                                if child_item not in self.data_identifier_mapping.keys():
                                    child_identifier_uri = self.add_data(child_item, new_identifier_prefix,\
                                            parent_data_instance=data_instance)
                                else:
                                    child_identifier_uri = self.data_identifier_mapping[child_item]
                                self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
                    else:
                        # simple value(s) -> typed literal(s); prefer the
                        # rdfs:range declared in the ontology when available
                        literal_datatype = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING[datatype]
                        ontology_datatypes = [ o for o in self.ontology_graph.objects(subject=predicate_uri, predicate=RDFS.range) ]
                        if len(ontology_datatypes) > 0:
                            literal_datatype = ontology_datatypes[0]
                        if type(child_data_instance) is list:
                            for child_item in child_data_instance:
                                object_literal = Literal(str(child_item), datatype=literal_datatype)
                                self.data_graph_add((identifier_uri, predicate_uri, object_literal))
                        else:
                            object_literal = Literal(str(child_data_instance), datatype=literal_datatype)
                            self.data_graph_add((identifier_uri, predicate_uri, object_literal))
            else:
                msg = 'Mapping dictionary for {0} does not contain a entry for {1}!'.format(type(data_instance).__name__, key)
                raise Exception(msg)
    else:
        msg = 'Mapping dictionary does not contain a entry for {}!'.format(type(data_instance).__name__)
        raise Exception(msg)
    return identifier_uri
def add_ordered_list(self, data_instance_list, identifier_uri, predicate_uri, identifier_prefix, data_instance):
    """Add data_instance_list as an ordered RDF collection to the data_graph.

    Instances already in data_identifier_mapping are reused; others are added
    via add_data. Nothing is emitted for an empty list.
    """
    if not data_instance_list:
        return
    child_identifiers = []
    for item in data_instance_list:
        if item in self.data_identifier_mapping.keys():
            child_identifiers.append(self.data_identifier_mapping[item])
        else:
            child_identifiers.append(self.add_data(item, identifier_prefix, data_instance))
    collection_head = self.generate_RDF_collection(child_identifiers)
    self.data_graph_add((identifier_uri, predicate_uri, collection_head))
def create_identifier_uri(self, data_instance, identifier_prefix):
    """Return a data identifier uri.

    Builds '<DATA_URL>#<prefix>_<ClassName><id>'; on collision with an
    existing subject the id is replaced by random bits, widening the random
    range on every retry.
    :return: (rdflib.URIRef) subject_uri of data instance
    """
    data_type, id = data_instance.get_name_and_id()
    identifier_uri = URIRef(DATA_URL + '#' + identifier_prefix + '_' + data_type + str(id))
    randombit_length = 5
    while (identifier_uri, None, None) in self.data_graph:
        identifier_uri = URIRef(DATA_URL + '#' + identifier_prefix + '_' + data_type + str(random.getrandbits(randombit_length)))
        randombit_length += 1
    return identifier_uri
def data_graph_add(self, rdf_triple):
    """Add a triple to the data graph.

    Central choke point for all triple additions (useful for debugging).
    """
    # debug hook (disabled): print every triple unless unit testing
    #not RDFDataHandler.UNITTESTING and print(rdf_triple)
    self.data_graph.add(rdf_triple)
def generate_RDF_collection(self, vals ) -> BNode:
    """Generate an RDF List (rdf:first/rdf:rest chain) from vals and return
    the head of the list.

    Based on code by Ivan Herman (World Wide Web Consortium), published under
    the W3C Software Notice and License.

    @param vals: array of RDF Resources
    @return: head of the List (an RDF Resource); rdf:nil for an empty vals
    """
    nodes = [ BNode() for _ in vals ]
    nodes.append(ns_rdf["nil"])
    for node, value, rest in zip(nodes, vals, nodes[1:]):
        self.data_graph_add((node, ns_rdf["first"], value))
        self.data_graph_add((node, ns_rdf["rest"], rest))
    return nodes[0]
def write(self, output_format="turtle"):
    """Serialize self.data_graph to self.target_file.

    :param output_format: rdflib serialization format (default "turtle")
    """
    # context manager guarantees the handle is closed even if
    # serialization raises (the original left the file open on error)
    with open(self.target_file, 'wb+') as output_file:
        output_file.write(self.data_graph.serialize(format=output_format))
Index: py2ttl/py2ttl_ontology.py
===================================================================
--- py2ttl/py2ttl_ontology.py (revision 91)
+++ py2ttl/py2ttl_ontology.py (revision 92)
@@ -1,361 +1,363 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py classes that are
subclasses of class_spec.SemanticClass to
a owl ontology in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>. 1}}}
import getopt
import importlib
import importlib.util
import inspect
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
import re
import sys
import warnings
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from class_spec import SemanticClass, UnSemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from data_handler import RDFDataHandler
sys.path.append('shared_util')
from myxmlwriter import dict2xml
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLOntologyConverter:
    """This class can be used convert semantic_dictionaries to a owl ontology in turtle format.
    """
    # set to True in unit tests to silence the progress bar
    UNITTESTING = False
    # rdflib property path matching any rdfs:subClassOf chain
    # (reflexive/transitive closure), used to avoid redundant subclass triples
    INFERRED_SUB_CLASS = RDFS.subClassOf * '*'
def __init__(self, project_ontology_file=None):
    """Initialize the converter.

    :param project_ontology_file: optional turtle file; if it exists and is
        non-empty, its ontology uri, namespaces and project name are adopted.
    """
    self.class_uri_dict = {}
    self.uri_mapping4cls_and_properties = {}
    self.project_graph = Graph()
    self.base_uriref = URIRef(PROJECT_URL)
    self.project_name = PROJECT_NAME
    self.ns = { self.base_uriref + '#': self.project_name }
    if project_ontology_file is not None and isfile(project_ontology_file):
        self.project_graph.parse(project_ontology_file, format="turtle")
        if len(self.project_graph) > 0:
            # adopt the ontology uri and namespace prefix of the parsed file
            self.base_uriref = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False)
            self.ns = { uriref: ns for ns, uriref in self.project_graph.namespace_manager.namespaces() }
            self.project_name = self.ns.get(self.base_uriref + '#')
    self.project_graph.bind(self.project_name, self.base_uriref + '#')
    self.uri_mapping4cls_and_properties.update({ 'ontology': { 'project_name': self.project_name, 'project_uri': self.base_uriref + '#' }})
    self.uri_mapping4cls_and_properties.update({ 'classes': {} })
def addClass2Graph(self, cls, semantic_dict=None) -> (URIRef, type):
    """Add a class to project_graph.

    First records the python super class (cls.TYPE) as rdfs:subClassOf, then
    (since revision 92) adds the explicit SUBCLASS_OF IRIs — skipping any that
    are already reachable through an inferred rdfs:subClassOf chain.
    :return: (cls_uri (URIRef), super_cls (cls))
    """
    if semantic_dict is None:
        semantic_dict = cls.get_semantic_dictionary()
    comment, label = self.get_comment_label(cls)
    cls_uri = URIRef(self.base_uriref + '#' + cls.__name__)
    self.project_graph.add((cls_uri, RDF.type, OWL.Class))
    self.project_graph.add((cls_uri, RDFS.isDefinedBy, self.base_uriref))
    if comment != '':
        self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en')))
    if label != '':
        self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en')))
    super_uri = None
    super_cls = None
    if bool(semantic_dict[SemanticClass.CLASS_KEY].get(SemanticClass.TYPE)):
        # ensure the python super class exists in the graph before linking
        super_cls = semantic_dict[SemanticClass.CLASS_KEY].get(SemanticClass.TYPE)
        super_uri = self.createClassAndProperties(super_cls)
        if super_uri is not None:
            self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
    if SemanticClass.SUBCLASS_OF in semantic_dict[SemanticClass.CLASS_KEY].keys()\
       and len(semantic_dict[SemanticClass.CLASS_KEY][SemanticClass.SUBCLASS_OF]) > 0:
        for super_uri_string in semantic_dict[SemanticClass.CLASS_KEY].get(SemanticClass.SUBCLASS_OF):
            super_uri = URIRef(super_uri_string)
            # skip if already an (inferred) subclass of super_uri
            if not (cls_uri, self.INFERRED_SUB_CLASS, super_uri) in self.project_graph:
                self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
    return cls_uri, super_cls
def addProperty2Graph(self, property_uri, domain_uri, range_uri, info_dict, property_type=OWL.ObjectProperty):
    """Add a property with label, comment, domain and range to
    self.project_graph; add a cardinality restriction to the domain class
    when info_dict specifies a positive cardinality.
    """
    if SemanticClass.PROPERTY_LABEL in info_dict.keys():
        label = info_dict[SemanticClass.PROPERTY_LABEL]
    else:
        # derive a default label from the property's local name
        label = 'has ' + property_uri.split('#')[1].replace('has','')
    graph = self.project_graph
    graph.add((property_uri, RDF.type, property_type))
    graph.add((property_uri, RDFS.isDefinedBy, self.base_uriref))
    graph.add((property_uri, RDFS.domain, domain_uri))
    graph.add((property_uri, RDFS.range, range_uri))
    if SemanticClass.PROPERTY_COMMENT in info_dict.keys():
        comment = info_dict[SemanticClass.PROPERTY_COMMENT]
        graph.add((property_uri, RDFS.comment, Literal(comment, lang='en')))
    graph.add((property_uri, RDFS.label, Literal(label, lang='en')))
    if SemanticClass.CARDINALITY in info_dict.keys()\
       and info_dict[SemanticClass.CARDINALITY] > 0:
        self.addRestriction2Class(domain_uri, property_uri, info_dict)
def addRestriction2Class(self, cls_uri, property_uri, info_dict):
    """Adds restriction on property_uri to class cls_uri.

    Creates an anonymous owl:Restriction node subclassing cls_uri, using the
    restriction kind named in info_dict (e.g. 'minCardinality') or
    owl:cardinality by default. No-op unless info_dict carries a positive
    cardinality.
    """
    if SemanticClass.CARDINALITY in info_dict.keys()\
       and info_dict[SemanticClass.CARDINALITY] > 0:
        if (cls_uri, None, None) not in self.project_graph:
            # restriction on a class unknown to the graph is suspicious
            warnings.warn('{} not in graph!'.format(cls_uri))
        restriction = BNode()
        # default to owl:cardinality unless a specific restriction is given
        cardinality_restriction = URIRef(OWL + info_dict[SemanticClass.CARDINALITY_RESTRICTION])\
                if SemanticClass.CARDINALITY_RESTRICTION in info_dict.keys()\
                else OWL.cardinality
        cardinality = info_dict[SemanticClass.CARDINALITY]
        self.project_graph.add((cls_uri, RDFS.subClassOf, restriction))
        self.project_graph.add((restriction, RDF.type, OWL.Restriction))
        self.project_graph.add((restriction, OWL.onProperty, property_uri))
        self.project_graph.add((restriction, cardinality_restriction, Literal(str(cardinality), datatype=XSD.nonNegativeInteger)))
def create_ontology(self, datatypes_dir, target_ontology_file):
    """Convert all classes contained in datatypes_dir that are subclasses of class_spec.SemanticClass to rdf.

    Serializes the resulting graph to target_ontology_file and (outside unit
    tests) writes the class/property uri mapping to an xml file.
    :return: exit code (int): 0 on success, 1 if datatypes_dir does not exist
    """
    if isdir(datatypes_dir):
        semantic_classes = self.get_semantic_classes(datatypes_dir)
        if not Py2TTLOntologyConverter.UNITTESTING:
            bar = Bar('creating classes and properties', max=len(semantic_classes))
        for cls in semantic_classes:
            self.createClassAndProperties(cls)
            not bool(Py2TTLOntologyConverter.UNITTESTING) and bar.next()
        not bool(Py2TTLOntologyConverter.UNITTESTING) and bar.finish()
        self.uri_mapping4cls_and_properties['ontology'].update({'ontology_file': target_ontology_file})
        # context manager guarantees the handle is closed on error
        with open(target_ontology_file, 'wb+') as ontology_file:
            ontology_file.write(self.project_graph.serialize(format="turtle"))
        if not Py2TTLOntologyConverter.UNITTESTING:
            xml_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2' + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
            dict2xml(self.uri_mapping4cls_and_properties, xml_file)
    else:
        print('Error: dir {} does not exist!'.format(datatypes_dir))
        # fix: was a bare `usage` expression that never called the function
        usage()
        return 1
    return 0
def createClassAndProperties(self, cls):
    """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class.

    Records created uris in self.uri_mapping4cls_and_properties; each class is
    processed at most once (tracked via self.class_uri_dict).
    :return: (rdflib.URIRef) uri of the class
    """
    if not cls.__name__ in self.class_uri_dict:
        self.class_uri_dict.update({cls.__name__: cls})
        semantic_dict = cls.get_semantic_dictionary()
        cls_uri, super_cls = self.addClass2Graph(cls, semantic_dict)
        uri_mapping4properties = {}
        for property_key in self._get_semantic_dictionary_keys_super_first(semantic_dict['properties']):
            # NOTE(review): super_semantic_dict is loop-invariant and could be hoisted
            super_semantic_dict = {} if super_cls is None else super_cls.get_semantic_dictionary()
            if len(super_semantic_dict) == 0 or not bool(super_semantic_dict['properties'].get(property_key)):
                # property not inherited -> create it for this class
                property_dict4key = semantic_dict['properties'].get(property_key)
                property_cls = property_dict4key.get('class')
                subject_uri, property_uri = self.createProperty(cls_uri, property_key, property_cls, property_dict4key)
                uri_mapping4properties.update({ property_key: property_uri })
            elif bool(self.uri_mapping4cls_and_properties.get('classes').get(super_cls.__name__).get('properties').get(property_key)):
                # inherited property -> reuse the uri recorded for the super class
                property_uri = self.uri_mapping4cls_and_properties['classes'][super_cls.__name__]['properties'][property_key]
                uri_mapping4properties.update({ property_key: property_uri})
        self.uri_mapping4cls_and_properties.get('classes').update({ cls.__name__: { 'class_uri': cls_uri, 'properties': uri_mapping4properties }})
    return URIRef(self.base_uriref + '#' + cls.__name__)
def createProperty(self, domain_uri, property_name, range_cls, info_dict) -> (URIRef, URIRef):
"""Creates a owl:ObjectProperty.
:return: tuple of domain_uri (rdflib.URIRef) and property_uri (rdflib.URIRef) of created property
"""
name = self.createPropertyName(property_name=property_name)\
if SemanticClass.PROPERTY_NAME not in info_dict.keys() else info_dict[SemanticClass.PROPERTY_NAME]
property_uri = URIRef(self.base_uriref + '#' + name)
inferredSubClass = RDFS.subClassOf * '*'
range_uri = URIRef(self.base_uriref + '#' + range_cls.__name__)
super_property_uri = None
if SemanticClass.SUBPROPERTYOF in info_dict.keys():
super_property_uri = URIRef(info_dict[SemanticClass.SUBPROPERTYOF])
elif SemanticClass.SUPER_PROPERTY in info_dict.keys():
domain_uri, super_property_uri = self.createProperty(domain_uri,\
info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME),\
range_cls, info_dict[SemanticClass.SUPER_PROPERTY])
if (property_uri, None, None) not in self.project_graph:
property_type = OWL.ObjectProperty
if range_cls.__module__ == 'builtins':
if range_cls != list:
property_type = OWL.DatatypeProperty
range_uri = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING.get(range_cls)
if range_uri == XSD.string and property_name == 'URL':
range_uri = XSD.anyURI
self.addProperty2Graph(property_uri, domain_uri, range_uri, info_dict, property_type=property_type)
elif not True in [\
(domain_uri, inferredSubClass, o) in self.project_graph\
for o in self.project_graph.objects(property_uri, RDFS.domain)\
]:
# if domain_uri is NOT a subclass of a cls specified by RDFS.domain
if SemanticClass.CARDINALITY in info_dict.keys()\
and info_dict[SemanticClass.CARDINALITY] > 0:
self.addRestriction2Class(domain_uri, property_uri, info_dict)
self.project_graph.add((property_uri, RDFS.domain, domain_uri))
if super_property_uri is not None\
and (property_uri, RDFS.subPropertyOf, super_property_uri) not in self.project_graph:
self.project_graph.add((property_uri, RDFS.subPropertyOf, super_property_uri))
return domain_uri, property_uri
def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'):
    """Returns a property name.

    Priority: camelize property_name if given; else combine subject_uri and
    object_uri local names with the connector; else derive from object_uri;
    else return the bare prefix.
    """
    if property_name is not None:
        segments = property_name.split('_')
        camelized = segments[0].lower() + ''.join(part.capitalize() for part in segments[1:])
        if camelized[0].islower():
            camelized = camelized[0].upper() + camelized[1:]
        return prefix + camelized
    if subject_uri is not None:
        combined = subject_uri.split('#')[1] + self.createPropertyName(object_uri=object_uri, prefix=connector)
        return combined[0].lower() + combined[1:]
    if object_uri is not None:
        return prefix + object_uri.split('#')[1]
    return prefix
def get_comment_label(self, cls):
    """Returns comment and label from cls __doc__.

    comment: the first non-empty docstring line (if the docstring contains a
    '.'), else the whole docstring with newlines stripped.
    label: an '@label' annotation in the docstring if present, else the class
    name split on CamelCase boundaries and lowercased.
    """
    comment = cls.__doc__.replace('\n','').lstrip()
    label = cls.__name__
    if '.' in cls.__doc__:
        comment = [ text for text in cls.__doc__.split('\n') if text != '' ][0].lstrip()
    if '@label' in cls.__doc__:
        m = re.search(r'(@label[:]*\s)(.*[\.]*)', cls.__doc__)
        label_tag, label = m.groups()
    elif re.search(r'([A-Z][a-z]+)', label):
        # fix: removed unused re.search result; raw strings for regexes
        label = ' '.join([ text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != '' ])
    return comment, label
def get_semantic_classes(self, datatypes_dir):
    """Returns a list of all classes that are contained in datatypes_dir that are subclasses of class_spec.SemanticClass.

    UnSemanticClass subclasses and SemanticClass itself are excluded.
    :return: a list of classes
    """
    base_dir = dirname(dirname(__file__))
    sys.path.append(base_dir)
    root_modul_name = datatypes_dir.replace('/','.')
    # skip test modules and private modules
    files = [ file.replace('.py','') for file in listdir(datatypes_dir) if file.endswith('.py') and not file.startswith('test_') and not file.startswith('_')]
    all_modules = []
    for name in files:
        all_modules.append(importlib.import_module('{}.{}'.format(root_modul_name, name)))
    all_classes = []
    for modul in all_modules:
        all_classes += inspect.getmembers(modul, inspect.isclass)
    # deduplicate and sort by class name for a deterministic processing order
    all_classes = sorted(set(all_classes), key=lambda current_class: current_class[0])
    semantic_classes = [ cls for name, cls in all_classes if issubclass(cls, SemanticClass)\
            and not issubclass(cls, UnSemanticClass)\
            and not (cls == SemanticClass)]
    return semantic_classes
def _get_builtin_cls_keys(self, property_dict):
"""Returns a list of keys for classes that are builtin.
"""
builtin_cls_keys = []
for key in property_dict.keys():
property_cls = property_dict.get(key).get('class')\
if type(property_dict.get(key)) is dict\
else property_dict.get(key)[0]
if type(property_cls) != dict\
and property_cls.__module__ == 'builtins':
builtin_cls_keys.append(key)
return builtin_cls_keys
def _get_semantic_dictionary_keys_super_first(self, property_dict):
    """Sort the keys of the property part of a semantic dictionary.

    Keys whose property class is a builtin type come first; among the
    remaining keys, a key whose class is a superclass of another key's
    class is placed before that key.

    :return: a sorted list of keys.
    """
    builtin_keys = self._get_builtin_cls_keys(property_dict)
    ordered_complex_keys = []
    for key in property_dict.keys():
        if key in builtin_keys:
            continue
        current_cls = property_dict.get(key).get('class')
        # find the first already-placed key whose class is a subclass of
        # current_cls and insert the (super) key just before it
        insert_at = next(
            (index for index, other_key in enumerate(ordered_complex_keys)
             if issubclass(property_dict.get(other_key).get('class'), current_cls)),
            None)
        if insert_at is None:
            ordered_complex_keys.append(key)
        else:
            ordered_complex_keys.insert(insert_at, key)
    return builtin_keys + ordered_complex_keys
def usage():
    """Print usage information (the docstring of main) to stdout."""
    print(main.__doc__)
def main(argv):
    """This program can be used to convert py classes that are subclasses of class_spec.SemanticClass to owl:Class
    and its properties to owl:ObjectProperty.
    py2ttl/py2ttl_ontology.py [OPTIONS ]
    [optional] directory containing datatypes that are subclasses of class_spec.SemanticClass.
    Overwrites DATATYPES_DIR in py2ttl/config.py.
    OPTIONS:
    -h|--help: show help
    -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in py2ttl/config.py
    -t|--target=target_ontology_file target ontology ttl file, default: 'PROJECT_PREFIX-ontology_autogenerated.ttl'
    :return: exit code (int)
    """
    check_config_files_exist()
    # defaults come from py2ttl/config.py; options/arguments may override them
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = ''
    try:
        options, positional = getopt.getopt(argv, "hs:t:", ["help", "source=", "target="])
    except getopt.GetoptError:
        usage()
        return 2
    for option, value in options:
        if option in ('-h', '--help'):
            usage()
            return 0
        if option in ('-s', '--source'):
            source_ontology_file = value
        elif option in ('-t', '--target'):
            target_ontology_file = value
    converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    if positional:
        # first positional argument overrides the configured datatypes dir
        datatypes_dir = positional[0]
    if target_ontology_file == '':
        target_ontology_file = f'.{sep}{converter.project_name}-ontology_autogenerated.ttl'
    return converter.create_ontology(datatypes_dir, target_ontology_file)
if __name__ == "__main__":
    # script entry point: forward the command-line arguments (without the
    # program name) to main() and use its return value as the exit code
    sys.exit(main(sys.argv[1:]))
Index: tests_py2ttl/test_class_spec.py
===================================================================
--- tests_py2ttl/test_class_spec.py (revision 91)
+++ tests_py2ttl/test_class_spec.py (revision 92)
@@ -1,90 +1,91 @@
import unittest
from os import sep, path
import inspect
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
import sys
sys.path.append('svgscripts')
from datatypes.image import Image
from datatypes.word import Word
+from datatypes.simple_word import SimpleWord
sys.path.append('py2ttl')
try:
from class_spec import SemanticClass
except ImportError:
sys.path.append(dirname(dirname(realpath(__file__))))
from py2ttl.class_spec import SemanticClass
class TestSemanticClassFail(SemanticClass):
def __init__(self):
pass
class Dummy:
def __init__(self, id):
self.id = id
class TestSemanticClassOK(SemanticClass):
def __init__(self):
self.msg = 'Hello World!'
self.id = 0
self.mylist = [ Dummy(0), Dummy(1), Dummy(2) ]
@staticmethod
def get_semantic_dictionary():
return {'class': { 'this': TestSemanticClassOK}, 'properties': { 'msg': (str, SemanticClass.SINGLE_VALUE) }}
class TestSemanticClassB(SemanticClass):
def __init__(self):
self.data = [ 1, 2, 3, 4 ]
self.test = [ TestSemanticClassOK(), TestSemanticClassOK() ]
@staticmethod
def get_semantic_dictionary():
return { 'class': {'this': TestSemanticClassB }, 'properties': TestSemanticClassB.create_semantic_property_dictionary('data', int)}
def get_super(self):
return inspect.getclasstree([self.__class__],unique=True)[0][0]
class TestSemanticClassC(TestSemanticClassB):
pass
class TestSemanticClass(unittest.TestCase):
def test_fail(self):
with self.assertRaises(TypeError):
TestSemanticClassFail()
def test_success(self):
test = TestSemanticClassOK()
self.assertEqual(TestSemanticClassOK.get_semantic_dictionary()['properties'], { 'msg': (str, 1) })
test = TestSemanticClassB()
self.assertEqual(test.get_semantic_dictionary()['class'].get('this'), TestSemanticClassB)
dictionary = test.return_dictionary_after_updating_super_classes(TestSemanticClassB.get_semantic_dictionary())
def test_get_class_dictionary(self):
test = TestSemanticClassC()
self.assertEqual(test.get_class_dictionary().get('type') is not None, True)
self.assertEqual(test.get_class_dictionary().get('type'), TestSemanticClassB)
#print(test.create_semantic_property_dictionary('is_true', bool, cardinality=1, name='IsTrue', label='is true', comment='test comment'))
def test_get_cls_hasPart_objectCls_dictionaries(self):
dictionary = SemanticClass.get_cls_hasPart_objectCls_dictionaries(SemanticClass, 'asdf/asdf')
#print(dictionary)
def test_get_object_from_list_with_id(self):
test = TestSemanticClassOK()
#mylist = test._get_list_of_type(Dummy)
d_1 = test.get_object_from_list_with_id(Dummy, 1)
self.assertEqual(d_1 is not None, True)
self.assertEqual(d_1.id, 1)
def test_return_dictionary_after_updating_super_classes(self):
class TestWord(Word):
RDFS_SUBCLASSOF_LIST = [ 'http://www.example.com#Test' ]
dictionary = TestWord.get_semantic_dictionary()
self.assertEqual(TestWord.SUBCLASS_OF in dictionary[TestWord.CLASS_KEY].keys(), True)
self.assertEqual(len(dictionary[TestWord.CLASS_KEY][TestWord.SUBCLASS_OF]), 2)
def test_repr(self):
word = Word()
#print(word)
if __name__ == "__main__":
unittest.main()
Index: svgscripts/datatypes/style.py
===================================================================
--- svgscripts/datatypes/style.py (revision 91)
+++ svgscripts/datatypes/style.py (revision 92)
@@ -1,161 +1,193 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent the style of a word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import copy
from lxml import etree as ET
import re
import sys
from .color import Color
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Style(SemanticClass):
"""
This class represents the style of a word.
Args:
manuscript: a ArchivalManuscriptUnity
"""
NIETSCHES_FONTS = { 'german': 'deutsche Schreibschrift', 'latin': 'lateinische Schreibschrift' }
COLOR_KEYS = [ 'black', 'red', 'blue', 'green', 'grey' ]
RELEVANT_STYLE_KEYS = [ 'font-family', 'fill', 'stroke' ]
+ ADDITIONAL_STYLE_KEYS = [ 'font-size' ]
WRITING_INSTRUMENTS = { (COLOR_KEYS[0], False): 'schwarze Tinte',\
(COLOR_KEYS[0], True): 'Bleistift',\
(COLOR_KEYS[4], True): 'Bleistift',\
(COLOR_KEYS[1], False): 'braune Tinte',\
(COLOR_KEYS[1], True): 'Rotstift',\
(COLOR_KEYS[2], False): 'violette Tinte',\
(COLOR_KEYS[2], True): 'Blaustift',\
(COLOR_KEYS[3], False): '„Tinte der letzten Korrektur“'}
- def __init__(self, manuscript=None, writing_process_id=-1):
+ def __init__(self, manuscript=None, writing_process_id=-1, extended_styles=False, deleted=False):
self.color = Color.create_cls(manuscript=manuscript)
+ self.css_styles = []
+ self.deleted = deleted
+ self.is_german = True
self.font = self.NIETSCHES_FONTS['german']
self.font_family = 'Weidemann-Book'
+ self.font_size = None
self.manuscript = manuscript
self.relevant_key_map = {}
- for key in self.RELEVANT_STYLE_KEYS:
+ relevant_style_keys = self.RELEVANT_STYLE_KEYS + self.ADDITIONAL_STYLE_KEYS\
+ if extended_styles else self.RELEVANT_STYLE_KEYS
+ for key in relevant_style_keys:
if not key.startswith('font'):
self.relevant_key_map.update({key: self.set_color})
elif key == 'font-family':
self.relevant_key_map.update({key: self.set_font})
+ elif key == 'font-size':
+ self.relevant_key_map.update({key: self.set_size})
self.writing_instrument = self.WRITING_INSTRUMENTS[(self.color.name, False)]
self.writing_process_id = writing_process_id
def create_a_copy_wo_writing_process_id(self):
new_self = copy.deepcopy(self)
new_self.writing_process_id = -1
return new_self
def create_a_copy(self, reduce_writing_process_id=False):
writing_process_id = self.writing_process_id\
if not reduce_writing_process_id\
else self.writing_process_id-1
copy = Style(manuscript=self.manuscript, writing_process_id=writing_process_id)
copy.color = self.color
copy.font_family = self.font_family
copy.process_style_classes()
if copy.manuscript is not None:
copy.manuscript.update_styles(copy)
return copy
+ def create_css_styles(self):
+ """Create css styles.
+ """
+ if self.deleted:
+ self.css_styles.append('text-decoration: line-through;')
+ if self.font_family.endswith('Bold'):
+ self.css_styles.append(f'font-weight: bold;')
+ if self.font_size is not None:
+ self.css_styles.append(f'font-size: {self.font_size};')
+ self.css_styles.append(f'color: {self.color.hex_color};')
+
@classmethod
- def create_cls(cls, page, style_string, manuscript=None):
+ def create_cls(cls, page, style_string, manuscript=None, create_css=False, deleted=False):
"""Creates a Style from a style_string.
:return: (datatypes.style) Style
"""
- style = cls(manuscript=manuscript)
+ style = cls(manuscript=manuscript, extended_styles=create_css, deleted=deleted)
style_dict = { key: key_dict for key, key_dict in page.style_dict.items()\
if any(relevant_key in key_dict.keys() for relevant_key in style.relevant_key_map.keys()) }
for style_key in style_string.split(' '):
if style_key in style_dict.keys():
dictionary = style_dict[style_key]
for key, set_function in [ (key, func) for key, func in style.relevant_key_map.items() if key in dictionary.keys() ]:
if callable(set_function):
set_function(dictionary[key])
style.process_style_classes()
+ if create_css:
+ style.create_css_styles()
return style
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
properties = {}
properties.update(cls.create_semantic_property_dictionary('font', str, cardinality=1,\
name='styleHasFont', label='style has font', comment='Connects a style with the kind of font Nietzsche used in writing.'))
properties.update(cls.create_semantic_property_dictionary('writing_instrument', str, cardinality=1,\
name='styleHasWritingInstrument', label='style has writing instrument', comment='Connects a style with the description of a writing instrument.'))
properties.update(cls.create_semantic_property_dictionary('color', Color, cardinality=1,\
name='styleHasColor', label='style has color', comment='Connects a style with a color.'))
+ properties.update(cls.create_semantic_property_dictionary('css_styles', str, cardinality=1,\
+ subPropertyOf=cls.STOFF_STYLE_HAS_CSS_URL_STRING,cardinality_restriction='minCardinality',\
+ name='styleHasCSS', label='style has css', comment='Connects a style with CSS style.'))
dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
return cls.return_dictionary_after_updating_super_classes(dictionary)
-
+
def process_style_classes(self):
"""Infere writing instrument from font-family and color.
"""
if self.font_family.startswith('NewsGothic'):
+ self.is_german = False
self.font = self.NIETSCHES_FONTS['latin']
if self.color.name in self.COLOR_KEYS:
self.writing_instrument = self.WRITING_INSTRUMENTS[(self.color.name, self.font_family.endswith('Bold'))]
def set_color(self, hex_color: str):
self.color = Color.create_cls(hex_color=hex_color, manuscript=self.manuscript)
def set_font(self, font_family: str):
self.font_family = font_family
+ def set_size(self, font_size: str):
+ self.font_size = font_size
+
@classmethod
- def remove_irrelevant_style_keys(cls, style_string, page) -> str:
+ def remove_irrelevant_style_keys(cls, style_string, page, extended_styles=False) -> str:
"""Return a style_string without irrelevant style keys.
"""
+ relevant_style_keys = cls.RELEVANT_STYLE_KEYS + cls.ADDITIONAL_STYLE_KEYS\
+ if extended_styles else cls.RELEVANT_STYLE_KEYS
return ' '.join(sorted( style_key for style_key in style_string.split(' ')\
if len(\
[ key for key in page.style_dict[style_key].keys()\
- if key in cls.RELEVANT_STYLE_KEYS ]\
+ if key in relevant_style_keys ]\
) > 0 ))
def __eq__(self, other):
"""Returns true if self is qualitatively identical to other.
Reason: For qualities, the idea of numerical identity is silly.
"""
if other is None:
return False
return self.color == other.color\
and self.font_family == other.font_family\
- and self.writing_process_id == other.writing_process_id
+ and self.writing_process_id == other.writing_process_id\
+ and self.css_styles == other.css_styles
def __hash__(self):
"""Return a hash value for self.
"""
return hash((self.color.__hash__, self.font_family, self.writing_process_id))
Index: svgscripts/datatypes/word.py
===================================================================
--- svgscripts/datatypes/word.py (revision 91)
+++ svgscripts/datatypes/word.py (revision 92)
@@ -1,798 +1,800 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import copy
import inspect
from lxml import etree as ET
from operator import attrgetter
import re
import string
import sys
import warnings
from .box import Box
from .matrix import Matrix
from .path import Path
from .simple_word import SimpleWord
from .style import Style
from .word_position import WordPosition
from .transkription_position import TranskriptionPosition
from .writing_process import WritingProcess
SINGLE_PUNCTUATION_PATTERN = r"^[{}–]$".format(string.punctuation)
def execute_function_on_parts(word_parts, func_name):
"""Execute function on parts and add those parts instead of original word to word_parts.
:return: new word_parts, output from func
"""
copy_parts = word_parts[:]
for word in word_parts:
output = eval('word.{0}()'.format(func_name))
if len(word.word_parts) > 0:
for part_word in word.word_parts:
copy_parts.insert(copy_parts.index(word), part_word)
copy_parts.remove(word)
word.word_parts = []
return copy_parts, output
def update_transkription_position_ids(word):
"""Update transkription_position' ids according to index.
"""
word_part_ids = [ wp.id for wp in word.word_parts ]
if len(word_part_ids) != len(set(word_part_ids)):
for id, wp in enumerate(word.word_parts):
wp.id = id
for index, transkription_position in enumerate(sorted(word.transkription_positions, key=attrgetter('left'))):
transkription_position.id = index
transkription_position.has_box = None
transkription_position.deleted = False
class Word(SimpleWord):
"""
This class represents a word.
"""
COPY_PROPERTY_KEY = [ 'line_number', 'deleted', 'writing_process_id' ]
APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS = { 'style': 'styles' }
DATA = 'debug-data'
XML_TAG = 'word'
XML_EARLIER_VERSION = 'earlier-version'
XML_OVERWRITES = 'overwrites'
XML_CORRECTION_DICT = { 'isClarificationOfWord': 'clarifiesWord',\
'isDeletionOfWord': 'deletesEarlierPart',\
'isExtensionOfWord': 'extendsEarlierVersion',\
'isTransformationOfWord': 'transformsEarlierPart' }
def __init__(self, id=0, text='', line_number=-1, deleted=False, transkription_positions=None, faksimile_positions=None, word_part_objs=None, word_parts=None, writing_process_id=-1, earlier_version=None, box_paths=None, styles=None):
super(Word,self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\
faksimile_positions=faksimile_positions)
self.corrections = []
self.deleted = deleted
self.debug_container = {}
self.debug_msg = None
self.earlier_version = earlier_version
self.edited_text = None
self.isClarificationOfWord = None
self.isDeletionOfWord = None
self.isExtensionOfWord = None
self.isTransformationOfWord = None
if len(self.text) == 0 and len(''.join([ tp.get_text() for tp in self.transkription_positions if type(tp) == TranskriptionPosition ])) > 0:
self.text = ''.join([ tp.get_text() for tp in self.transkription_positions ])
self.overwrites_word = None
self.styles = styles\
if styles is not None\
else []
self.verified = None
self.writing_process_id = writing_process_id
self.writing_processes = []
self.word_insertion_mark = None
self.word_box = None
self.word_parts = word_parts if word_parts is not None else []
self.word_part_objs = word_part_objs if word_part_objs is not None else []
self.IS_DEBUG_WORD = (self.text == 'Übertreibung' and self.line_number == 8)
def attach_word_to_tree(self, target_tree):
"""Attaches word to tree target_tree.
"""
word_node = super(Word,self).attach_word_to_tree(target_tree)
if self.deleted is not None:
word_node.set('deleted', str(self.deleted).lower())
if self.verified is not None:
word_node.set('verified', str(self.verified).lower())
if self.edited_text is not None:
word_node.set('edited-text', self.edited_text)
if self.writing_process_id > -1:
word_node.set('writing-process-id', str(self.writing_process_id))
for index, word_part in enumerate(self.word_parts):
word_part.id = index
word_part.attach_word_to_tree(word_node)
if self.earlier_version is not None:
earlier_node = ET.SubElement(word_node, self.XML_EARLIER_VERSION)
self.earlier_version.attach_word_to_tree(earlier_node)
if self.overwrites_word is not None\
and len(self.overwrites_word.transkription_positions) > 0:
overwrite_node = ET.SubElement(word_node, self.XML_OVERWRITES)
self.overwrites_word.attach_word_to_tree(overwrite_node)
if self.word_box is not None:
self.word_box.attach_object_to_tree(word_node)
if len(self.corrections) > 0:
word_node.set('corrections', ' '.join(set([ str(word.id) for word in self.corrections ])))
for key in self.XML_CORRECTION_DICT.keys():
if self.__dict__[key] is not None:
word_node.set(self.XML_CORRECTION_DICT[key], 'true')
return word_node
def belongs_to_multiple_writing_processes(self, include_parts=False):
"""Returns true if transkription_positions belong to different WritingProcesses.
"""
if len(self.word_parts) > 0 and include_parts:
return len(set(word.writing_process_id for word in self.word_parts)) > 1
return len(set(tp.writing_process_id for tp in self.transkription_positions )) > 1
def set_parent_word_writing_process_id(self):
"""Set writing_process_id for parent word.
"""
ids = set(word.transkription_positions[0].style for word in self.word_parts\
if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None)
if len(ids) > 1:
self.writing_process_id = max([style.writing_process_id for style in ids])
if len(set(word.transkription_positions[0].style.create_a_copy_wo_writing_process_id()\
for word in self.word_parts\
if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None))\
> 1:
self.writing_process_id += 1
@classmethod
def create_cls(cls, word_node):
"""Creates a word from a (lxml.Element) node.
[:return:] Word
"""
cls = super(Word,cls).create_cls(word_node)
cls.writing_process_id = int(word_node.get('writing-process-id')) if bool(word_node.get('writing-process-id')) else -1
cls.split_strings = None
cls.join_string = word_node.get('join')
if bool(word_node.get('split')):
cls.split_strings = word_node.get('split').split(' ')
if ''.join(cls.split_strings) != cls.text:
error_msg = 'Error in file {0}: word with id="{1}" has split attributes that do not correspond to its text attribute!\n'.\
format(word_node.getroottree().docinfo.URL, str(cls.id))\
+ 'Split attributes: "{0}".\n'.format(' '.join(cls.split_strings))\
+ 'Text attribute: "{0}".\n'.format(cls.text)
raise Exception(error_msg)
cls.verified = word_node.get('verified') == 'true'\
if bool(word_node.get('verified')) else None
cls.deleted = word_node.get('deleted') == 'true'\
if bool(word_node.get('deleted')) else None
cls.edited_text = word_node.get('edited-text')
cls.word_parts = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_TAG) ]
if bool(word_node.get('corrections')):
for index in [ int(i) for i in word_node.get('corrections').split(' ') ]:
if index < len(cls.word_parts):
cls.corrections.append(cls.word_parts[index])
cls.earlier_version = None
if len(word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG)) > 0:
cls.earlier_version = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG) ][0]
for key_value in cls.XML_CORRECTION_DICT.values():
if word_node.get(key_value) == 'true':
cls.__dict__[key_value] = True
if cls.earlier_version is not None:
for word_part in cls.word_parts:
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Part') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]\
and len(cls.word_parts) <= len(cls.earlier_version.word_parts):
try:
word_part.__dict__[key] = cls.earlier_version.word_parts[word_part.id]
except Exception:
msg = f'{cls.id} {cls.text}: {word_part.id}'
raise Exception(msg)
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('EarlierVersion') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]:
word_part.__dict__[key] = cls.earlier_version
for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Word') ]:
if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]:
word_part.__dict__[key] = cls
cls.overwrites_word = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)][0]\
if len(word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)) > 0\
else None
cls.word_box = [ Box(node=node) for node in word_node.xpath('./' + Box.XML_TAG) ][0]\
if len(word_node.xpath('./' + Box.XML_TAG)) > 0\
else None
return cls
def create_earlier_version(self, root_word=None, id=0):
"""Create an earlier version of word.
"""
if root_word is None:
root_word = self
root_word.set_parent_word_writing_process_id()
word_parts = []
non_single_punctuation_word_parts = [ word_part for word_part in self.word_parts\
if not re.match(SINGLE_PUNCTUATION_PATTERN, word_part.text) ]
non_single_punctuation_word_parts_length = len(non_single_punctuation_word_parts)
if non_single_punctuation_word_parts_length > 0\
and len([ word_part for word_part in non_single_punctuation_word_parts\
if word_part.deleted ])\
== non_single_punctuation_word_parts_length:
self.deleted = True
for word_part in non_single_punctuation_word_parts: word_part.deleted = False
for id, word_part in enumerate(self.word_parts):
earlierWordPart = word_part.create_earlier_version(root_word=root_word, id=id)
if word_part.deleted:
word_part.isDeletionOfWord = earlierWordPart
word_parts.append(earlierWordPart)
if word_part not in self.corrections:
self.corrections.append(word_part)
elif word_part.overwrites_word is not None\
and (len(word_part.transkription_positions) > 0\
and word_part.overwrites_word.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style\
!= word_part.overwrites_word.transkription_positions[0].style):
word_part.overwrites_word.id = word_part.id
word_parts.append(word_part.overwrites_word)
word_part.isTransformationOfWord = word_part.overwrites_word
#print(f'transform: {self.text}')
if word_part not in self.corrections:
self.corrections.append(word_part)
elif root_word.writing_process_id > -1\
and (len(word_part.transkription_positions) > 0\
and word_part.transkription_positions[0].style is not None\
and word_part.transkription_positions[0].style.writing_process_id\
== root_word.writing_process_id):
word_part.extendsEarlierVersion = True
#print('extends')
if word_part not in self.corrections:
self.corrections.append(word_part)
else:
if word_part.deleted:
word_part.isDeletionOfWord = earlierWordPart
word_parts.append(earlierWordPart)
if word_part not in self.corrections:
self.corrections.append(word_part)
else:
#print(f'default: {self.text}')
word_parts.append(earlierWordPart)
text = ''.join([ word.text for word in word_parts ])\
if len(word_parts) > 0\
else self.text
if len(word_parts) == 1:
self.transkription_positions += word_parts[0].transkription_positions
self.faksimile_positions += word_parts[0].faksimile_positions
word_parts = []
new_transkription_positions = copy.deepcopy(self.transkription_positions)
if len(self.transkription_positions) > 0\
and self.transkription_positions[0].style is not None:
writing_process_id = self.transkription_positions[0].style.writing_process_id
for new_tp in new_transkription_positions:
new_tp.style.writing_process_id = writing_process_id
return Word(id=id, text=text, transkription_positions=new_transkription_positions,\
faksimile_positions=self.faksimile_positions, line_number=self.line_number,\
word_parts=word_parts)
def create_correction_history(self, page=None, box_style=None):
"""Create correction history.
"""
if self.word_box is not None:
manuscript = self.transkription_positions[0].style.manuscript\
if len(self.transkription_positions) > 0\
and self.transkription_positions[0].style is not None\
else None
style = Style()
if box_style is not None:
style = box_style
if page is not None:
style = Style.create_cls(page, self.word_box.text_style_class, manuscript=manuscript)
for font_key in [ font_key for font_key in self.word_box.text_style_class.split(' ') if font_key in page.fontsizekey2stage_mapping.keys() ]:
style.writing_process_id = page.fontsizekey2stage_mapping.get(font_key)
transkription_positions = TranskriptionPosition.copy_list_of_cls(self.transkription_positions)
for transkription_position in transkription_positions:
transkription_position.style = style
self.overwrites_word = Word(text=self.word_box.earlier_text, transkription_positions=transkription_positions,\
line_number=self.line_number)
for word_part in self.word_parts:
word_part.create_correction_history(page=page, box_style=box_style)
if len(self.word_parts) > 0:
earlier_version = self.create_earlier_version()
extending_words = self._get_parts_with_property_key('extendsEarlierVersion')
if len(extending_words) > 0:
for word in extending_words:
word.isExtensionOfWord = earlier_version
if self.has_mixed_status('deleted', include_parts=True):
self.edited_text = ''.join([ word.text for word in self.word_parts if not word.deleted ])
if len(self.corrections) > 0:
self.earlier_version = earlier_version
@staticmethod
def CREATE_WORD(word_node=None, page=None, word_part_objs=[], id=0, height=0, endX=0, endSign=None, matrix=None, line_number=-1, debug_msg=None):
"""Creates a word from a (lxml.Element) node or word_part_objs.
[:return:] Word
"""
if word_node is not None: # init word from xml node
id = int(word_node.get('id'))
line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else line_number
text = word_node.get('text')
deleted = bool(word_node.get('deleted')) and word_node.get('deleted') == 'true'
transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('.//' + WordPosition.TRANSKRIPTION) ]
faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('.//' + WordPosition.FAKSIMILE) ]
word_part_objs = [ item.attrib for item in word_node.findall('.//' + Word.DATA + '/part')]\
if len(word_node.findall('.//' + Word.DATA)) > 0\
else [ item.attrib for item in word_node.findall('.//part')]
return Word(id=id, text=text, deleted=deleted, line_number=line_number, transkription_positions=transkription_positions,\
faksimile_positions=faksimile_positions, word_part_objs=word_part_objs)
elif len(word_part_objs) > 0: # init word from word_part_obj that has been extracted from svg file
WIDTH = 5
TOPCORRECTION = 2.0
FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize
height = height
x = round(float(word_part_objs[0]['x']), 3)
if(page is not None and bool(page.style_dict)):
HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height
style_set = set(' '.join(set( dict['class'] for dict in word_part_objs)).split(' '))
biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set)
height = round(biggest_font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / biggest_font_size, 3)
TOPCORRECTION = 1 + HEIGHT_FACTOR / biggest_font_size
if endSign is not None and '%' in endSign:
lastCharFontSizeList = [ float(page.style_dict[key]['font-size'].replace('px',''))\
for key in word_part_objs[len(word_part_objs)-1]['class'].split(' ')\
if bool(page.style_dict[key].get('font-size'))]
lastCharFontSize = lastCharFontSizeList[0] if len(lastCharFontSizeList) > 0 else 1
endX = float(endX) + lastCharFontSize * FONTWIDTHFACTOR
elif endSign is not None and '%' in endSign:
endX = float(endX) + WIDTH
bottom = round(float(word_part_objs[0]['y']), 3)
y = round(bottom - height + TOPCORRECTION, 3)
width = round(float(endX) - x, 3)
transkription_positions = [ WordPosition(height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) ]
text = ''.join([ dict['text'] for dict in word_part_objs])
line_number = page.get_line_number( (y + bottom)/2) if page is not None else line_number
word = Word(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions, word_part_objs=word_part_objs)
word.debug_msg = debug_msg
return word
else:
error_msg = 'word_node has not been defined' if (word_node is None) else 'word_part_objs is empty'
raise Exception('Error: {}'.format(error_msg))
@classmethod
def get_semantic_dictionary(cls):
    """ Creates and returns a semantic dictionary as specified by SemanticClass.

    Extends the dictionary inherited from the superclass with Word-specific
    properties (styles, corrections, deletion status, earlier version,
    edited text, the is*OfWord relations, overwritten word and word parts),
    then marks every correction property listed in cls.XML_CORRECTION_DICT
    as a subproperty of 'isCorrectionOfWord'.
    """
    dictionary = super(Word,cls).get_semantic_dictionary()
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('styles', Style,\
            cardinality=1, cardinality_restriction='minCardinality',\
            name='wordHasStyle', label='word has style', comment='Word has an appearance that is characterized by this style.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('corrections', Word,\
            name='wordHasCorrection', label='word has corrections', comment='Word has a correction made by the author.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deleted', bool,\
            name='isWordDeleted', label='has word been deleted', comment='Word has been deleted by the author.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_version', Word,\
            name='wordHasEarlierVersion', label='word has an earlier version', comment='There is a earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('edited_text', str,\
            name='hasEditedText', label='word has an edited text', comment='Word has a text that is edited automatically by removing deleted parts or hyphens.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isClarificationOfWord', Word,\
            name='isClarificationOfWord', label='word is a clarification of word',\
            comment='The author has used this part of the word in order to clarify the appearance of that word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isDeletionOfWord', Word,\
            name='isDeletionOfWord', label='word is a deletion of word',\
            comment='The author has used this part of a word in order to delete the corresponding part of an earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isExtensionOfWord', Word,\
            name='isExtensionOfWord', label='word is a extension of word',\
            comment='The author has used this part of a word in order to extend an earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isTransformationOfWord', Word,\
            name='isTransformationOfWord', label='word is a transformation of word',\
            comment='The author has used this part of a word in order to transform the corresponding part of an earlier version of this word.'))
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('overwrites_word', Word,\
            name='overwritesWord', label='word overwrites word',\
            comment='The author has used this word in order to overwrite that word.'))
    # This makes wordHasWordParts a subproperty of cls.HAS_HOMOTYPIC_PARTS_URL_STRING,
    # cls.return_dictionary_after_updating_super_classes will subclass Word under the corresponding super class.
    dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('word_parts', list,\
            name='wordHasWordParts', label='word has word parts', comment='Word consists of a list of words.',\
            subPropertyOf=cls.HAS_HOMOTYPIC_PARTS_URL_STRING))
    super_property_dictionary = cls.create_semantic_property_dictionary(cls.SUPER_PROPERTY, Word,\
            name='isCorrectionOfWord', label='word is a correction of word',\
            comment='The author has used this word in order to correct that word.')
    # Attach the isCorrectionOfWord super property to every correction key.
    for key in cls.XML_CORRECTION_DICT.keys():
        correction_dict = dictionary[cls.PROPERTIES_KEY].get(key)
        correction_dict.update(super_property_dictionary)
        dictionary[cls.PROPERTIES_KEY].update({key: correction_dict})
    return cls.return_dictionary_after_updating_super_classes(dictionary)
def has_mixed_status(self, property_key, include_parts=False, concerns_word=True):
    """Return True if the transkription_positions (or, optionally, the word parts)
    disagree on the value of property_key in their __dict__.

    Args:
        property_key: attribute name to compare.
        include_parts: also inspect self.word_parts when it is non-empty.
        concerns_word: with include_parts, compare the parts themselves
            rather than each part's first transkription position.
    """
    # A position that lacks the key at all means no comparable status.
    if any(property_key not in tp.__dict__ for tp in self.transkription_positions):
        return False
    if include_parts and len(self.word_parts) > 0:
        if concerns_word:
            if any(property_key not in part.__dict__ for part in self.word_parts):
                return False
            return len({ part.__dict__[property_key] for part in self.word_parts }) > 1
        values = { part.transkription_positions[0].__dict__[property_key] for part in self.word_parts\
                if len(part.transkription_positions) > 0 and property_key in part.transkription_positions[0].__dict__ }
        return len(values) > 1
    return len({ tp.__dict__[property_key] for tp in self.transkription_positions }) > 1
def init_word(self, page):
    """Initialize word with objects from page.

    Resolves writing processes by id, recursively initializes word parts
    (collecting their lines and writing processes, deduplicated), and
    initializes the overwritten word and the earlier version, inheriting
    writing_process_id/line_number for the earlier version when unset.
    """
    super(Word,self).init_word(page)
    if self.writing_process_id > -1:
        self.writing_processes += [ wp for wp in page.writing_processes if wp.id == self.writing_process_id ]
    # FIX: removed dead local 'writing_processes = self.writing_processes' (never read).
    for word_part in self.word_parts:
        word_part.init_word(page)
        self.lines += word_part.lines
        self.writing_processes += word_part.writing_processes
    # Deduplicate collected lines / writing processes.
    self.lines = [ line for line in set(self.lines) ]
    self.writing_processes = [ wp for wp in set(self.writing_processes)]
    if self.overwrites_word is not None:
        self.overwrites_word.init_word(page)
    if self.earlier_version is not None:
        # An uninitialized earlier version inherits the preceding writing
        # process and this word's line number before being initialized.
        if self.earlier_version.writing_process_id == -1:
            self.earlier_version.writing_process_id = self.writing_process_id-1
        if self.earlier_version.line_number == -1:
            self.earlier_version.line_number = self.line_number
        self.earlier_version.init_word(page)
def join(self, other_word, append_at_end_of_new_word=True):
    """Join other_word with this word.

    Concatenates the texts and merges other_word.transkription_positions
    into self.transkription_positions (at the end or at the front),
    renumbering position ids, then simplifies the merged positions.
    """
    if append_at_end_of_new_word:
        self.text += other_word.text
        for position in other_word.transkription_positions:
            # Appended positions get the next free index as id.
            position.id = str(len(self.transkription_positions))
            self.transkription_positions.append(position)
    else:
        self.text = other_word.text + self.text
        prepended_count = len(other_word.transkription_positions)
        # Prepend the incoming positions in order, then renumber the
        # positions that were shifted to the right.
        self.transkription_positions[0:0] = other_word.transkription_positions
        for index in range(prepended_count, len(self.transkription_positions)):
            self.transkription_positions[index].id = str(index)
    self.simplify_transkription_positions()
def partition_according_to_deletion(self):
    """Partition a word according to its transkription_positions' deletion status
    ->split word and add partial words as its parts.

    Consecutive positions with the same 'deleted' value are grouped into one
    new part word; afterwards this word becomes a pure container (no own
    transkription_positions, line_number -1, deleted False).
    """
    if self.has_mixed_status('deleted'):
        transkription_positions = []
        last_status = None
        for transkription_position in self.transkription_positions:
            # Close the current group whenever the deletion status flips.
            if transkription_position.deleted != last_status\
                    and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id)
                self.word_parts.append(newWord)
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_status = transkription_position.deleted
        # Flush the trailing group.
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                    transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id)
            self.word_parts.append(newWord)
        self.transkription_positions = []
        self.line_number = -1
        self.deleted = False
    elif len(self.word_parts) > 0:
        # No mixed status on this level: recurse into existing parts
        # (second return value of the helper is deliberately discarded).
        self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_deletion')
    elif not self.deleted\
            and len(self.transkription_positions) > 0\
            and self.transkription_positions[0].deleted:
        # Uniform deletion across positions: lift the flag onto the word.
        self.deleted = True
def partition_according_to_writing_process_id(self):
    """Partition a word according to its transkription_positions' writing_process_ids
    ->split word and add partial words as its parts.

    Consecutive positions sharing a writing_process_id form one new part;
    finally this word's own writing_process_id is set to the highest id of
    its parts (or to the first position's id when no parts exist).
    """
    if self.belongs_to_multiple_writing_processes():
        last_writing_process_id = -1
        transkription_positions = []
        for transkription_position in self.transkription_positions:
            # Close the current group whenever the writing process changes.
            if transkription_position.writing_process_id != last_writing_process_id\
                    and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, writing_process_id=last_writing_process_id)
                self.word_parts.append(newWord)
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_writing_process_id = transkription_position.writing_process_id
        # Flush the trailing group; the word becomes a container.
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                    transkription_positions=transkription_positions, writing_process_id=last_writing_process_id)
            self.word_parts.append(newWord)
        self.transkription_positions = []
    elif len(self.word_parts) > 0:
        # Recurse into existing parts (second return value discarded).
        self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_writing_process_id')
    if self.belongs_to_multiple_writing_processes(include_parts=True):
        # The word as a whole belongs to the latest (highest) writing process.
        self.writing_process_id = sorted(set([ word.writing_process_id for word in self.word_parts ]), reverse=True)[0]
    elif len(self.transkription_positions) > 0:
        self.writing_process_id = self.transkription_positions[0].writing_process_id
def process_boxes(self, box_paths, tr_xmin=0.0, tr_ymin=0.0, previous_word_has_box=False):
    """Determines whether word is over a word box.

    Recurses into word parts when present; otherwise matches each
    transkription position's path against box_paths, splitting positions
    where a box only partially covers them, and removes a matched box from
    box_paths (the list is mutated for the caller).

    :return: the (partial) word lying over a box, or None
    """
    word_over_box = None
    if len(self.word_parts) > 0:
        for word in self.word_parts:
            # A part following a boxed part is flagged via previous_word_has_box.
            current_word = word.process_boxes(box_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin, previous_word_has_box=(word_over_box is not None))
            if current_word is not None and current_word.word_box is not None:
                word_over_box = current_word
    else:
        new_tp_dict = {}
        for index, transkription_position in enumerate(self.transkription_positions):
            if previous_word_has_box and index == 0:
                # Nudge the first position to the right so it does not
                # falsely intersect the previous word's box.
                if len(transkription_position.positional_word_parts) > 0:
                    transkription_position.positional_word_parts[0].left += transkription_position.positional_word_parts[0].width/2
                    #print(f'{self.text}: {transkription_position.positional_word_parts[0].left}')
                else:
                    transkription_position.left += 1
            word_path = Path.create_path_from_transkription_position(transkription_position,\
                    tr_xmin=tr_xmin, tr_ymin=tr_ymin)
            containing_boxes = [ box_path for box_path in box_paths\
                    if word_path.is_partially_contained_by(box_path)\
                    or box_path.do_paths_intersect(word_path) ]
            if len(containing_boxes) > 0:
                if previous_word_has_box:
                    print(f'{self.text}: {word_path.path.bbox()} {containing_boxes[0].path.bbox()}')
                # May split transkription_position; replacements are
                # collected in new_tp_dict and spliced in below.
                self._set_box_to_transkription_position(containing_boxes[0], word_path,\
                        transkription_position, new_tp_dict, tr_xmin)
                box_paths.remove(containing_boxes[0])
        # Replace split positions in place, preserving their order.
        for replace_tp in new_tp_dict.keys():
            for tp in new_tp_dict.get(replace_tp):
                self.transkription_positions.insert(self.transkription_positions.index(replace_tp), tp)
            self.transkription_positions.remove(replace_tp)
        word_over_box = self._get_partial_word_over_box()
        update_transkription_position_ids(self)
    return word_over_box
def set_word_insertion_mark(self, word_insertion_mark):
    """Attach the given word insertion mark to this word."""
    self.word_insertion_mark = word_insertion_mark
def set_writing_process_id_to_transkription_positions(self, page):
    """Determine the writing process id of each transkription position.

    Looks up the style classes of a position's first positional word part
    in page.fontsizekey2stage_mapping; the last matching font key wins.
    """
    stage_mapping = page.fontsizekey2stage_mapping
    for transkription_position in self.transkription_positions:
        word_parts = transkription_position.positional_word_parts
        if len(word_parts) > 0:
            for font_key in word_parts[0].style_class.split(' '):
                if font_key in stage_mapping:
                    transkription_position.writing_process_id = stage_mapping[font_key]
def simplify_transkription_positions(self):
    """Merge transkription_positions if possible.

    Walks the positions from right to left and merges each mergeable pair
    into a single position rebuilt from their combined positional word
    parts. Only runs when every position carries positional_word_parts.
    """
    index = len(self.transkription_positions)-1
    while index > 0\
            and False not in [ 'positional_word_parts' in tp.__dict__.keys() for tp in self.transkription_positions ]:
        current_tp = self.transkription_positions[index]
        index -= 1
        previous_tp = self.transkription_positions[index]
        if previous_tp.is_mergebale_with(current_tp):
            positional_word_parts = previous_tp.positional_word_parts
            positional_word_parts += current_tp.positional_word_parts
            transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\
                    positional_word_parts, debug_msg_string='simplifying transkription positions', transkription_position_id=previous_tp.id)
            # Only replace the pair if the helper merged them into exactly one position.
            if len(transkription_positions) == 1:
                # Prefer the previous position's writing process id when it is set.
                transkription_positions[0].writing_process_id = previous_tp.writing_process_id\
                        if previous_tp.writing_process_id != -1\
                        else current_tp.writing_process_id
                self.transkription_positions.pop(index+1)
                self.transkription_positions[index] = transkription_positions[0]
    #print(self.text, len(self.transkription_positions))
def split(self, split_string, start_id=0):
    """Splits the word and returns an 3-tuple of new words.

    Partitions self.text around split_string and distributes the word's
    positional word parts (collected from all transkription positions)
    onto up to three new words: (previousWord, currentWord, nextWord).
    previousWord/nextWord are None when the corresponding text segment is
    empty or its characters cannot be matched to positional word parts.
    """
    previousString, currentString, nextString = self.text.partition(split_string)
    currentWord = None
    previousWord = None
    nextWord = None
    previousIndex = 0
    current_id = start_id
    all_positional_word_parts = []
    for position in self.transkription_positions:
        all_positional_word_parts += position.positional_word_parts
    if len(all_positional_word_parts) == 0:
        warnings.warn('ATTENTION: Word: {} {} with Strings "{}, {}, {}": there are no parts!'.format(self.id, self.text, previousString, currentString, nextString))
    if len(previousString) > 0:
        # Consume parts from the front until their texts spell previousString.
        previous_pwps = []
        while previousIndex < len(all_positional_word_parts) and previousString != ''.join([ pwp.text for pwp in previous_pwps ]):
            previous_pwps.append(all_positional_word_parts[previousIndex])
            previousIndex += 1
        if previousString != ''.join([ pwp.text for pwp in previous_pwps ]):
            warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(previousString))
        else:
            previous_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(previous_pwps, debug_msg_string='word.split')
            previous_text = ''.join([ pwp.text for pwp in previous_pwps ])
            previousWord = Word(text=previous_text, id=current_id, line_number=self.line_number, transkription_positions=previous_transkription_positions)
            current_id += 1
            all_positional_word_parts = all_positional_word_parts[previousIndex:]
    if len(nextString) > 0:
        # Consume parts matching currentString; the remainder becomes nextWord.
        tmp_pwps = []
        index = 0
        while index < len(all_positional_word_parts) and currentString != ''.join([ pwp.text for pwp in tmp_pwps ]):
            tmp_pwps.append(all_positional_word_parts[index])
            index += 1
        if currentString != ''.join([ pwp.text for pwp in tmp_pwps ]):
            warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(currentString))
        else:
            next_pwps = all_positional_word_parts[index:]
            next_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(next_pwps, debug_msg_string='word.split')
            next_text = ''.join([ pwp.text for pwp in next_pwps ])
            nextWord = Word(text=next_text, id=current_id+1, line_number=self.line_number, transkription_positions=next_transkription_positions)
            all_positional_word_parts = all_positional_word_parts[:index]
    # Whatever parts remain constitute the current (split-string) word.
    current_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(all_positional_word_parts, debug_msg_string='word.split')
    current_text = ''.join([ pwp.text for pwp in all_positional_word_parts ])
    currentWord = Word(text=current_text, id=current_id, line_number=self.line_number, transkription_positions=current_transkription_positions)
    return previousWord, currentWord, nextWord
def split_according_to_status(self, status, splits_are_parts=False):
    """Split a word according to its transkription_positions' text.

    Groups consecutive transkription positions that share the same value
    of `status` and creates one new word per group via _create_new_word.

    :param status: attribute name on the transkription positions to group by.
    :param splits_are_parts: when True, the new words are also appended to
        self.word_parts and this word gives up its own positions.
    :return: a list of new word.Word
    """
    new_words = []
    if self.has_mixed_status(status):
        last_status = None
        transkription_positions = []
        for transkription_position in self.transkription_positions:
            # Close the current group whenever the status value changes.
            if transkription_position.__dict__[status] != last_status\
                    and len(transkription_positions) > 0:
                new_words.append(\
                        self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words)))
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_status = transkription_position.__dict__[status]
        # Flush the trailing group.
        if len(transkription_positions) > 0:
            new_words.append(\
                    self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words)))
    if splits_are_parts:
        self.word_parts += new_words
    if len(self.word_parts) > 0:
        self.transkription_positions = []
    return new_words
def undo_partitioning(self):
    """Undo partitioning.

    Recursively flattens word parts back into this word: the parts'
    transkription positions are re-collected (until self.text is fully
    covered) and all partition-related bookkeeping is reset.
    """
    if len(self.word_parts) > 0:
        for word_part in self.word_parts:
            word_part.undo_partitioning()
            # Keep absorbing part positions until they spell self.text.
            if self.text != ''.join([ tp.get_text() for tp in self.transkription_positions ]):
                self.transkription_positions += word_part.transkription_positions
        self.earlier_version = None
        self.edited_text = None
        self.word_box = None
        self.word_parts = []
        self.corrections = []
        self.earlier_versions = []
        self.box_paths = []
def _create_new_word(self, transkription_positions, status, new_id=0):
    """Create a new word from self and transkription_positions.

    Copies every attribute listed in COPY_PROPERTY_KEY (except `status`
    itself) from self to the new word, then transfers the status value of
    the first transkription position — either appended to a mapped list
    attribute (APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS) or set directly.
    """
    newWord = Word(id=new_id, transkription_positions=transkription_positions)
    for key in self.COPY_PROPERTY_KEY:
        if key != status and key in self.__dict__.keys():
            newWord.__dict__[key] = self.__dict__[key]
    if status in self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS.keys():
        newWord.__dict__[self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS[status]].append(transkription_positions[0].__dict__[status])
    else:
        newWord.__dict__[status] = transkription_positions[0].__dict__[status]
    return newWord
def _get_parts_with_property_key(self, property_key):
"""Return a list of word_parts with property == property_key.
"""
word_parts = []
for word_part in self.word_parts:
if property_key in word_part.__dict__.keys():
word_parts.append(word_part)
else:
word_parts += word_part._get_parts_with_property_key(property_key)
return word_parts
def _get_partial_word_over_box(self):
    """Partition a word according to its transkription_positions' has_box
    ->split word and add partial words as its parts.

    :return: word over box or self
    """
    word_over_box = None
    if self.has_mixed_status('has_box'):
        transkription_positions = []
        last_word_box = None
        for transkription_position in self.transkription_positions:
            # Close the current group whenever the box changes.
            if transkription_position.has_box != last_word_box\
                    and len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
                self.word_parts.append(newWord)
                if last_word_box is not None:
                    word_over_box = newWord
                    word_over_box.word_box = last_word_box
                transkription_positions = []
            transkription_positions.append(transkription_position)
            last_word_box = transkription_position.has_box
        # Flush the trailing group; the word becomes a container.
        if len(transkription_positions) > 0:
            newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                    transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
            self.word_parts.append(newWord)
            if last_word_box is not None:
                word_over_box = newWord
                word_over_box.word_box = last_word_box
        self.transkription_positions = []
    elif len(self.word_parts) > 0:
        #self.word_parts, word_over_box = execute_function_on_parts(self.word_parts, inspect.currentframe().f_code.co_name) #'get_partial_word_over_box'
        # Recurse into parts and keep the first boxed (partial) word found.
        for word_part in self.word_parts:
            if word_over_box is None:
                word_over_box = word_part._get_partial_word_over_box()
            else:
                break
    elif len([ tp for tp in self.transkription_positions if tp.has_box is not None]) == 1:
        # Exactly one boxed position and nothing to split: the word itself is over the box.
        word_over_box = self
        word_over_box.word_box = [ tp for tp in self.transkription_positions if tp.has_box is not None][0].has_box
    return word_over_box
def _set_box_to_transkription_position(self, box_path, word_path, transkription_position, new_transkription_positions_dictionary, tr_xmin):
    """Set box_path to transkription_position that is contained by box_path.
    Create new transkription_positions by splitting old ones if necessary and add them to new_transkription_positions_dictionary.

    The box can cover the whole position, its start, its end, or an inner
    span; in the partial cases the position is split at the box boundary
    (x-coordinates translated by tr_xmin) and only the covered piece gets
    the box. When a split fails, the whole position gets the box instead.
    """
    if box_path.contains_path(word_path):
        transkription_position.has_box = box_path
    elif box_path.contains_start_of_path(word_path):
        # Split at the box's right edge; the left piece is covered.
        split_position = box_path.path.bbox()[1] - tr_xmin
        new_tps = transkription_position.split(split_position)
        if len(new_tps) == 2:
            new_tps[0].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
    elif box_path.contains_end_of_path(word_path):
        # Split at the box's left edge; the right piece is covered.
        split_position = box_path.path.bbox()[0] - tr_xmin
        new_tps = transkription_position.split(split_position)
        if len(new_tps) == 2:
            new_tps[1].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
    else: # box_path in the middle of word_path
        split_position1 = box_path.path.bbox()[0] - tr_xmin
        split_position2 = box_path.path.bbox()[1] - tr_xmin
        new_tps = transkription_position.split(split_position1, split_position2)
        if len(new_tps) >= 2:
            new_tps[1].has_box = box_path
            new_transkription_positions_dictionary.update({ transkription_position: new_tps })
        else:
            transkription_position.has_box = box_path
Index: svgscripts/datatypes/page.py
===================================================================
--- svgscripts/datatypes/page.py (revision 91)
+++ svgscripts/datatypes/page.py (revision 92)
@@ -1,273 +1,280 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a page.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
from progress.bar import Bar
from svgpathtools import svg2paths2, svg_to_paths
from svgpathtools.parser import parse_path
import sys
import warnings
from .box import Box
from .color import Color
from .image import Image, SVGImage
from .faksimile_image import FaksimileImage
from .faksimile_position import FaksimilePosition
from .lineNumber import LineNumber
from .line import Line
from .mark_foreign_hands import MarkForeignHands
from .matrix import Matrix
from .path import Path
from .positional_word_part import PositionalWordPart
from .super_page import SuperPage
from .style import Style
from .text_connection_mark import TextConnectionMark
from .text_field import TextField
from .transkriptionField import TranskriptionField
from .writing_process import WritingProcess
from .word import Word
from .word_insertion_mark import WordInsertionMark
sys.path.append('py2ttl')
from class_spec import SemanticClass
FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION
FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT
STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK
STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK
class Page(SemanticClass,SuperPage):
"""
This class represents a page.
Args:
xml_source_file (str): name of the xml file to be instantiated.
xml_target_file (str): name of the xml file to which page info will be written.
"""
UNITTESTING = False
def __init__(self, xml_source_file, faksimile_image=None, faksimile_svgFile=None):
    """Instantiate a page from xml_source_file.

    Optional faksimile_image / faksimile_svgFile override the values in
    the property dictionary before all properties and node objects
    (words, lines, marks, ...) are initialized from the parsed tree.
    """
    super(Page,self).__init__(xml_source_file)
    self.update_property_dictionary('faksimile_image', faksimile_image)
    self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile)
    self.init_all_properties()
    # Styles must be registered before node objects are created from the tree.
    self.add_style(style_node=self.page_tree.getroot().find('.//style'))
    self.init_node_objects()
@classmethod
def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None):
    """Returns a list of Page instantiating a xml_file of type FILE_TYPE_SVG_WORD_POSITION
    or xml_files contained in xml_file of type FILE_TYPE_XML_MANUSCRIPT.
    [optional: instantiation depends on the fulfilment of a status_contains
    and/or on the selection of some words by a word_selection_function].
    """
    source_tree = ET.parse(xml_file)
    file_type = source_tree.getroot().find('metadata/type').text
    if file_type == FILE_TYPE_SVG_WORD_POSITION:
        # A single page file: keep it unless the word filter rejects it.
        page = cls(xml_file)
        if word_selection_function is None or len(word_selection_function(page.words)) > 0:
            return [ page ]
        return []
    if file_type == FILE_TYPE_XML_MANUSCRIPT:
        # A manuscript file: recurse into the page files it references,
        # optionally filtered by their status attribute.
        if status_contains != '' and status_not_contain != '':
            xpath = '//page[contains(@status, "{0}") and not(contains(@status, "{1}"))]/@output'.format(status_contains, status_not_contain)
        elif status_contains != '':
            xpath = '//page[contains(@status, "{0}")]/@output'.format(status_contains)
        elif status_not_contain != '':
            xpath = '//page[not(contains(@status, "{0}"))]/@output'.format(status_not_contain)
        else:
            xpath = '//page/@output'
        pages = []
        for xml_source_file in source_tree.xpath(xpath):
            if isfile(xml_source_file):
                pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function)
        return pages
    return []
@classmethod
def get_semantic_dictionary(cls):
    """ Creates a semantic dictionary as specified by SemanticClass.

    Collects the page's datatype properties, its relation to a text field
    and its list-valued collections, then lets the superclass machinery
    merge in inherited class information.
    """
    properties = { 'number': { 'class': str, 'cardinality': 1},\
            'faksimile_image': { 'class': FaksimileImage, 'cardinality': 1},\
            'orientation': { 'class': str, 'cardinality': 1},\
            'svg_image': { 'class': SVGImage, 'cardinality': 1}}
    properties.update(cls.create_semantic_property_dictionary('text_field', TextField,\
            cardinality=1, name='pageIsOnTextField', label='page is on text field',\
            comment='Relates a page to the text field on a faksimile image.'))
    for key in [ 'lines', 'words', 'writing_processes', 'word_deletion_paths', 'word_insertion_marks']:
        properties.update(cls.create_semantic_property_dictionary(key, list))
    dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
    return cls.return_dictionary_after_updating_super_classes(dictionary)
def init_node_objects(self):
    """Initialize all node objects.

    Builds word insertion marks, words, foreign-hand marks, text
    connection marks, line numbers, lines, writing processes and word
    deletion paths from the parsed page tree. NOTE: self.words must be
    set before the writing processes, which consume it.
    """
    self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG) ]
    self.words = [ Word.create_cls(word_node) for word_node in self.page_tree.getroot().xpath('./word') ]
    self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in self.page_tree.getroot().xpath('//' + MarkForeignHands.XML_TAG) ]
    self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ]
    self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ]
    self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ]
    self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ]
    self.word_deletion_paths = [ Path(node=node) for node in self.page_tree.xpath('//' + Path.WORD_DELETION_PATH_TAG) ]
    # Word objects can only be fully initialized when faksimile data is present.
    if self.faksimile_image is not None and self.text_field is not None:
        for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks:
            simple_word.init_word(self)
    for wim in self.word_insertion_marks:
        if wim.line_number > -1:
            wim.line = [ line for line in self.lines if line.id == wim.line_number ][0]
def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]):
    """Update word ids and attach them to page.page_tree.

    Removes all existing word/mark nodes from the tree, renumbers words,
    foreign-hand marks and text connection marks, optionally applies
    update_function_on_word (a callable or a list of callables) to each
    item, and re-attaches everything. Special word types are only passed
    to the update functions when listed in include_special_words_of_type.
    Does nothing but print 'locked' when the page is locked.
    """
    if not self.is_locked():
        # Normalize to a list so a single callable and a list of callables
        # are handled uniformly below.
        update_function_on_word = [ update_function_on_word ]\
                if type(update_function_on_word) != list\
                else update_function_on_word
        for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG):
            node.getparent().remove(node)
        for index, word in enumerate(self.words):
            word.id = index
            for func in update_function_on_word:
                if callable(func):
                    func(word)
            word.attach_word_to_tree(self.page_tree)
        for index, mark_foreign_hands in enumerate(self.mark_foreign_hands):
            mark_foreign_hands.id = index
            if MarkForeignHands in include_special_words_of_type:
                for func in update_function_on_word:
                    # FIX: was callable(update_function_on_word) — always False
                    # for the list, so the functions were never applied here.
                    if callable(func):
                        func(mark_foreign_hands)
            mark_foreign_hands.attach_word_to_tree(self.page_tree)
        for index, text_connection_mark in enumerate(self.text_connection_marks):
            text_connection_mark.id = index
            if TextConnectionMark in include_special_words_of_type:
                for func in update_function_on_word:
                    # FIX: same wrong callable() target as above.
                    if callable(func):
                        func(text_connection_mark)
            text_connection_mark.attach_word_to_tree(self.page_tree)
    else:
        print('locked')
def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None):
    """Update the data source of page.

    Records the faksimile svg file (and optionally the xml correction
    file) on a 'data-source' node, creating that node if absent.
    """
    if faksimile_svgFile is None:
        return
    self.faksimile_svgFile = faksimile_svgFile
    existing_nodes = self.page_tree.xpath('.//data-source')
    if len(existing_nodes) > 0:
        data_node = existing_nodes[0]
    else:
        data_node = ET.SubElement(self.page_tree.getroot(), 'data-source')
    data_node.set('file', self.faksimile_svgFile)
    if xml_correction_file is not None:
        data_node.set('xml-corrected-words', xml_correction_file)
def update_line_number_area(self, transkription_field, svg_tree=None):
    """Determines the width of the area where the line numbers are written in the page.source file.

    Locates a reference line-number text node near the transkription
    field; on verso pages its x position is the area width directly,
    otherwise the width of the number glyph is measured from the svg
    path file and added.
    """
    THRESHOLD = 0.4
    if svg_tree is None:
        svg_tree = ET.parse(self.source)
    if len(self.line_numbers) > 1:
        # Pick a representative line number (a later one on verso pages,
        # where numbers sit left of the text field).
        line_number = self.line_numbers[9]\
                if transkription_field.is_page_verso() and len(self.line_numbers) > 8\
                else self.line_numbers[1]
        ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)\
                if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)\
                and LineNumber.IS_A_LINE_NUMBER(item)\
                and LineNumber(raw_text_node=item, transkription_field=transkription_field).id == line_number.id ]
        if len(ln_nodes) > 0:
            matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform'))
            if transkription_field.is_page_verso():
                transkription_field.add_line_number_area_width(matrix.getX())
            elif self.svg_file is not None and isfile(self.svg_file):
                # Recto: measure the glyph referenced by the nearest <use>
                # node to get the full width of the number area.
                svg_path_tree = ET.parse(self.svg_file)
                namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
                svg_x = matrix.getX()
                svg_y = self.line_numbers[1].bottom + transkription_field.ymin
                use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
                        .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces)
                if len(use_nodes) > 0:
                    symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '')
                    d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
                    if len(d_strings) > 0 and d_strings[0] != '':
                        path = parse_path(d_strings[0])
                        xmin, xmax, ymin, ymax = path.bbox()
                        width = xmax - xmin
                        transkription_field.add_line_number_area_width(matrix.getX() + width)
def update_page_type(self, transkription_field=None):
"""Adds a source to page and attaches it to page_tree.
"""
if transkription_field is None:
if self.source is None or not isfile(self.source):
raise FileNotFoundError('Page does not have a source!')
transkription_field = TranskriptionField(self.source)
self.page_type = Page.PAGE_VERSO\
if transkription_field.is_page_verso()\
else Page.PAGE_RECTO
self.page_tree.getroot().set('pageType', self.page_type)
- def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False):
+ def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False):
"""Update styles of words and add them to their transkription_positions.
Args:
add_to_parents: Add styles also to word (and if not None to manuscript).
partition_according_to_styles: Partition word if its transkription_positions have different styles.
create_css: Create styles with CSS information; deleted and non-deleted words get distinct styles.
"""
style_dictionary = {}
if words is None:
words = self.words
for word in words:
if len(word.word_parts) > 0:
- self.update_styles(words=word.word_parts, manuscript=manuscript,\
+ self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\
add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles)
for transkription_position in word.transkription_positions:
if len(transkription_position.positional_word_parts) > 0:
style_class = transkription_position.positional_word_parts[0].style_class
writing_process_id = -1
for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]:
writing_process_id = self.fontsizekey2stage_mapping.get(font_key)
style_class_key = (Style.remove_irrelevant_style_keys(style_class, self), writing_process_id)
- if style_dictionary.get(style_class_key) is None:
- style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript)
- style_dictionary[style_class_key].writing_process_id = style_class_key[1]
- transkription_position.style = style_dictionary[style_class_key]
- if add_to_parents and transkription_position.style not in word.styles:
- word.styles.append(transkription_position.style)
+ if create_css:
+ if style_dictionary.get((style_class_key, word.deleted)) is None:
+ style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\
+ create_css=create_css, deleted=word.deleted )
+ transkription_position.style = style_dictionary[(style_class_key, word.deleted)]
+ #print(style_dictionary[(style_class_key, word.deleted)])
+ else:
+ if style_dictionary.get(style_class_key) is None:
+ style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css)
+ style_dictionary[style_class_key].writing_process_id = style_class_key[1]
+ transkription_position.style = style_dictionary[style_class_key]
+ if add_to_parents and transkription_position.style not in word.styles:
+ word.styles.append(transkription_position.style)
if partition_according_to_styles:
word.split_according_to_status('style', splits_are_parts=True)
if manuscript is not None\
and add_to_parents:
manuscript.update_styles(*style_dictionary.values())
Index: svgscripts/datatypes/simple_word.py
===================================================================
--- svgscripts/datatypes/simple_word.py (revision 91)
+++ svgscripts/datatypes/simple_word.py (revision 92)
@@ -1,123 +1,124 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This super class can be used to represent a simple word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import abc
from lxml import etree as ET
import sys
from .line import Line
from .faksimile_position import FaksimilePosition
from .transkription_position import TranskriptionPosition
from .word_position import WordPosition
sys.path.append('py2ttl')
from class_spec import SemanticClass
class SimpleWord(SemanticClass, metaclass=abc.ABCMeta):
"""
This class represents a simple word.
"""
XML_TAG = 'simple-word'
XML_SUB_TAG = 'content'
def __init__(self, id=0, line_number=-1, line=None, text='', deleted=False, transkription_positions=None, faksimile_positions=None):
self.id = id
self.text = text
self.line_number = line_number
self.lines = []
if line is not None:
self.lines.append(line)
self.transkription_positions = transkription_positions if transkription_positions is not None else []
self.faksimile_positions = faksimile_positions if faksimile_positions is not None else []
def attach_word_to_tree(self, target_tree):
"""Attaches word to tree target_tree.
"""
if target_tree.__class__.__name__ == '_ElementTree':
target_tree = target_tree.getroot()
if len(target_tree.xpath('.//' + self.XML_TAG + '[@id="%s"]' % self.id)) > 0:
word_node = target_tree.xpath('.//' + self.XML_TAG + '[@id="%s"]' % self.id)[0]
word_node.getparent().remove(word_node)
word_node = ET.SubElement(target_tree, self.XML_TAG, attrib={'id': str(self.id)})
word_node.set('text', self.text)
if self.line_number > -1:
word_node.set('line-number', str(self.line_number))
for transkription_position in self.transkription_positions:
transkription_position.attach_object_to_tree(word_node)
for faksimile_position in self.faksimile_positions:
faksimile_position.attach_object_to_tree(word_node)
return word_node
@classmethod
def create_cls(cls, word_node):
"""Creates a cls from a (lxml.Element) node.
[:return:] cls
"""
if word_node is not None: # init word from xml node
id = int(word_node.get('id'))
line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else -1
text = word_node.get('text')
transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('./' + WordPosition.TRANSKRIPTION) ]
faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('./' + WordPosition.FAKSIMILE) ]
return cls(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\
faksimile_positions=faksimile_positions)
else:
error_msg = 'word_node has not been defined'
raise Exception('Error: {}'.format(error_msg))
@classmethod
def get_semantic_dictionary(cls):
""" Creates and returns a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = { 'lines': {cls.CLASS_KEY: Line,\
cls.CARDINALITY: 1,\
cls.CARDINALITY_RESTRICTION: 'minCardinality',\
cls.PROPERTY_NAME: 'wordBelongsToLine',\
cls.PROPERTY_LABEL: 'word belongs to a line',\
cls.PROPERTY_COMMENT: 'Relating a word to a line.'}}
properties.update(cls.create_semantic_property_dictionary('transkription_positions', TranskriptionPosition,\
name='hasTranskriptionPosition', cardinality=1, cardinality_restriction='minCardinality'))
properties.update(cls.create_semantic_property_dictionary('faksimile_positions', FaksimilePosition,\
name='hasFaksimilePosition')) #, cardinality=1, cardinality_restriction='minCardinality'))
- properties.update(cls.create_semantic_property_dictionary('text', str, cardinality=1))
+ properties.update(cls.create_semantic_property_dictionary('text', str, cardinality=1,\
+ subPropertyOf=cls.HOMOTYPIC_HAS_TEXT_URL_STRING))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def init_word(self, page):
"""Initialize word with objects from page.
"""
for transkription_position in self.transkription_positions:
transkription_position.svg_image = page.svg_image
self.faksimile_positions = FaksimilePosition.create_list_of_cls(self.faksimile_positions, page.faksimile_image, page.text_field)
if self.line_number > -1:
self.lines += [ line for line in page.lines if line.id == self.line_number ]
Index: svgscripts/datatypes/manuscript.py
===================================================================
--- svgscripts/datatypes/manuscript.py (revision 91)
+++ svgscripts/datatypes/manuscript.py (revision 92)
@@ -1,141 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent an archival unity of manuscript pages, i.e. workbooks, notebooks, folders of handwritten pages.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION
from .color import Color
sys.path.append('py2ttl')
from class_spec import SemanticClass
sys.path.append('shared_util')
from myxmlwriter import parse_xml_of_type, write_pretty, xml_has_type
class ArchivalManuscriptUnity(SemanticClass):
"""
This class represents an archival unity of manuscript pages (workbooks, notebooks and portfolios of handwritten pages).
@label archival unity of manuscript pages
Args:
title title of archival unity
manuscript_type type of manuscript: 'Arbeitsheft', 'Notizheft', 'Mappe'
manuscript_tree lxml.ElementTree
"""
XML_TAG = 'manuscript'
XML_COLORS_TAG = 'colors'
TYPE_DICTIONARY = { 'Mp': 'Mappe', 'N': 'Notizheft', 'W': 'Arbeitsheft' }
UNITTESTING = False
def __init__(self, title='', manuscript_type='', manuscript_tree=None):
self.colors = []
self.manuscript_tree = manuscript_tree
self.manuscript_type = manuscript_type
self.pages = []
self.styles = []
self.title = title
if self.manuscript_type == '' and self.title != ''\
and self.title.split(' ')[0] in self.TYPE_DICTIONARY.keys():
self.manuscript_type = self.TYPE_DICTIONARY[self.title.split(' ')[0]]
def get_name_and_id(self):
"""Return an identification for object as 2-tuple.
"""
return '', self.title.replace(' ', '_')
@classmethod
def create_cls(cls, xml_manuscript_file, page_status_list=None, page_xpath='', update_page_styles=False):
"""Create an instance of ArchivalManuscriptUnity from a xml file of type FILE_TYPE_XML_MANUSCRIPT.
:return: ArchivalManuscriptUnity
"""
manuscript_tree = parse_xml_of_type(xml_manuscript_file, FILE_TYPE_XML_MANUSCRIPT)
title = manuscript_tree.getroot().get('title') if bool(manuscript_tree.getroot().get('title')) else ''
manuscript_type = manuscript_tree.getroot().get('type') if bool(manuscript_tree.getroot().get('type')) else ''
manuscript = cls(title=title, manuscript_type=manuscript_type, manuscript_tree=manuscript_tree)
manuscript.colors = [ Color.create_cls(node=color_node) for color_node in manuscript_tree.xpath('.//' + cls.XML_COLORS_TAG + '/' + Color.XML_TAG) ]
if page_xpath == '':
page_status = ''
if page_status_list is not None\
and type(page_status_list) is list\
and len(page_status_list) > 0:
page_status = '[' + ' and '.join([ f'contains(@status, "{status}")' for status in page_status_list ]) + ']'
page_xpath = f'//pages/page{page_status}/@output'
manuscript.pages = [ Page(page_source)\
for page_source in manuscript_tree.xpath(page_xpath)\
if isfile(page_source) and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source) ]
if update_page_styles:
- for page in manuscript.pages: page.update_styles(manuscript=manuscript, add_to_parents=True)
+ for page in manuscript.pages: page.update_styles(manuscript=manuscript, add_to_parents=True, create_css=True)
return manuscript
def get_color(self, hex_color) -> Color:
"""Return color if it exists or None.
"""
if hex_color in [ color.hex_color for color in self.colors ]:
return [ color for color in self.colors if color.hex_color == hex_color ][0]
return None
@classmethod
def get_semantic_dictionary(cls):
""" Creates a semantic dictionary as specified by SemanticClass.
"""
dictionary = {}
class_dict = cls.get_class_dictionary()
properties = {}
properties.update(cls.create_semantic_property_dictionary('title', str, 1))
properties.update(cls.create_semantic_property_dictionary('manuscript_type', str, 1))
properties.update(cls.create_semantic_property_dictionary('styles', list))
properties.update(cls.create_semantic_property_dictionary('pages', list))
dictionary.update({cls.CLASS_KEY: class_dict})
dictionary.update({cls.PROPERTIES_KEY: properties})
return cls.return_dictionary_after_updating_super_classes(dictionary)
def update_colors(self, color):
"""Update manuscript colors if color is not contained.
"""
if self.get_color(color.hex_color) is None:
self.colors.append(color)
if self.manuscript_tree is not None:
if len(self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)) > 0:
self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)[0].getparent().remove(self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)[0])
colors_node = ET.SubElement(self.manuscript_tree.getroot(), self.XML_COLORS_TAG)
for color in self.colors:
color.attach_object_to_tree(colors_node)
if not self.UNITTESTING:
write_pretty(xml_element_tree=self.manuscript_tree, file_name=self.manuscript_tree.docinfo.URL,\
script_name=__file__, backup=True,\
file_type=FILE_TYPE_XML_MANUSCRIPT)
def update_styles(self, *styles):
"""Update manuscript styles.
"""
for style in styles:
if style not in self.styles:
+ #print(style.css_styles)
self.styles.append(style)
Index: tests_svgscripts/test_data/N_VII_1_page006.xml
===================================================================
--- tests_svgscripts/test_data/N_VII_1_page006.xml (revision 91)
+++ tests_svgscripts/test_data/N_VII_1_page006.xml (revision 92)
@@ -1,1276 +1,1276 @@
svgWordPosition
2019-08-02 15:17:37
2019-08-02 15:17:37
2019-08-02 15:30:59
2019-08-02 15:30:59
- 2020-03-27 17:38:19
+ 2020-05-12 10:14:00
Index: tests_svgscripts/test_style.py
===================================================================
--- tests_svgscripts/test_style.py (revision 91)
+++ tests_svgscripts/test_style.py (revision 92)
@@ -1,84 +1,92 @@
import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.color import Color
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.page import Page
from datatypes.style import Style
class TestStyle(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_file = DATADIR + sep + 'test.xml'
self.test_svg_file = DATADIR + sep + 'test421.svg'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
def test_create_cls(self):
page = Page(self.test_page)
style_string = "st11 st10 st5"
style = Style.create_cls(page, style_string)
self.assertEqual(style.font_family, 'Weidemann-Book')
self.assertEqual(style.color.hex_color, "#DADADA")
self.assertEqual(style.writing_instrument, 'schwarze Tinte')
style_string = "st11 st10"
style = Style.create_cls(page, style_string)
self.assertEqual(style.font_family, 'Weidemann-Book')
self.assertEqual(style.color.name, "black")
self.assertEqual(style.writing_instrument, 'schwarze Tinte')
+ style_string = "st11 st3"
+ style = Style.create_cls(page, style_string, create_css=True)
+ self.assertEqual(style.font_family, 'Weidemann-Book')
+ self.assertEqual(style.font_size, '9px')
def test_remove_irrelevant_style_keys(self):
page = Page(self.test_page)
style_string = "st11 st10 st9 st5 st0"
self.assertEqual(Style.remove_irrelevant_style_keys(style_string, page), "st11 st5 st9")
def test_process_style_classes(self):
style = Style()
style.color = Color.create_cls(hex_color='#009CDE')
style.process_style_classes()
self.assertEqual(style.writing_instrument, 'violette Tinte')
self.assertEqual(style.font, 'deutsche Schreibschrift')
style.font_family = "NewsGothicBT-Bold"
style.process_style_classes()
self.assertEqual(style.writing_instrument, 'Blaustift')
self.assertEqual(style.font, 'lateinische Schreibschrift')
+ style = Style()
+ style.font_family = "NewsGothicBT-Bold"
+ style.process_style_classes()
+ #print(style.css_styles)
def test_get_semantic_dictionary(self):
dictionary = Style.get_semantic_dictionary()
#print(dictionary)
def test_copy(self):
manuscript = ArchivalManuscriptUnity.create_cls(self.test_manuscript)
page = Page(self.test_page)
page.words = [ page.words[0] ]
page.update_styles(manuscript=manuscript, add_to_parents=True)
self.assertEqual(len(manuscript.styles), 1)
styleA = page.words[0].transkription_positions[0].style
styleB = styleA.create_a_copy()
self.assertEqual(styleA == styleB, True)
styleB = styleA.create_a_copy(reduce_writing_process_id=True)
self.assertEqual(styleA != styleB, True)
def test_eq(self):
page = Page(self.test_page)
style_string = "st11 st10 st5"
styleA = Style.create_cls(page, style_string)
styleB = Style.create_cls(page, style_string)
self.assertEqual(styleA == styleB, True)
style_string = "st11 st10"
styleC = Style.create_cls(page, style_string)
self.assertEqual(styleA != styleC, True)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_word.py
===================================================================
--- tests_svgscripts/test_word.py (revision 91)
+++ tests_svgscripts/test_word.py (revision 92)
@@ -1,455 +1,456 @@
import unittest
from os import sep, path
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from process_words_post_merging import reset_page, update_writing_process_ids
from datatypes.box import Box
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.matrix import Matrix
import datatypes.page
from datatypes.path import Path
from datatypes.positional_word_part import PositionalWordPart
from datatypes.style import Style
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.word import Word, execute_function_on_parts, update_transkription_position_ids
from datatypes.word_position import WordPosition
sys.path.append('py2ttl')
from class_spec import SemanticClass
class Page:
def __init__(self):
self.svg_file = None
def get_line_number(self, input=0):
return -1
def get_biggest_fontSize4styles(self, style_set={}):
return 7
class TestWord(unittest.TestCase):
TESTCASE = None
def setUp(self):
DATADIR = path.dirname(__file__) + sep + 'test_data'
self.test_file = DATADIR + sep + 'N_VII_1_page009.xml'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.word_part_objs = [{'text': 'a' }, {'text': 'b' }, {'text': 'c' }]
x = 0
for dict in self.word_part_objs:
dict['class'] = 'st22'
dict['x'] = x
dict['y'] = 11
x += 1
mylist = {'text': 'abc', 'id': '0', 'line-number': '2', 'deleted': 'true' }
word_position = TranskriptionPosition(x=0, y=1, height=10, width=10, matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)'))
self.transkription_positions = [ word_position ]
self.word_node = ET.Element('word', attrib=mylist)
word_position.attach_object_to_tree(self.word_node)
x = 0
for char in mylist['text']:
ET.SubElement(self.word_node, 'part', attrib={'text': char, 'x': str(x), 'y': '11', 'class': 'st22' })
x += 1
def test_Word_with_word_part_objs(self):
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
self.assertEqual(word.id, 0)
self.assertEqual(word.transkription_positions[0].bottom, 13)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 3)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
def test_Word_with_word_node(self):
word = Word.create_cls(self.word_node)
self.assertEqual(word.id, 0)
self.assertEqual(word.deleted, True)
self.assertEqual(word.transkription_positions[0].bottom, 11)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 1)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
self.assertEqual(word.line_number, 2)
self.assertEqual(word.transkription_positions[0].transform.isRotationMatrix(), True)
def test_attach_word_to_tree(self):
newWord = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
empty_tree = ET.ElementTree(ET.Element('page'))
newWord.attach_word_to_tree(empty_tree)
for word_node in empty_tree.getroot().xpath('//word'):
word = Word.CREATE_WORD(word_node=word_node)
self.assertEqual(word.id, 0)
self.assertEqual(word.deleted, False)
self.assertEqual(word.transkription_positions[0].bottom, 13)
self.assertEqual(word.transkription_positions[0].height, 10)
self.assertEqual(word.transkription_positions[0].top, 3)
self.assertEqual(word.transkription_positions[0].left, 0)
self.assertEqual(word.transkription_positions[0].width, 10)
self.assertEqual(word.text, 'abc')
@unittest.skipUnless(TESTCASE is None or TESTCASE == 0, 'Not testing this case')
def test_create_correction_history_case0(self):
# Case 1: whole word over box
box = Box(earlier_text='XYX')
word = Word(text='ASDF', transkription_positions=[TranskriptionPosition()])
word.word_box = box
word.create_correction_history()
self.assertEqual(word.earlier_version is None, True)
self.assertEqual(word.overwrites_word is not None, True)
@unittest.skipUnless(TESTCASE is None or TESTCASE == 1, 'Not testing this case')
def test_create_correction_history_case1(self):
# Case 2: part of word over box
box = Box(earlier_text='XYX')
partA = Word(text='A', transkription_positions=[TranskriptionPosition()])
partA.word_box = box
partB = Word(text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
word.create_correction_history()
self.assertEqual(word.earlier_version is None, True)
self.assertEqual(word.word_parts[0].overwrites_word is not None, True)
@unittest.skipUnless(TESTCASE is None or TESTCASE == 2, 'Not testing this case')
def test_create_correction_history_case3(self):
# Case 3: part of word over box, word under box is part of earlier version
box = Box(earlier_text='XYX')
tp0 = TranskriptionPosition()
tp0.style = Style(writing_process_id=0)
tp1 = TranskriptionPosition()
tp1.style = Style(writing_process_id=1)
partA = Word(id=0, text='Test', transkription_positions=[ tp0])
partB = Word(id=1, text='er', transkription_positions=[ tp1])
partB.word_box = box
word = Word(text='Tester', writing_process_id=1, word_parts=[ partA, partB ] )
word.create_correction_history(box_style=tp0.style)
self.assertEqual(word.text, 'Tester')
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.earlier_version.text, 'TestXYX')
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
@unittest.skipUnless(TESTCASE is None or TESTCASE == 3, 'Not testing this case')
def test_create_correction_history_case4(self):
# Case 4: part of word is deleted
partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
word.create_correction_history()
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.edited_text, 'SDF')
@unittest.skipUnless(TESTCASE is None or TESTCASE == 4, 'Not testing this case')
def test_create_correction_history_case5(self):
tp0 = TranskriptionPosition()
tp0.style = Style(writing_process_id=0)
tp1 = TranskriptionPosition()
tp1.style = Style(writing_process_id=1)
partA = Word(id=0, text='Test', transkription_positions=[ tp0])
partB = Word(id=1, text='er', transkription_positions=[ tp1])
word = Word(text='Tester', word_parts=[ partA, partB ] )
word.create_correction_history()
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.word_parts[1].extendsEarlierVersion, True)
self.assertEqual(word.word_parts[1].isExtensionOfWord, word.earlier_version)
#@unittest.skipUnless(TESTCASE is None or TESTCASE == 5, 'Not testing this case')
#@unittest.skip('case tested, relies on a local xml file')
def test_create_correction_history_case_full(self):
page = datatypes.page.Page('xml/N_VII_1_page138.xml')
manuscript = ArchivalManuscriptUnity()
reset_page(page)
update_writing_process_ids(page)
word = [ word for word in page.words if word.text == 'Verschiedenes' and word.line_number == 4 ][0]
wordAufBau = [ word for word in page.words if word.text == 'AufBau' ][0]
#page.words = [ word ]
page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
word.word_parts[0].transkription_positions[0].has_box = Box(earlier_text='v')
self.assertEqual(len(word.word_parts), 2)
word_over_box = word._get_partial_word_over_box()
update_transkription_position_ids(word)
word.create_correction_history(page)
self.assertEqual(word.writing_process_id, 1)
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.earlier_version.text, 'verschiedenes')
#print(word.earlier_version.id, [ (w.id, w.text) for w in word.earlier_version.word_parts ])
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
#print(ET.dump(word_node))
"""
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
"""
word = wordAufBau
page.words = [ word ]
page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
word.word_parts[0].deleted = True
word.word_parts[1].transkription_positions[0].has_box = Box(earlier_text='b')
self.assertEqual(len(word.word_parts), 3)
word_over_box = word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 3)
update_transkription_position_ids(word)
word.create_correction_history(page)
self.assertEqual(word.writing_process_id, 2)
self.assertEqual(word.earlier_version is not None, True)
self.assertEqual(word.text, 'AufBau')
self.assertEqual(word.edited_text, 'Bau')
self.assertEqual(word.earlier_version.text, 'Aufbau')
self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
self.assertEqual(word.word_parts[1].overwrites_word is not None, True)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
#print(ET.dump(word_node))
newWord = Word.create_cls(word_node)
#@unittest.skip('')
def test_earlier_version(self):
partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
word = Word(text='ASDF', word_parts=[ partA, partB])
earlier_version = word.create_earlier_version()
self.assertEqual(earlier_version is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord is not None, True)
self.assertEqual(word.word_parts[0].isDeletionOfWord, earlier_version.word_parts[0])
def test_undo_partitioning(self):
tps = []
for i, xy in enumerate([ 3, 4, 5 ]):
tps.append(TranskriptionPosition(id=i, x=xy, y=xy, height=10, width=10))
partA = Word(id=0, text='Auf', writing_process_id=1, deleted=True, transkription_positions=[ tps[0]])
partB = Word(id=1, text='B', writing_process_id=2, transkription_positions=[tps[1]])
partC = Word(id=2, text='au', writing_process_id=1,transkription_positions=[tps[2]])
word = Word(text='Aufbau', writing_process_id=2, word_parts=[ partA, partB, partC ] )
word.undo_partitioning()
self.assertEqual(len(word.transkription_positions), len(tps))
self.assertEqual(len(word.word_parts), 0)
"""
page = datatypes.page.Page('xml/N_VII_1_page138.xml')
word = page.words[77]
word.undo_partitioning()
self.assertEqual(len(word.word_parts), 0)
self.assertEqual(len(word.transkription_positions), 3)
update_transkription_position_ids(word)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
print(ET.dump(word_node))
"""
def test_split(self):
page = Page()
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, self.word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('b')
self.assertEqual(previousWord.id, 0)
self.assertEqual(previousWord.text, 'a')
self.assertEqual(currentWord.id, 1)
self.assertEqual(nextWord.id, 2)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('bc')
self.assertEqual(previousWord.id, 0)
self.assertEqual(previousWord.text, 'a')
self.assertEqual(currentWord.id, 1)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
previousWord, currentWord, nextWord = word.split('ab', start_id=10)
self.assertEqual(currentWord.id, 10)
self.assertEqual(currentWord.text, 'ab')
self.assertEqual(currentWord.transkription_positions[0].width, 2.1)
self.assertEqual(nextWord.id, 11)
self.assertEqual(nextWord.transkription_positions[0].width, 5.2)
word_part_objs=[{'text': 'x', 'class':'st22', 'x': 0, 'y': 0},\
{'text': 'Insofern', 'class':'st22', 'x': 1, 'y': 0},\
{'text': 'x', 'class':'st22', 'x': 10, 'y': 0}]
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
with self.assertWarns(Warning):
previousWord, currentWord, nextWord = word.split('Insofer')
word_part_objs=[{'text': 'xInsofern', 'class':'st22', 'x': 0, 'y': 0}]
pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
word = Word(text=''.join([pwp.text for pwp in pwps]), transkription_positions=transkription_positions)
with self.assertWarns(Warning):
previousWord, currentWord, nextWord = word.split('Insofern')
def test_join(self):
word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word)
self.assertEqual(word.text, 'abc.')
other_word = Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class':'st22', 'x': 3, 'y': 11}])
word.join(other_word, append_at_end_of_new_word=False)
self.assertEqual(word.text, '.abc.')
"""
tree = ET.ElementTree(ET.Element('page'))
word.attach_word_to_tree(tree)
print(ET.dump(tree.getroot()))
"""
def test_get_semanticAndDataDict(self):
dictionary = Word.get_semantic_dictionary()
+ #print(dictionary)
info_dict = dictionary['properties'].get('isDeletionOfWord')
self.assertEqual(SemanticClass.SUPER_PROPERTY in info_dict.keys(), True)
super_info_dict = info_dict[SemanticClass.SUPER_PROPERTY]
#print(info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME))
def test_simplify_transkription_positions(self):
node_string = """
"""
nodeA = ET.fromstring(node_string)
node_string = """
"""
nodeB = ET.fromstring(node_string)
word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
self.assertEqual(len(word.transkription_positions), 2)
word.simplify_transkription_positions()
self.assertEqual(len(word.transkription_positions), 1)
word = Word(text="Si", transkription_positions=[ TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB) ])
word.transkription_positions[1].writing_process_id = -1
word.simplify_transkription_positions()
self.assertEqual(len(word.transkription_positions), 1)
self.assertEqual(word.transkription_positions[0].writing_process_id, 0)
"""
tree = ET.ElementTree(ET.Element('page'))
word.attach_word_to_tree(tree)
print(ET.dump(tree.getroot()))
"""
def test_partition(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
self.assertEqual(word.belongs_to_multiple_writing_processes(), True)
word.partition_according_to_writing_process_id()
self.assertEqual(len(word.word_parts), 3)
self.assertEqual(word.belongs_to_multiple_writing_processes(), False)
self.assertEqual(word.belongs_to_multiple_writing_processes(include_parts=True), True)
empty_tree = ET.ElementTree(ET.Element('page'))
word_node = word.attach_word_to_tree(empty_tree)
newWord = Word.create_cls(word_node)
self.assertEqual(len(newWord.word_parts), 3)
#print(ET.dump(empty_tree.getroot()))
def test_partition_deletion(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
for transkription_position in word.transkription_positions:
transkription_position.deleted = transkription_position.writing_process_id == 1
self.assertEqual(word.has_mixed_status('deleted'), True)
word.partition_according_to_deletion()
self.assertEqual(len(word.word_parts), 3)
self.assertEqual(word.has_mixed_status('deleted'), False)
self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
page = datatypes.page.Page(self.test_file)
word = page.words[67]
word.partition_according_to_writing_process_id()
#print([(word.text, word.deleted) for word in word.word_parts])
word.word_parts[1].transkription_positions[1].deleted = True
word.partition_according_to_deletion()
self.assertEqual(len(word.word_parts), 4)
#print([(word.text, word.deleted) for word in word.word_parts])
partA = Word(text='A', deleted=True)
partB = Word(text='SDF', deleted=False)
word = Word(text='ASDF', word_parts=[ partA, partB])
self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)
def test_execute_function_on_parts(self):
page = datatypes.page.Page(self.test_file)
word_parts = [ page.words[67], page.words[68] ]
word_parts, none = execute_function_on_parts(word_parts, 'partition_according_to_writing_process_id')
self.assertEqual(len(word_parts) == 4, True)
def test_process_word_boxes(self):
page = datatypes.page.Page(self.pdf_xml)
page.source = self.pdf_xml_source
page.update_styles(partition_according_to_styles=True)
tr = TranskriptionField(page.source)
box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
indices = [30, 277, 288, 297, 321]
for word_id, index in enumerate(indices):
word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
self.assertEqual(word_over_box is not None, True)
self.assertEqual(word_over_box == page.words[index] or word_over_box in page.words[index].word_parts, True)
#self.assertEqual(word_over_box in page.words[index].word_parts, True)
def test_process_word_several_boxesOn1LIne(self):
page = datatypes.page.Page(self.pdf_xml)
page.source = self.pdf_xml_source
for word in page.words:
word.set_writing_process_id_to_transkription_positions(page)
word.partition_according_to_writing_process_id()
tr = TranskriptionField(page.source)
box_path_d = ['M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',\
'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',\
'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',\
'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',\
'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315']
box_paths = [ Box(d_string=d_string, earlier_text='test') for d_string in box_path_d ]
indices = [30, 277, 288, 297, 321]
empty_tree = ET.ElementTree(ET.Element('page'))
for word_id, index in enumerate(indices):
word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
self.assertEqual(word_over_box is not None, True)
def test_split_according_to_status(self):
page = datatypes.page.Page(self.test_file)
word = page.words[67]
for transkription_position in word.transkription_positions:
transkription_position.text = 'asdf'\
if transkription_position.writing_process_id == 1\
else word.text
self.assertEqual(word.has_mixed_status('text'), True)
new_words = word.split_according_to_status('text')
#print([word.text for word in new_words ])
self.assertEqual(len(new_words) > 1, True)
self.assertEqual(new_words[0].id, word.id)
self.assertEqual(new_words[0].deleted, word.deleted)
self.assertEqual(new_words[1].id, word.id+1)
manuscript = ArchivalManuscriptUnity()
page = datatypes.page.Page(self.test_file)
word = page.words[67]
page.words = [ word ]
page.update_styles(manuscript=manuscript)
new_words = word.split_according_to_status('style', splits_are_parts=True)
self.assertEqual(len(word.word_parts), 3)
def test__create_new_word(self):
manuscript = ArchivalManuscriptUnity()
page = datatypes.page.Page(self.test_file)
word = page.words[67]
page.words = [ word ]
page.update_styles(manuscript=manuscript)
newWord = word._create_new_word([ word.transkription_positions[0] ], 'style')
for key in Word.COPY_PROPERTY_KEY:
self.assertEqual(newWord.__dict__[key], word.__dict__[key])
self.assertEqual(len(newWord.styles), 1)
def test__get_partial_word_over_box(self):
word = Word(text='test', transkription_positions=[ TranskriptionPosition(id=0), TranskriptionPosition(id=1) ])
word.transkription_positions[0].has_box = Box(earlier_text='asdf')
word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 2)
partA = Word(id=0, text='A', transkription_positions=[TranskriptionPosition()])
partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition(), TranskriptionPosition(id=1)])
partB.transkription_positions[0].has_box = Box(earlier_text='asdf')
word = Word(text='ASDF', word_parts=[ partA, partB])
word._get_partial_word_over_box()
self.assertEqual(len(word.word_parts), 2)
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_page.py
===================================================================
--- tests_svgscripts/test_page.py (revision 91)
+++ tests_svgscripts/test_page.py (revision 92)
@@ -1,148 +1,154 @@
import unittest
from os import sep, path
from os.path import isdir, isfile, dirname, basename
import lxml.etree as ET
import sys
import sys
sys.path.append('svgscripts')
dir_changed = False
if not isdir('datatypes'):
sys.path.append(dirname(sys.path[0]))
dir_changed = True
from datatypes.lineNumber import LineNumber
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.style import Style
from datatypes.writing_process import WritingProcess
from datatypes.word import Word
class TestPage(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_file = DATADIR + sep + 'test.xml'
self.test_svg_file = DATADIR + sep + 'test421.svg'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
self.test_tcm_xml = DATADIR + sep + 'N_VII_1_page001.xml'
self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'
self.test_styles_color = DATADIR + sep + 'N_VII_1_page013.xml'
def test_Page(self):
page = Page(self.test_file)
self.assertEqual(page.title, 'Mp XIV 1')
self.assertEqual(page.number, '421')
self.assertEqual(len(page.sonderzeichen_list), 2)
self.assertEqual('st21' in page.sonderzeichen_list, True)
self.assertEqual('st23' in page.sonderzeichen_list, True)
self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
stage0 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 0 ]
stage1 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 1 ]
stage2 = [ key for key, value in page.fontsizekey2stage_mapping.items() if value == 2 ]
fontStage0 = float(page.style_dict.get(stage0[0]).get('font-size').replace('px',''))
fontStage1 = float(page.style_dict.get(stage1[0]).get('font-size').replace('px',''))
fontStage2 = float(page.style_dict.get(stage2[0]).get('font-size').replace('px',''))
self.assertEqual(fontStage0 > fontStage1, True)
self.assertEqual(fontStage1 > fontStage2, True)
def test_get_biggest_fontSize4styles(self):
page = Page(self.test_file)
style_set = { 'st12', 'st2', 'st14', 'st13' }
self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)
def test_get_words(self):
page = Page(self.test_file)
words = page.words
self.assertEqual(len(words), 440)
self.assertEqual(words[0].text, '$')
self.assertEqual(words[439].text, 'mußte!')
def test_get_line_number(self):
page = Page(self.test_file)
self.assertEqual(page.get_line_number( (page.words[0].transkription_positions[0].bottom+page.words[0].transkription_positions[0].top)/2), 1)
self.assertEqual(page.get_line_number( (page.words[27].transkription_positions[0].bottom+page.words[27].transkription_positions[0].top)/2), 2)
self.assertEqual(page.get_line_number( (page.words[105].transkription_positions[0].bottom+page.words[105].transkription_positions[0].top)/2), 7)
def test_update_page_type(self):
page = Page(self.pdf_xml)
tf = TranskriptionField(self.pdf_xml_source)
page.update_page_type(transkription_field=tf)
self.assertEqual(page.page_type, Page.PAGE_VERSO)
#page = Page(self.xml_fileB)
#page.update_page_type()
#self.assertEqual(page.page_type, Page.PAGE_RECTO)
def test_update_line_number_area(self):
page = Page(self.xml_file)
transkription_field = TranskriptionField(page.source)
page.update_line_number_area(transkription_field)
self.assertEqual(transkription_field.line_number_area_width > 0, True)
self.assertEqual(transkription_field.line_number_area_width < 15, True)
page = Page(self.xml_fileB)
transkription_field = TranskriptionField(page.source)
page.update_line_number_area(transkription_field)
self.assertEqual(transkription_field.line_number_area_width > 0, True)
self.assertEqual(transkription_field.line_number_area_width < 15, True)
def test_get_pages_from_xml_file(self):
pages = Page.get_pages_from_xml_file(self.test_manuscript)
self.assertEqual(len(pages), 4)
self.assertEqual(pages[0].number, '5')
self.assertEqual(pages[1].number, '6')
pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK)
self.assertEqual(len(pages), 2)
self.assertEqual(pages[0].number, '5')
pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains=STATUS_MERGED_OK, status_not_contain=STATUS_POSTMERGED_OK)
self.assertEqual(len(pages), 1)
def test_get_semantic_dictionary(self):
dictionary = Page.get_semantic_dictionary()
#print(dictionary)
def test_update_styles(self):
page = Page(self.pdf_xml)
page.words = [ word for word in page.words if word.text == 'Schopenhauer' ]
page.update_styles(add_to_parents=True)
self.assertEqual(len(page.words[0].styles), 1)
self.assertEqual(page.words[0].styles[0].color.name, 'black')
self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['latin'])
self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('black',False)])
page = Page(self.test_styles_color)
page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' ]
page.update_styles(add_to_parents=True)
self.assertEqual(len(page.words[0].styles), 1)
self.assertEqual(page.words[0].styles[0].color.name, 'green')
self.assertEqual(page.words[0].styles[0].font, Style.NIETSCHES_FONTS['german'])
self.assertEqual(page.words[0].styles[0].writing_instrument, Style.WRITING_INSTRUMENTS[('green',False)])
self.assertEqual(page.words[0].styles[0].writing_process_id, WritingProcess.INSERTION_AND_ADDITION)
page = Page(self.test_styles_color)
page.words = [ word for word in page.words if word.text == 'Versöhnlichkeit' or word.text == 'gewisse' ]
self.assertEqual(len(page.words), 2)
word = page.words[0]
word.transkription_positions += page.words[1].transkription_positions
page.words = [ word ]
page.update_styles(add_to_parents=True, partition_according_to_styles=True)
self.assertEqual(len(page.words[0].word_parts), 2)
+ page = Page(self.test_styles_color)
+ page.update_styles(add_to_parents=True, create_css=True)
+ for word in page.words:
+ self.assertTrue(len(word.styles) > 0)
+ for style in word.styles:
+ self.assertTrue(len(style.css_styles) > 0)
def test_lock(self):
page = Page(self.test_tcm_xml)
self.assertEqual(page.is_locked(), False)
page.lock('asdf.txt')
self.assertEqual(page.is_locked(), True)
self.assertEqual(page.page_tree.xpath('//lock/reference-file/text()')[0], 'asdf.txt')
page.unlock()
self.assertEqual(page.is_locked(), False)
if __name__ == "__main__":
unittest.main()