Index: svgscripts/process_footnotes.py
===================================================================
--- svgscripts/process_footnotes.py	(revision 109)
+++ svgscripts/process_footnotes.py	(revision 110)
@@ -1,282 +1,294 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to process words after they have been merged with faksimile data.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. 1}}}

from colorama import Fore, Style
import getopt
import lxml.etree as ET
import os
from os import listdir, sep, path, setpgrp, devnull
from os.path import exists, isfile, isdir, dirname, basename
from pathlib import Path as PathlibPath
from progress.bar import Bar
+import inspect
import re
import shutil
import sys
import warnings

if dirname(__file__) not in sys.path:
    sys.path.append(dirname(__file__))

from datatypes.archival_manuscript import ArchivalManuscriptUnity
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.atypical_writing import AtypicalWriting
from datatypes.clarification import Clarification
from datatypes.editor_comment import EditorComment
from datatypes.editor_correction import EditorCorrection
from datatypes.footnotes import extract_footnotes
+from datatypes.imprint import extract_imprints
from datatypes.line_continuation import LineContinuation
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.uncertain_decipherment import UncertainDecipherment
from util import back_up
from process_files import update_svgposfile_status
sys.path.append('shared_util')
from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT

__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

UNITTESTING = False
ATYPICAL_GROUP = re.compile(r'(.*:.*]\s*)(¿)(.*)')
CLARIFICATION_GROUP = re.compile(r'(.*:.*]\s*)(Vk)(.*)')
CONTINUATION_GROUP = re.compile(r'(.*:\s*)(Fortsetzung\s*)')
COMMENT_GROUP = re.compile(r'(.*:.*])')
EDITOR_CORRECTION_GROUP = re.compile(r'(.*:.*]\s*)(>[?]*)(.*)')
LINE_REFERENCE_GROUP = re.compile(r'(\d+-|\d/(\d+/)*)*([0-9]+)(:.*)')
LINE_REFERENCE_GROUP_START_INDEX = 1
LINE_REFERENCE_GROUP_MID_INDEX = 2
LINE_REFERENCE_GROUP_END_INDEX = 3
LINE_COMMENT_GROUP = re.compile(r'(.*\d+:)')
UNCERTAINTY_WORD_GROUP = re.compile(r'(.*:.*]\s*)([>]*\?)(.*)')
UNCERTAINTY_EDITOR_GROUP = re.compile(r'(.*)(\?)')
WORD_REFERENCE_GROUP = re.compile(r'(.*[0-9]+:\s*)(.*)(].*)')
DEBUG = False
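# Editor's note: a minimal sketch, not part of revision 110, showing what the
# line-reference regex above extracts. The footnote strings are invented samples.
def _demo_line_reference_regex():
    m = re.match(LINE_REFERENCE_GROUP, '12: Wort] gemeint ist ...')
    assert m.group(LINE_REFERENCE_GROUP_END_INDEX) == '12'      # a single line reference
    m = re.match(LINE_REFERENCE_GROUP, '9-12: Satz] ...')
    assert m.group(LINE_REFERENCE_GROUP_START_INDEX) == '9-'    # start of a line range
    assert m.group(LINE_REFERENCE_GROUP_END_INDEX) == '12'      # end of the line range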
""" DEBUG = debug if footnotes is None: footnotes = extract_footnotes(page, skip_after=skip_after) for footnote in footnotes: line_match = re.match(LINE_REFERENCE_GROUP, footnote.content) if line_match is not None: _process_line_match(page, footnote, line_match) else: warnings.warn(f'Unknown editor comment without a line reference: <{footnote}>') if find_content and len(page.text_connection_marks) > 0: TextConnectionMark.find_content_in_footnotes(page, footnotes=footnotes) page.update_and_attach_words2tree() for line in page.lines: line.attach_object_to_tree(page.page_tree) DEBUG = False if not UNITTESTING: write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\ script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION) +def save_imprints(page): + """Categorize footnotes. + """ + for imprint in extract_imprints(page): + imprint.attach_object_to_tree(page.page_tree) + if not UNITTESTING: + write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\ + script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}', file_type=FILE_TYPE_SVG_WORD_POSITION) + def _is_uncertain(footnote) -> bool: """Return whether footnote contains sign for uncertainty. """ uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content) return (uncertain_match is not None\ and len([ markup for markup in footnote.standoff_markups\ if markup.css_string.endswith('italic;')\ and uncertain_match.end() >= markup.startIndex\ and uncertain_match.end() <= markup.endIndex ]) > 0) def _process_line_match(page, footnote, line_match): """Process footnote if reference to a line matches. """ word_match = re.match(WORD_REFERENCE_GROUP, footnote.content) end_line_number = int(line_match.group(LINE_REFERENCE_GROUP_END_INDEX)) lines = [] if line_match.group(LINE_REFERENCE_GROUP_START_INDEX) is not None: if line_match.group(LINE_REFERENCE_GROUP_MID_INDEX) is not None: line_ids = [ int(line_id) for line_id in\ line_match.group(LINE_REFERENCE_GROUP_START_INDEX).split('/')\ if line_id != '' ] + [ end_line_number ] lines = [ line for line in page.lines if line.id in line_ids ] else: start_line_number = int(line_match.group(1)[0:-1]) lines = [ line for line in page.lines if line.id >= start_line_number and line.id <= end_line_number ] else: lines = [ line for line in page.lines if line.id == end_line_number ] if word_match is not None: _process_word_match(page.words, footnote, line_match, word_match.group(2), end_line_number) elif len(lines) > 0: uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content) for line in lines: _process_line_reference(page, footnote, line, _is_uncertain(footnote)) else: warnings.warn(f'Footnote refers to missing line {line_number}: {footnote}') def _process_line_reference(page, footnote, line, is_uncertain): """Process footnote if there is a line reference. 
""" continuation_match = re.match(CONTINUATION_GROUP, footnote.content) if continuation_match is not None: reference_string = footnote.content[continuation_match.end():] if is_uncertain: reference_string = reference_string[:-1] line.editor_comments.append(LineContinuation.create_cls(reference_string=reference_string, is_uncertain=is_uncertain)) else: comment_match = re.match(LINE_COMMENT_GROUP, footnote.content) if comment_match is not None: is_uncertain = _is_uncertain(footnote) comment = footnote.content[comment_match.end():-1].strip()\ if is_uncertain\ else footnote.content[comment_match.end():].strip() line.editor_comments.append(EditorComment(comment=comment, is_uncertain=is_uncertain)) else: warnings.warn(f'Unknown editor comment for line "{line.id}": <{footnote}>') def _process_word_match(words, footnote, line_match, word_text, line_number, parent_word_composition=None): """Process footnote if there is a word reference. """ referred_words = [ word for word in words\ if word.line_number == line_number\ and (word.text == word_text\ or re.match(rf'\W*{word_text}\W', word.text)\ or word.edited_text == word_text) ] referred_word_parts = [ word.word_parts for word in words\ if word.line_number == line_number\ and len(word.word_parts) > 0\ and word_text in [ wp.text for wp in word.word_parts ] ] overwritten_word_matches = [ word for word in words\ if word.line_number == line_number\ and len(word.word_parts) > 0\ and len([word_part for word_part in word.word_parts\ if word_part.overwrites_word is not None\ and word_part.overwrites_word.text == word_text]) > 0] if len(referred_words) > 0\ or len(overwritten_word_matches) > 0\ or len(referred_word_parts) > 0: word = None if len(referred_words) == 1: word = referred_words[0] elif len(overwritten_word_matches) > 0: word = [ word_part.overwrites_word for word_part in overwritten_word_matches[0].word_parts\ if word_part.overwrites_word is not None and word_part.overwrites_word.text == word_text][0] elif len(referred_word_parts) > 0: word = [ word_part for word_part in referred_word_parts[0] if word_part.text == word_text ][0] else: word = [ better_word for better_word in referred_words if better_word.text == word_text][0] atypical_match = re.match(ATYPICAL_GROUP, footnote.content) correction_match = re.match(EDITOR_CORRECTION_GROUP, footnote.content) clarification_match = re.match(CLARIFICATION_GROUP, footnote.content) is_uncertain = re.match(UNCERTAINTY_WORD_GROUP, footnote.content) is not None if correction_match is not None: correction = correction_match.group(3).strip() word.editor_comment = EditorCorrection(correction_text=correction, is_uncertain=is_uncertain) if not is_uncertain: word.edited_text = correction elif clarification_match is not None: word.editor_comment = Clarification(text=footnote.extract_part(word_text, css_filter='bold;')) elif atypical_match is not None: text = footnote.extract_part(word_text, css_filter='bold;')\ if footnote.markup_contains_css_filter('bold;')\ else None word.editor_comment = AtypicalWriting(text=text) elif is_uncertain: word.editor_comment = UncertainDecipherment() else: comment_match = re.match(COMMENT_GROUP, footnote.content) if comment_match is not None: is_uncertain = _is_uncertain(footnote) comment = footnote.content[comment_match.end():-1].strip()\ if is_uncertain\ else footnote.content[comment_match.end():].strip() word.editor_comment = EditorComment(comment=comment, is_uncertain=is_uncertain) else: warnings.warn(f'Unknown editor comment for word "{word.text}": <{footnote}>') elif 
    elif re.match(r'.*\s.*', word_text):
        for word_part in word_text.split(' '):
            _process_word_match(words, footnote, line_match, word_part, line_number, parent_word_composition=word_text)
    elif len([word for word in words if word.line_number == -1 and len(word.word_parts) > 0 ]) > 0:
        new_words = []
        for word in [word for word in words if word.line_number == -1 and len(word.word_parts) > 0 ]:
            new_words += word.word_parts
        _process_word_match(new_words, footnote, line_match, word_text, line_number)
    else:
        warnings.warn(f'No word found with text "{word_text}" on line {line_number}: <{footnote}>')

def usage():
    """Prints information on how to use the script.
    """
    print(main.__doc__)

def main(argv):
    """This program can be used to process the footnotes of a page.

    svgscripts/process_footnotes.py [OPTIONS] <xml-file>

        <xml-file>    an xml file about a manuscript, containing information about its pages,
                      or an xml file about a page, containing information about svg word positions.

    OPTIONS:
        -h|--help               show help
        -s|--skip-until=left    skip all nodes.get('X') < left

    :return: exit code (int)
    """
    skip_after=-1.0
    try:
        opts, args = getopt.getopt(argv, "hs:", ["help", "skip-until=" ])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-s', '--skip-until'):
            skip_after = float(arg)
    if len(args) < 1:
        usage()
        return 2
    exit_status = 0
    file_a = args[0]
    if isfile(file_a):
        manuscript_file = file_a\
                if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\
                else None
        counter = 0
        for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK):
            if not UNITTESTING:
                print(Fore.CYAN + f'Processing {page.title}, {page.number} ...' + Style.RESET_ALL)
                back_up(page, page.xml_file)
            categorize_footnotes(page, skip_after=skip_after, find_content=True)
+           save_imprints(page)
            counter += 1
        not UNITTESTING and print(Style.RESET_ALL + f'[{counter} pages processed]')
    else:
        raise FileNotFoundError('File {} does not exist!'.format(file_a))
    return exit_status

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
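# Editor's note: a self-contained sketch, not part of the diff, of the matching
# cascade _process_word_match uses above to resolve a word reference: exact
# text first, then a match that tolerates surrounding punctuation, then the
# edited text. `SampleWord` is a hypothetical stand-in for datatypes.word.Word.
import re
from dataclasses import dataclass
from typing import Optional

@dataclass
class SampleWord:
    text: str
    line_number: int
    edited_text: Optional[str] = None

def find_referred_words(words, word_text, line_number):
    return [ word for word in words
             if word.line_number == line_number
             and (word.text == word_text
                  or re.match(rf'\W*{word_text}\W', word.text)
                  or word.edited_text == word_text) ]

words = [ SampleWord('Gedanke,', 7), SampleWord('Wort', 7, edited_text='Worte') ]
assert len(find_referred_words(words, 'Gedanke', 7)) == 1   # punctuation-tolerant match
assert len(find_referred_words(words, 'Worte', 7)) == 1     # match via edited_text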
Index: svgscripts/datatypes/word.py
===================================================================
--- svgscripts/datatypes/word.py	(revision 109)
+++ svgscripts/datatypes/word.py	(revision 110)
@@ -1,907 +1,913 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a word.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. 1}}}

__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

import copy
import inspect
from lxml import etree as ET
from operator import attrgetter
import re
import string
import sys
import warnings

from .box import Box
from .editor_comment import EditorComment
from .matrix import Matrix
from .path import Path
from .simple_word import SimpleWord
from .style import Style
from .word_deletion_path import WordDeletionPath
from .word_position import WordPosition
from .transkription_position import TranskriptionPosition
from .writing_process import WritingProcess

SINGLE_PUNCTUATION_PATTERN = r"^[{}–]$".format(string.punctuation)

def execute_function_on_parts(word_parts, func_name):
    """Execute function on parts and add those parts instead of original word to word_parts.

    :return: new word_parts, output from func
    """
    copy_parts = word_parts[:]
    for word in word_parts:
        output = eval('word.{0}()'.format(func_name))
        if len(word.word_parts) > 0:
            for part_word in word.word_parts:
                copy_parts.insert(copy_parts.index(word), part_word)
            copy_parts.remove(word)
            word.word_parts = []
    return copy_parts, output

def update_transkription_position_ids(word):
    """Update transkription_positions' ids according to index.
    """
    word_part_ids = [ wp.id for wp in word.word_parts ]
    if len(word_part_ids) != len(set(word_part_ids)):
        for id, wp in enumerate(word.word_parts):
            wp.id = id
    for index, transkription_position in enumerate(sorted(word.transkription_positions, key=attrgetter('left'))):
        transkription_position.id = index
        transkription_position.has_box = None
        transkription_position.deleted = False

class Word(SimpleWord):
    """ This class represents a word.
""" COPY_PROPERTY_KEY = [ 'line_number', 'deleted', 'writing_process_id' ] APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS = { 'style': 'styles' } DATA = 'debug-data' - RDFS_SUBCLASSOF_LIST = ['http://www.e-editiones.ch/ontology/text#HandwrittenText'] + RDFS_SUBCLASSOF_LIST = ['https://www.e-editiones.ch/ontology/text#HandwrittenText'] XML_TAG = 'word' XML_EARLIER_VERSION = 'earlier-version' XML_OVERWRITES = 'overwrites' XML_CORRECTION_DICT = { 'isClarificationOfWord': 'clarifiesWord',\ 'isDeletionOfWord': 'deletesEarlierPart',\ 'isExtensionOfWord': 'extendsEarlierVersion',\ 'isTransformationOfWord': 'transformsEarlierPart' } def __init__(self, id=0, text='', line_number=-1, deleted=False, transkription_positions=None, faksimile_positions=None, word_part_objs=None, word_parts=None, writing_process_id=-1, earlier_version=None, box_paths=None, styles=None): super(Word,self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions) self.corrections = [] + self.clean_edited_text = None self.deleted = deleted self.deletion_paths = [] self.deletion_paths_near_word = [] self.debug_container = {} self.debug_msg = None self.earlier_version = earlier_version self.edited_text = None self.editor_comment = None self.isClarificationOfWord = None self.isDeletionOfWord = None self.isExtensionOfWord = None self.isTransformationOfWord = None if len(self.text) == 0 and len(''.join([ tp.get_text() for tp in self.transkription_positions if type(tp) == TranskriptionPosition ])) > 0: self.text = ''.join([ tp.get_text() for tp in self.transkription_positions ]) self.overwrites_word = None self.process_flags = [] self.styles = styles\ if styles is not None\ else [] self.verified = None self.writing_process_id = writing_process_id self.writing_processes = [] self.word_insertion_mark = None self.word_box = None self.word_parts = word_parts if word_parts is not None else [] self.word_part_objs = word_part_objs if word_part_objs is not None else [] def add_deletion_paths(self, deletion_paths, tr_xmin=0.0, tr_ymin=0.0): """Add a word deletion path to word. """ if len(self.word_parts) > 0: for part in self.word_parts: part.add_deletion_paths(deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) elif self.deleted: index = 0 while len(self.deletion_paths) == 0 and index < len(self.transkription_positions): include_pwps = (len(self.transkription_positions[index].positional_word_parts) > 0 and abs(self.transkription_positions[index].left-self.transkription_positions[index].positional_word_parts[0].left) < 10) word_path = Path.create_path_from_transkription_position(self.transkription_positions[index],\ tr_xmin=tr_xmin, tr_ymin=tr_ymin, include_pwps=include_pwps) self.deletion_paths += [ deletion_path for deletion_path in deletion_paths\ if not Path.is_path_contained(self.deletion_paths, deletion_path)\ and deletion_path.do_paths_intersect(word_path) ] index += 1 def attach_word_to_tree(self, target_tree): """Attaches word to tree target_tree. 
""" word_node = super(Word,self).attach_word_to_tree(target_tree) if self.deleted is not None: word_node.set('deleted', str(self.deleted).lower()) if self.verified is not None: word_node.set('verified', str(self.verified).lower()) if self.edited_text is not None: word_node.set('edited-text', self.edited_text) if self.editor_comment is not None: self.editor_comment.attach_object_to_tree(word_node) if self.writing_process_id > -1: word_node.set('writing-process-id', str(self.writing_process_id)) if len(self.process_flags) > 0: word_node.set('process-flags', ' '.join(self.process_flags)) for index, word_part in enumerate(self.word_parts): word_part.id = index word_part.attach_word_to_tree(word_node) if self.earlier_version is not None: earlier_node = ET.SubElement(word_node, self.XML_EARLIER_VERSION) self.earlier_version.attach_word_to_tree(earlier_node) if self.overwrites_word is not None\ and len(self.overwrites_word.transkription_positions) > 0: overwrite_node = ET.SubElement(word_node, self.XML_OVERWRITES) self.overwrites_word.attach_word_to_tree(overwrite_node) if self.word_box is not None: self.word_box.attach_object_to_tree(word_node) if len(self.corrections) > 0: word_node.set('corrections', ' '.join(set([ str(word.id) for word in self.corrections ]))) for deletion_id, deletion_path in enumerate(self.deletion_paths): deletion_path.id = deletion_id deletion_path.tag = WordDeletionPath.XML_TAG deletion_path.attach_object_to_tree(word_node) for key in self.XML_CORRECTION_DICT.keys(): if self.__dict__[key] is not None: word_node.set(self.XML_CORRECTION_DICT[key], 'true') return word_node def belongs_to_multiple_writing_processes(self, include_parts=False): """Returns true if transkription_positions belong to different WritingProcesses. """ if len(self.word_parts) > 0 and include_parts: return len(set(word.writing_process_id for word in self.word_parts)) > 1 return len(set(tp.writing_process_id for tp in self.transkription_positions )) > 1 def set_parent_word_writing_process_id(self): """Set writing_process_id for parent word. """ ids = set(word.transkription_positions[0].style for word in self.word_parts\ if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None) if len(ids) > 1: self.writing_process_id = max([style.writing_process_id for style in ids]) if len(set(word.transkription_positions[0].style.create_a_copy_wo_writing_process_id()\ for word in self.word_parts\ if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None))\ > 1: self.writing_process_id += 1 @classmethod def create_cls(cls, word_node): """Creates a word from a (lxml.Element) node. 
[:return:] Word """ cls = super(Word,cls).create_cls(word_node) cls.writing_process_id = int(word_node.get('writing-process-id')) if bool(word_node.get('writing-process-id')) else -1 cls.split_strings = None cls.join_string = word_node.get('join') if bool(word_node.get('split')): cls.split_strings = word_node.get('split').split(' ') if ''.join(cls.split_strings) != cls.text: error_msg = 'Error in file {0}: word with id="{1}" has split attributes that do not correspond to its text attribute!\n'.\ format(word_node.getroottree().docinfo.URL, str(cls.id))\ + 'Split attributes: "{0}".\n'.format(' '.join(cls.split_strings))\ + 'Text attribute: "{0}".\n'.format(cls.text) raise Exception(error_msg) cls.verified = word_node.get('verified') == 'true'\ if bool(word_node.get('verified')) else None cls.deleted = word_node.get('deleted') == 'true'\ if bool(word_node.get('deleted')) else None cls.edited_text = word_node.get('edited-text') + if cls.edited_text is not None: + cls.clean_edited_text = cls._create_clean_text(cls.edited_text) cls.editor_comment = [ EditorComment.create_cls_from_node(node) for node in word_node.xpath('./' + EditorComment.XML_TAG) ][0]\ if len([ node for node in word_node.xpath('./' + EditorComment.XML_TAG) ]) > 0 else None cls.word_parts = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_TAG) ] if bool(word_node.get('corrections')): for index in [ int(i) for i in word_node.get('corrections').split(' ') ]: if index < len(cls.word_parts): cls.corrections.append(cls.word_parts[index]) cls.earlier_version = None if len(word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG)) > 0: cls.earlier_version = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG) ][0] for key_value in cls.XML_CORRECTION_DICT.values(): if word_node.get(key_value) == 'true': cls.__dict__[key_value] = True if cls.earlier_version is not None: for word_part in cls.word_parts: for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Part') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]\ and len(cls.word_parts) <= len(cls.earlier_version.word_parts): try: word_part.__dict__[key] = cls.earlier_version.word_parts[word_part.id] except Exception: msg = f'{cls.id} {cls.text}: {word_part.id}' raise Exception(msg) for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('EarlierVersion') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]: word_part.__dict__[key] = cls.earlier_version for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Word') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]: word_part.__dict__[key] = cls cls.overwrites_word = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)][0]\ if len(word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)) > 0\ else None cls.word_box = [ Box(node=node) for node in word_node.xpath('./' + Box.XML_TAG) ][0]\ if len(word_node.xpath('./' + Box.XML_TAG)) > 0\ else None cls.deletion_paths = [ Path(node=node) for node in word_node.xpath(f'./{WordDeletionPath.XML_TAG}') ] cls.process_flags = word_node.get('process-flags').split(' ')\ if bool(word_node.get('process-flags'))\ else [] return cls @classmethod def join_words(cls, list_of_words, 
add_white_space_between_words=False): """Creates a word from a list of words. [:return:] Word """ if len(list_of_words) > 1: deleted = True in [ word.deleted for word in list_of_words ]\ and len(set([ word.deleted for word in list_of_words ])) == 1 line_number = list_of_words[0].line_number\ if len(set([ word.line_number for word in list_of_words ])) == 1\ else -1 faksimile_positions = [] for word in list_of_words: if len(word.word_parts) > 0: faksimile_positions += word.faksimile_positions index = list_of_words.index(word) list_of_words.remove(word) for part_word in reversed(word.word_parts): list_of_words.insert(index, part_word) new_word_text = ''.join([word.text for word in list_of_words])\ if not add_white_space_between_words\ else ' '.join([word.text for word in list_of_words]) new_word = cls(id=list_of_words[0].id, text=new_word_text, faksimile_positions=faksimile_positions,\ line_number=line_number, deleted=deleted, word_parts=list_of_words) if True in [ word.text.endswith('-') or word.text.endswith('=') for word in new_word.word_parts[:-1]]: change_text = [ word.text for word in new_word.word_parts[:-1] if word.text.endswith('-') or word.text.endswith('=') ][0] new_word.edited_text = new_word.text.replace(change_text, change_text[:-1]) for id, word in enumerate(new_word.word_parts): word.id = id return new_word if len(list_of_words) > 0: return list_of_words[0] else: return None def create_earlier_version(self, root_word=None, id=0): """Create an earlier version of word. """ if root_word is None: root_word = self root_word.set_parent_word_writing_process_id() word_parts = [] non_single_punctuation_word_parts = [ word_part for word_part in self.word_parts\ if not re.match(SINGLE_PUNCTUATION_PATTERN, word_part.text) ] non_single_punctuation_word_parts_length = len(non_single_punctuation_word_parts) if non_single_punctuation_word_parts_length > 0\ and len([ word_part for word_part in non_single_punctuation_word_parts\ if word_part.deleted ])\ == non_single_punctuation_word_parts_length: self.deleted = True for word_part in non_single_punctuation_word_parts: word_part.deleted = False for id, word_part in enumerate(self.word_parts): earlierWordPart = word_part.create_earlier_version(root_word=root_word, id=id) if word_part.deleted: word_part.isDeletionOfWord = earlierWordPart word_parts.append(earlierWordPart) if word_part not in self.corrections: self.corrections.append(word_part) elif word_part.overwrites_word is not None\ and ((len(word_part.transkription_positions) > 0\ and word_part.overwrites_word.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style\ != word_part.overwrites_word.transkription_positions[0].style) or word_part.word_box.earlier_version): word_part.overwrites_word.id = word_part.id word_parts.append(word_part.overwrites_word) word_part.isTransformationOfWord = word_part.overwrites_word #print(f'transform: {self.text}') if word_part not in self.corrections: self.corrections.append(word_part) elif root_word.writing_process_id > -1\ and (len(word_part.transkription_positions) > 0\ and word_part.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style.writing_process_id\ == root_word.writing_process_id): word_part.extendsEarlierVersion = True #print('extends') if word_part not in self.corrections: self.corrections.append(word_part) else: if word_part.deleted: word_part.isDeletionOfWord = earlierWordPart word_parts.append(earlierWordPart) 
if word_part not in self.corrections: self.corrections.append(word_part) else: #print(f'default: {self.text}') word_parts.append(earlierWordPart) text = ''.join([ word.text for word in word_parts ])\ if len(word_parts) > 0\ else self.text if len(word_parts) == 1: self.transkription_positions += word_parts[0].transkription_positions self.faksimile_positions += word_parts[0].faksimile_positions word_parts = [] new_transkription_positions = copy.deepcopy(self.transkription_positions) if len(self.transkription_positions) > 0\ and self.transkription_positions[0].style is not None: writing_process_id = self.transkription_positions[0].style.writing_process_id for new_tp in new_transkription_positions: new_tp.style.writing_process_id = writing_process_id return Word(id=id, text=text, transkription_positions=new_transkription_positions,\ faksimile_positions=self.faksimile_positions, line_number=self.line_number,\ word_parts=word_parts) def create_correction_history(self, page=None, box_style=None): """Create correction history. """ if self.word_box is not None: manuscript = self.transkription_positions[0].style.manuscript\ if len(self.transkription_positions) > 0\ and self.transkription_positions[0].style is not None\ else None style = Style() if box_style is not None: style = box_style if page is not None: style = Style.create_cls(page, self.word_box.text_style_class, manuscript=manuscript) for font_key in [ font_key for font_key in self.word_box.text_style_class.split(' ') if font_key in page.fontsizekey2stage_mapping.keys() ]: style.writing_process_id = page.fontsizekey2stage_mapping.get(font_key) transkription_positions = TranskriptionPosition.copy_list_of_cls(self.transkription_positions) for transkription_position in transkription_positions: transkription_position.style = style self.overwrites_word = Word(text=self.word_box.earlier_text, transkription_positions=transkription_positions,\ line_number=self.line_number) for word_part in self.word_parts: word_part.create_correction_history(page=page, box_style=box_style) if len(self.word_parts) > 0: earlier_version = self.create_earlier_version() extending_words = self._get_parts_with_property_key('extendsEarlierVersion') if len(extending_words) > 0: for word in extending_words: word.isExtensionOfWord = earlier_version if self.has_mixed_status('deleted', include_parts=True): self.edited_text = ''.join([ word.text for word in self.word_parts if not word.deleted ]) if len(self.corrections) > 0: self.earlier_version = earlier_version @staticmethod def CREATE_WORD(word_node=None, page=None, word_part_objs=[], id=0, height=0, endX=0, endSign=None, matrix=None, line_number=-1, debug_msg=None): """Creates a word from a (lxml.Element) node or word_part_objs. 
        :return: Word
        """
        if word_node is not None: # init word from xml node
            id = int(word_node.get('id'))
            line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else line_number
            text = word_node.get('text')
            deleted = bool(word_node.get('deleted')) and word_node.get('deleted') == 'true'
            transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('.//' + WordPosition.TRANSKRIPTION) ]
            faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('.//' + WordPosition.FAKSIMILE) ]
            word_part_objs = [ item.attrib for item in word_node.findall('.//' + Word.DATA + '/part')]\
                    if len(word_node.findall('.//' + Word.DATA)) > 0\
                    else [ item.attrib for item in word_node.findall('.//part')]
            return Word(id=id, text=text, deleted=deleted, line_number=line_number,\
                    transkription_positions=transkription_positions,\
                    faksimile_positions=faksimile_positions, word_part_objs=word_part_objs)
        elif len(word_part_objs) > 0: # init word from word_part_obj that has been extracted from svg file
            WIDTH = 5
            TOPCORRECTION = 2.0
            FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize
            height = height
            x = round(float(word_part_objs[0]['x']), 3)
            if(page is not None and bool(page.style_dict)):
                HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height
                style_set = set(' '.join(set( dict['class'] for dict in word_part_objs)).split(' '))
                biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set)
                height = round(biggest_font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / biggest_font_size, 3)
                TOPCORRECTION = 1 + HEIGHT_FACTOR / biggest_font_size
                if endSign is not None and '%' in endSign:
                    lastCharFontSizeList = [ float(page.style_dict[key]['font-size'].replace('px',''))\
                            for key in word_part_objs[len(word_part_objs)-1]['class'].split(' ')\
                            if bool(page.style_dict[key].get('font-size'))]
                    lastCharFontSize = lastCharFontSizeList[0] if len(lastCharFontSizeList) > 0 else 1
                    endX = float(endX) + lastCharFontSize * FONTWIDTHFACTOR
            elif endSign is not None and '%' in endSign:
                endX = float(endX) + WIDTH
            bottom = round(float(word_part_objs[0]['y']), 3)
            y = round(bottom - height + TOPCORRECTION, 3)
            width = round(float(endX) - x, 3)
            transkription_positions = [ WordPosition(height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) ]
            text = ''.join([ dict['text'] for dict in word_part_objs])
            line_number = page.get_line_number( (y + bottom)/2) if page is not None else line_number
            word = Word(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\
                    word_part_objs=word_part_objs)
            word.debug_msg = debug_msg
            return word
        else:
            error_msg = 'word_node has not been defined' if (word_node is None) else 'word_part_objs is empty'
            raise Exception('Error: {}'.format(error_msg))

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates and returns a semantic dictionary as specified by SemanticClass.
""" dictionary = super(Word,cls).get_semantic_dictionary() dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('styles', Style,\ cardinality=1, cardinality_restriction='minCardinality',\ name='wordHasStyle', label='word has style', comment='Word has an appearance that is characterized by this style.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('corrections', Word,\ name='wordHasCorrection', label='word has corrections', comment='Word has a correction made by the author.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deletion_paths', WordDeletionPath,\ name='wordIsDeletedByPath', label='word has been deleted with a deletion path',\ comment='Word has been deleted by the author using a deletion path.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('editor_comment', EditorComment,\ name='wordHasEditorComment', label='word has a comment by the editors', comment='Word has been commented by the editors.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_version', Word,\ name='wordHasEarlierVersion', label='word has an earlier version', comment='There is a earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('edited_text', str,\ name='hasEditedText', label='word has an edited text', comment='Word has a text that is edited automatically by removing deleted parts or hyphens.')) + dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('clean_edited_text', str,\ + name='hasCleanEditedText', label='word has an edited text without punctuation',\ + comment='Word has a text without punctuation that is edited automatically by removing deleted parts or hyphens.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isClarificationOfWord', Word,\ name='isClarificationOfWord', label='word is a clarification of word',\ comment='The author has used this part of the word in order to clarify the appearance of that word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isDeletionOfWord', Word,\ name='isDeletionOfWord', label='word is a deletion of word',\ comment='The author has used this part of a word in order to delete the corresponding part of an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isExtensionOfWord', Word,\ name='isExtensionOfWord', label='word is a extension of word',\ comment='The author has used this part of a word in order to extend an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isTransformationOfWord', Word,\ name='isTransformationOfWord', label='word is a transformation of word',\ comment='The author has used this part of a word in order to transform the corresponding part of an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('overwrites_word', Word,\ name='overwritesWord', label='word overwrites word',\ comment='The author has used this word in order to overwrite that word.')) # This makes wordHasWordParts a subproperty of cls.HAS_HOMOTYPIC_PARTS_URL_STRING, # cls.return_dictionary_after_updating_super_classes will subclass Word under the corresponding super class. 
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('word_parts', list,\
                name='wordHasWordParts', label='word has word parts', comment='Word consists of a list of words.',\
                subPropertyOf=cls.HAS_HOMOTYPIC_PARTS_URL_STRING))
        super_property_dictionary = cls.create_semantic_property_dictionary(cls.SUPER_PROPERTY, Word,\
                name='isCorrectionOfWord', label='word is a correction of word',\
                comment='The author has used this word in order to correct that word.')
        for key in cls.XML_CORRECTION_DICT.keys():
            correction_dict = dictionary[cls.PROPERTIES_KEY].get(key)
            correction_dict.update(super_property_dictionary)
            dictionary[cls.PROPERTIES_KEY].update({key: correction_dict})
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    def has_mixed_status(self, property_key, include_parts=False, concerns_word=True):
        """Returns true if transkription_positions have mixed status concerning the property_key in their __dict__.
        """
        if False in set(property_key in tp.__dict__.keys() for tp in self.transkription_positions):
            return False
        if len(self.word_parts) > 0 and include_parts:
            if concerns_word:
                if False in set(property_key in word.__dict__.keys() for word in self.word_parts):
                    return False
                return len(set(word.__dict__[property_key] for word in self.word_parts)) > 1
            else:
                return len(set(word.transkription_positions[0].__dict__[property_key] for word in self.word_parts\
                        if len(word.transkription_positions) > 0 and property_key in word.transkription_positions[0].__dict__.keys())) > 1
        return len(set(tp.__dict__[property_key] for tp in self.transkription_positions )) > 1

    def init_word(self, page):
        """Initialize word with objects from page.
        """
        super(Word,self).init_word(page)
        if self.writing_process_id > -1:
            self.writing_processes += [ wp for wp in page.writing_processes if wp.id == self.writing_process_id ]
        writing_processes = self.writing_processes
        for word_part in self.word_parts:
            word_part.init_word(page)
            self.lines += word_part.lines
            self.writing_processes += word_part.writing_processes
        self.lines = [ line for line in set(self.lines) ]
        self.writing_processes = [ wp for wp in set(self.writing_processes)]
        if self.overwrites_word is not None:
            self.overwrites_word.init_word(page)
        if self.earlier_version is not None:
            if self.earlier_version.writing_process_id == -1:
                self.earlier_version.writing_process_id = self.writing_process_id-1
            if self.earlier_version.line_number == -1:
                self.earlier_version.line_number = self.line_number
            self.earlier_version.init_word(page)
        self.deletion_paths = [ page.get_word_deletion_path(path) for path in self.deletion_paths if path.path is not None ]
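    # Editor's note (not part of revision 110): a worked example of what
    # join() below does to the word text; the sample words are invented.
    #
    #     w = Word(text='Fort'); w.join(Word(text='setzung'))
    #     w.text                                                # -> 'Fortsetzung'
    #     w = Word(text='setzung')
    #     w.join(Word(text='Fort'), append_at_end_of_new_word=False)
    #     w.text                                                # -> 'Fortsetzung'
    #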
""" if append_at_end_of_new_word: self.text = self.text + other_word.text\ if not add_white_space_between_words\ else self.text + ' ' + other_word.text for position in other_word.transkription_positions: position.id = str(len(self.transkription_positions)) self.transkription_positions.append(position) for position in other_word.faksimile_positions: position.id = str(len(self.faksimile_positions)) self.faksimile_positions.append(position) else: self.text = other_word.text + self.text index = 0 for position in other_word.transkription_positions: self.transkription_positions.insert(index, position) index += 1 while index < len(self.transkription_positions): self.transkription_positions[index].id = str(index) index += 1 index = 0 for position in other_word.faksimile_positions: self.faksimile_positions.insert(indexposition) index += 1 while index < len(self.faksimile_positions): self.faksimile_positions[index].id = str(index) index += 1 self.simplify_transkription_positions() def partition_according_to_deletion(self): """Partition a word according to its transkription_positions' deletion status ->split word and add partial words as its parts. """ if self.has_mixed_status('deleted'): transkription_positions = [] last_status = None for transkription_position in self.transkription_positions: if transkription_position.deleted != last_status\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id) for tp in transkription_positions: newWord.deletion_paths += tp._deletion_paths self.word_parts.append(newWord) transkription_positions = [] transkription_positions.append(transkription_position) last_status = transkription_position.deleted if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id) for tp in transkription_positions: newWord.deletion_paths += tp._deletion_paths self.word_parts.append(newWord) self.transkription_positions = [] self.line_number = -1 self.deleted = False elif len(self.word_parts) > 0: self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_deletion') elif not self.deleted\ and len(self.transkription_positions) > 0\ and self.transkription_positions[0].deleted: self.deleted = True for tp in self.transkription_positions: self.deletion_paths += tp._deletion_paths def partition_according_to_writing_process_id(self): """Partition a word according to its transkription_positions' writing_process_ids ->split word and add partial words as its parts. 
""" if self.belongs_to_multiple_writing_processes(): last_writing_process_id = -1 transkription_positions = [] for transkription_position in self.transkription_positions: if transkription_position.writing_process_id != last_writing_process_id\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, writing_process_id=last_writing_process_id) self.word_parts.append(newWord) transkription_positions = [] transkription_positions.append(transkription_position) last_writing_process_id = transkription_position.writing_process_id if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, writing_process_id=last_writing_process_id) self.word_parts.append(newWord) self.transkription_positions = [] elif len(self.word_parts) > 0: self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_writing_process_id') if self.belongs_to_multiple_writing_processes(include_parts=True): self.writing_process_id = sorted(set([ word.writing_process_id for word in self.word_parts ]), reverse=True)[0] elif len(self.transkription_positions) > 0: self.writing_process_id = self.transkription_positions[0].writing_process_id def process_boxes(self, box_paths, tr_xmin=0.0, tr_ymin=0.0, previous_word_has_box=False): """Determines whether word is over a word box. """ word_over_box = None if len(self.word_parts) > 0: for word in self.word_parts: current_word = word.process_boxes(box_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin, previous_word_has_box=(word_over_box is not None)) if current_word is not None and current_word.word_box is not None: word_over_box = current_word else: new_tp_dict = {} for index, transkription_position in enumerate(self.transkription_positions): if previous_word_has_box and index == 0: if len(transkription_position.positional_word_parts) > 0: transkription_position.positional_word_parts[0].left += transkription_position.positional_word_parts[0].width/2 #print(f'{self.text}: {transkription_position.positional_word_parts[0].left}') else: transkription_position.left += 1 word_path = Path.create_path_from_transkription_position(transkription_position,\ tr_xmin=tr_xmin, tr_ymin=tr_ymin) containing_boxes = [ box_path for box_path in box_paths\ if word_path.is_partially_contained_by(box_path)\ or box_path.do_paths_intersect(word_path) ] if len(containing_boxes) > 0: if previous_word_has_box: print(f'{self.text}: {word_path.path.bbox()} {containing_boxes[0].path.bbox()}') self._set_box_to_transkription_position(containing_boxes[0], word_path,\ transkription_position, new_tp_dict, tr_xmin) box_paths.remove(containing_boxes[0]) for replace_tp in new_tp_dict.keys(): for tp in new_tp_dict.get(replace_tp): self.transkription_positions.insert(self.transkription_positions.index(replace_tp), tp) self.transkription_positions.remove(replace_tp) word_over_box = self._get_partial_word_over_box() update_transkription_position_ids(self) return word_over_box def set_word_insertion_mark(self, word_insertion_mark): """Sets word_insertion_mark """ self.word_insertion_mark = word_insertion_mark def set_writing_process_id_to_transkription_positions(self, page): """Determines the writing process id of the transkription_positions. 
""" for transkription_position in self.transkription_positions: if len(transkription_position.positional_word_parts) > 0: for font_key in transkription_position.positional_word_parts[0].style_class.split(' '): if font_key in page.fontsizekey2stage_mapping.keys(): transkription_position.writing_process_id = page.fontsizekey2stage_mapping.get(font_key) def simplify_transkription_positions(self): """Merge transkription_positions if possible. """ index = len(self.transkription_positions)-1 while index > 0\ and False not in [ 'positional_word_parts' in tp.__dict__.keys() for tp in self.transkription_positions ]: current_tp = self.transkription_positions[index] index -= 1 previous_tp = self.transkription_positions[index] if previous_tp.is_mergebale_with(current_tp): positional_word_parts = previous_tp.positional_word_parts positional_word_parts += current_tp.positional_word_parts transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\ positional_word_parts, debug_msg_string='simplifying transkription positions', transkription_position_id=previous_tp.id) if len(transkription_positions) == 1: transkription_positions[0].writing_process_id = previous_tp.writing_process_id\ if previous_tp.writing_process_id != -1\ else current_tp.writing_process_id self.transkription_positions.pop(index+1) self.transkription_positions[index] = transkription_positions[0] #print(self.text, len(self.transkription_positions)) def split(self, split_string, start_id=0): """Splits the word and returns an 3-tuple of new words. """ previousString, currentString, nextString = self.text.partition(split_string) currentWord = None previousWord = None nextWord = None previousIndex = 0 current_id = start_id all_positional_word_parts = [] for position in self.transkription_positions: all_positional_word_parts += position.positional_word_parts if len(all_positional_word_parts) == 0: warnings.warn('ATTENTION: Word: {} {} with Strings "{}, {}, {}": there are no parts!'.format(self.id, self.text, previousString, currentString, nextString)) if len(previousString) > 0: previous_pwps = [] while previousIndex < len(all_positional_word_parts) and previousString != ''.join([ pwp.text for pwp in previous_pwps ]): previous_pwps.append(all_positional_word_parts[previousIndex]) previousIndex += 1 if previousString != ''.join([ pwp.text for pwp in previous_pwps ]): warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(previousString)) else: previous_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(previous_pwps, debug_msg_string='word.split') previous_text = ''.join([ pwp.text for pwp in previous_pwps ]) previousWord = Word(text=previous_text, id=current_id, line_number=self.line_number, transkription_positions=previous_transkription_positions) previousWord.faksimile_positions = self.faksimile_positions current_id += 1 all_positional_word_parts = all_positional_word_parts[previousIndex:] if len(nextString) > 0: tmp_pwps = [] index = 0 while index < len(all_positional_word_parts) and currentString != ''.join([ pwp.text for pwp in tmp_pwps ]): tmp_pwps.append(all_positional_word_parts[index]) index += 1 if currentString != ''.join([ pwp.text for pwp in tmp_pwps ]): warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(currentString)) else: next_pwps = all_positional_word_parts[index:] next_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(next_pwps, debug_msg_string='word.split') next_text = 
''.join([ pwp.text for pwp in next_pwps ]) nextWord = Word(text=next_text, id=current_id+1, line_number=self.line_number, transkription_positions=next_transkription_positions) nextWord.faksimile_positions = self.faksimile_positions all_positional_word_parts = all_positional_word_parts[:index] current_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(all_positional_word_parts, debug_msg_string='word.split') current_text = ''.join([ pwp.text for pwp in all_positional_word_parts ]) currentWord = Word(text=current_text, id=current_id, line_number=self.line_number, transkription_positions=current_transkription_positions) currentWord.faksimile_positions = self.faksimile_positions return previousWord, currentWord, nextWord def split_according_to_status(self, status, splits_are_parts=False): """Split a word according to its transkription_positions' text. :return: a list of new word.Word """ new_words = [] if self.has_mixed_status(status): last_status = None transkription_positions = [] for transkription_position in self.transkription_positions: if transkription_position.__dict__[status] != last_status\ and len(transkription_positions) > 0: new_words.append(\ self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words))) transkription_positions = [] transkription_positions.append(transkription_position) last_status = transkription_position.__dict__[status] if len(transkription_positions) > 0: new_words.append(\ self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words))) if splits_are_parts: self.word_parts += new_words if len(self.word_parts) > 0: self.transkription_positions = [] return new_words def undo_partitioning(self): """Undo partitioning. """ if len(self.word_parts) > 0: for word_part in self.word_parts: word_part.undo_partitioning() if self.text != ''.join([ tp.get_text() for tp in self.transkription_positions ]): self.transkription_positions += word_part.transkription_positions self.earlier_version = None self.edited_text = None self.word_box = None self.word_parts = [] self.corrections = [] self.earlier_versions = [] self.box_paths = [] def _create_new_word(self, transkription_positions, status, new_id=0): """Create a new word from self and transkription_positions. """ newWord = Word(id=new_id, transkription_positions=transkription_positions) for key in self.COPY_PROPERTY_KEY: if key != status and key in self.__dict__.keys(): newWord.__dict__[key] = self.__dict__[key] if status in self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS.keys(): newWord.__dict__[self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS[status]].append(transkription_positions[0].__dict__[status]) else: newWord.__dict__[status] = transkription_positions[0].__dict__[status] return newWord def _get_parts_with_property_key(self, property_key): """Return a list of word_parts with property == property_key. """ word_parts = [] for word_part in self.word_parts: if property_key in word_part.__dict__.keys(): word_parts.append(word_part) else: word_parts += word_part._get_parts_with_property_key(property_key) return word_parts def _get_partial_word_over_box(self): """Partition a word according to its transkription_positions' has_box ->split word and add partial words as its parts. 
        :return: word over box or self
        """
        word_over_box = None
        if self.has_mixed_status('has_box'):
            transkription_positions = []
            last_word_box = None
            for transkription_position in self.transkription_positions:
                if transkription_position.has_box != last_word_box\
                        and len(transkription_positions) > 0:
                    newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                            transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
                    self.word_parts.append(newWord)
                    if last_word_box is not None:
                        word_over_box = newWord
                        word_over_box.word_box = last_word_box
                    transkription_positions = []
                transkription_positions.append(transkription_position)
                last_word_box = transkription_position.has_box
            if len(transkription_positions) > 0:
                newWord = Word(id=len(self.word_parts), line_number=self.line_number,\
                        transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id)
                self.word_parts.append(newWord)
                if last_word_box is not None:
                    word_over_box = newWord
                    word_over_box.word_box = last_word_box
            self.transkription_positions = []
        elif len(self.word_parts) > 0:
            #self.word_parts, word_over_box = execute_function_on_parts(self.word_parts, inspect.currentframe().f_code.co_name) #'get_partial_word_over_box')
            for word_part in self.word_parts:
                if word_over_box is None:
                    word_over_box = word_part._get_partial_word_over_box()
                else:
                    break
        elif len([ tp for tp in self.transkription_positions if tp.has_box is not None]) == 1:
            word_over_box = self
            word_over_box.word_box = [ tp for tp in self.transkription_positions if tp.has_box is not None][0].has_box
        return word_over_box

    def _set_box_to_transkription_position(self, box_path, word_path, transkription_position, new_transkription_positions_dictionary, tr_xmin):
        """Set box_path to transkription_position that is contained by box_path.
            Create new transkription_positions by splitting old ones if necessary and add them to new_transkription_positions_dictionary.
        """
        if box_path.contains_path(word_path):
            transkription_position.has_box = box_path
        elif box_path.contains_start_of_path(word_path):
            split_position = box_path.path.bbox()[1] - tr_xmin
            new_tps = transkription_position.split(split_position)
            if len(new_tps) == 2:
                new_tps[0].has_box = box_path
                new_transkription_positions_dictionary.update({ transkription_position: new_tps })
            else:
                transkription_position.has_box = box_path
        elif box_path.contains_end_of_path(word_path):
            split_position = box_path.path.bbox()[0] - tr_xmin
            new_tps = transkription_position.split(split_position)
            if len(new_tps) == 2:
                new_tps[1].has_box = box_path
                new_transkription_positions_dictionary.update({ transkription_position: new_tps })
            else:
                transkription_position.has_box = box_path
        else: # box_path is in the middle of word_path
            split_position1 = box_path.path.bbox()[0] - tr_xmin
            split_position2 = box_path.path.bbox()[1] - tr_xmin
            new_tps = transkription_position.split(split_position1, split_position2)
            if len(new_tps) >= 2:
                new_tps[1].has_box = box_path
                new_transkription_positions_dictionary.update({ transkription_position: new_tps })
            else:
                transkription_position.has_box = box_path
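# Editor's note: a self-contained sketch, not part of the diff, of the four
# overlap cases _set_box_to_transkription_position distinguishes, reduced to
# x-intervals; the real code compares svgpathtools bounding boxes. It assumes
# box and word intersect, as in the caller. All coordinates are invented.
def classify_box_overlap(box, word):
    """Return how the interval `box` (xmin, xmax) covers the interval `word`."""
    if box[0] <= word[0] and box[1] >= word[1]:
        return 'contains path'      # whole position gets the box
    if box[0] <= word[0] < box[1] < word[1]:
        return 'contains start'     # split once, box on the first part
    if word[0] < box[0] < word[1] <= box[1]:
        return 'contains end'       # split once, box on the second part
    return 'in the middle'          # split twice, box on the middle part

assert classify_box_overlap((0, 10), (2, 8)) == 'contains path'
assert classify_box_overlap((0, 5), (2, 8)) == 'contains start'
assert classify_box_overlap((6, 12), (2, 8)) == 'contains end'
assert classify_box_overlap((4, 6), (2, 8)) == 'in the middle'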
""" try: return mypath1.path.intersect(mypath2.path, justonemode=True)\ or mypath1.is_partially_contained_by(mypath2) except AssertionError: return False Index: svgscripts/datatypes/page.py =================================================================== --- svgscripts/datatypes/page.py (revision 109) +++ svgscripts/datatypes/page.py (revision 110) @@ -1,428 +1,430 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a page. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET from os.path import isfile, basename from progress.bar import Bar from svgpathtools import svg2paths2, svg_to_paths from svgpathtools.parser import parse_path import re import sys import warnings from .box import Box from .color import Color from .image import Image, SVGImage from .faksimile_image import FaksimileImage from .faksimile_position import FaksimilePosition +from .imprint import Imprint from .lineNumber import LineNumber from .line import Line from .mark_foreign_hands import MarkForeignHands from .matrix import Matrix from .path import Path from .positional_word_part import PositionalWordPart from .super_page import SuperPage from .style import Style from .text_connection_mark import TextConnectionMark from .text_field import TextField from .transkriptionField import TranskriptionField from .writing_process import WritingProcess from .word import Word from .word_deletion_path import WordDeletionPath from .word_insertion_mark import WordInsertionMark sys.path.append('py2ttl') from class_spec import SemanticClass sys.path.append('shared_util') from main_util import extract_paths_on_tf, get_paths_near_position FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK class Page(SemanticClass,SuperPage): """ This class represents a page. Args: xml_source_file (str): name of the xml file to be instantiated. faksimile_image: FaksimileImage. faksimile_svgFile: svg file containing information about word positions. 
""" UNITTESTING = False def __init__(self, xml_source_file=None, faksimile_image=None, faksimile_svgFile=None, add_paths_near_words=False, warn=False, number=None): if xml_source_file is not None: super(Page,self).__init__(xml_source_file) self.update_property_dictionary('faksimile_image', faksimile_image) self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile) self.init_all_properties() self.add_style(style_node=self.page_tree.getroot().find('.//style')) self.faksimile_text_field = None self.svg_text_field = None self.init_node_objects() self.warn = warn self.add_deletion_paths_to_words(add_paths_near_words) else: self.page_tree = None self.number = number def add_deletion_paths_to_words(self, add_paths_near_words=False): """Add deletion paths to words. """ words = [ word for word in self.words if (len(word.word_parts) == 0 and word.deleted and len(word.deletion_paths) == 0)\ or 'add_paths_near_words' in word.process_flags ] words += [ word for word in self.words\ if len(word.word_parts) > 0 and True in\ [ (wp.deleted and len(wp.deletion_paths) == 0) for wp in word.word_parts ]] if len(words) > 0 and ((self.svg_file is not None and isfile(self.svg_file))\ or (self.source is not None and isfile(self.source))): svg_file = self.svg_file if self.svg_file is not None else self.source transkription_field = TranskriptionField(svg_file) tr_xmin = transkription_field.xmin if (self.svg_image is None or self.svg_image.text_field is None) else 0 tr_ymin = transkription_field.ymin if (self.svg_image is None or self.svg_image.text_field is None) else 0 word_deletion_paths = self.word_deletion_paths index = 0 dp_updated = False while index < len(words): word = words[index] word.add_deletion_paths(word_deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) if len(word.deletion_paths) > 0 or True in [ len(w.deletion_paths) > 0 for w in word.word_parts ]: deletion_paths = word.deletion_paths for wp in word.word_parts: deletion_paths += wp.deletion_paths for deletion_path in deletion_paths: if deletion_path not in self.word_deletion_paths: self.word_deletion_paths.append(deletion_path) elif not dp_updated: word_deletion_paths = extract_paths_on_tf(self) dp_updated = True index -= 1 if add_paths_near_words\ and ('add_paths_near_words' in word.process_flags\ or ((word.deleted and len(word.deletion_paths) == 0)\ or True in [ (w.deleted and len(w.deletion_paths) == 0) for w in word.word_parts ])): if not dp_updated\ and 'add_paths_near_words' in word.process_flags: word_deletion_paths = extract_paths_on_tf(self) dp_updated = True transform = None tp = None target_word = word paths_near_word = [] if word.deleted and len(word.transkription_positions) > 0: transform = word.transkription_positions[0].transform for tp in word.transkription_positions: word.deletion_paths_near_word += get_paths_near_position(tp, word_deletion_paths) elif len(word.word_parts) > 0: for wp in word.word_parts: if wp.deleted and len(wp.transkription_positions) > 0: target_word = wp for tp in wp.transkription_positions: wp.deletion_paths_near_word = get_paths_near_position(tp, word_deletion_paths) if self.warn and (word.deleted and len(word.deletion_paths) == 0): warnings.warn(\ f'WARNING: {self.title} {self.number}: {word.id} on {word.line_number}, {word.text} has no deletion paths! {target_word.deletion_paths_near_word}, {transform}') index += 1 @classmethod def create_cls(cls, xml_source_file=None, create_dummy_page=False, page_node=None): """Create a Page. 
""" if not create_dummy_page: return cls(xml_source_file) else: m = re.match(r'(.*)(page[0]*)(.*)(\.xml)', xml_source_file) if m is not None and len(m.groups()) > 3: number = m.group(3) else: number = basename(xml_source_file).replace('.xml','') return cls(number=number) @classmethod def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None): """Returns a list of Page instantiating a xml_file of type FILE_TYPE_SVG_WORD_POSITION or xml_files contained in xml_file of type FILE_TYPE_XML_MANUSCRIPT. [optional: instantiation depends on the fulfilment of a status_contains and/or on the selection of some words by a word_selection_function]. """ source_tree = ET.parse(xml_file) if source_tree.getroot().find('metadata/type').text == cls.FILE_TYPE_SVG_WORD_POSITION: page = cls(xml_file) if word_selection_function is None or len(word_selection_function(page.words)) > 0: return [ page ] else: return [] elif source_tree.getroot().find('metadata/type').text == FILE_TYPE_XML_MANUSCRIPT: pages = [] xpath = '//page/@output' if status_contains != '' and status_not_contain != '': xpath = '//page[contains(@status, "{0}") and not(contains(@status, "{1}"))]/@output'.format(status_contains, status_not_contain) elif status_contains != '': xpath = '//page[contains(@status, "{0}")]/@output'.format(status_contains) elif status_not_contain != '': xpath = '//page[not(contains(@status, "{0}"))]/@output'.format(status_not_contain) for xml_source_file in source_tree.xpath(xpath): if isfile(xml_source_file): pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function) return pages else: return [] @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. """ dictionary = {} class_dict = cls.get_class_dictionary() properties = { 'number': { 'class': str, 'cardinality': 1}} properties.update(cls.create_semantic_property_dictionary('faksimile_image', FaksimileImage, subPropertyOf=cls.HAS_IMAGE)) properties.update(cls.create_semantic_property_dictionary('faksimile_text_field', TextField,\ name='pageIsOnFaksimileTextField', label='page is on faksimile text field',\ comment='Relates a page to the text field on a svg image.', subPropertyOf=cls.PAGE_IS_ON_TEXTFIELD)) properties.update(cls.create_semantic_property_dictionary('orientation', str)) properties.update(cls.create_semantic_property_dictionary('svg_image', SVGImage, subPropertyOf=cls.HAS_IMAGE)) properties.update(cls.create_semantic_property_dictionary('svg_text_field', TextField,\ name='pageIsOnSVGTextField', label='page is on svg text field',\ comment='Relates a page to the text field on a faksimile image.', subPropertyOf=cls.PAGE_IS_ON_TEXTFIELD)) - for key in [ 'lines', 'mark_foreign_hands', 'words', 'word_deletion_paths', 'word_insertion_marks']: + for key in [ 'imprints', 'lines', 'mark_foreign_hands', 'words', 'word_deletion_paths', 'word_insertion_marks']: properties.update(cls.create_semantic_property_dictionary(key, list)) dictionary.update({cls.CLASS_KEY: class_dict}) dictionary.update({cls.PROPERTIES_KEY: properties}) return cls.return_dictionary_after_updating_super_classes(dictionary) def get_word_deletion_path(self, path=None, d_attribute=None) ->WordDeletionPath: """Return a word deletion path that belongs to page. 
""" if path is None and d_attribute is None: raise Exception('ERROR: get_word_deletion_path needs a path or a d_attribute!') if d_attribute is None: d_attribute = path.d_attribute page_paths = [ dpath for dpath in self.word_deletion_paths if dpath.d_attribute == d_attribute ] if len(page_paths) > 0: return page_paths[0] else: dpath = WordDeletionPath.create_cls(self, path=path, d_attribute=d_attribute) if dpath is not None: dpath.id = len(self.word_deletion_paths) self.word_deletion_paths.append(dpath) dpath.attach_object_to_tree(self.page_tree) return dpath def init_node_objects(self): """Initialize all node objects. """ self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG) ] self.words = [ Word.create_cls(word_node) for word_node in self.page_tree.getroot().xpath('./word') ] self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in self.page_tree.getroot().xpath('//' + MarkForeignHands.XML_TAG) ] self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ] self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ] self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ] + self.imprints = [ Imprint.create_cls_from_node(imprint_node, self.lines) for imprint_node in self.page_tree.getroot().xpath('//' + Imprint.XML_TAG) ] self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ] self.word_deletion_paths = [ WordDeletionPath.create_cls(self, node=node) for node in self.page_tree.xpath('./' + WordDeletionPath.XML_TAG) ] if self.faksimile_image is not None and self.faksimile_image.text_field is not None: self.faksimile_text_field = self.faksimile_image.text_field if self.svg_image is not None and self.svg_image.text_field is not None: self.svg_text_field = self.svg_image.text_field for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks: simple_word.init_word(self) for wim in self.word_insertion_marks: if wim.line_number > -1: wim.line = [ line for line in self.lines if line.id == wim.line_number ][0] def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]): """Update word ids and attach them to page.page_tree. 
""" if not self.is_locked(): update_function_on_word = [ update_function_on_word ]\ if type(update_function_on_word) != list\ else update_function_on_word for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG): node.getparent().remove(node) for index, word in enumerate(self.words): word.id = index for func in update_function_on_word: if callable(func): func(word) word.attach_word_to_tree(self.page_tree) for index, mark_foreign_hands in enumerate(self.mark_foreign_hands): mark_foreign_hands.id = index if MarkForeignHands in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(mark_foreign_hands) mark_foreign_hands.attach_word_to_tree(self.page_tree) for index, text_connection_mark in enumerate(self.text_connection_marks): text_connection_mark.id = index if TextConnectionMark in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(text_connection_mark) text_connection_mark.attach_word_to_tree(self.page_tree) else: print('locked') def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None): """Update the data source of page. """ if faksimile_svgFile is not None: self.faksimile_svgFile = faksimile_svgFile data_node = self.page_tree.xpath('.//data-source')[0]\ if len(self.page_tree.xpath('.//data-source')) > 0\ else ET.SubElement(self.page_tree.getroot(), 'data-source') data_node.set('file', self.faksimile_svgFile) if xml_correction_file is not None: data_node.set('xml-corrected-words', xml_correction_file) def update_line_number_area(self, transkription_field, svg_tree=None, set_to_text_field_zero=True): """Determines the width of the area where the line numbers are written in the page.source file. 
""" THRESHOLD = 0.4 if svg_tree is None: svg_tree = ET.parse(self.source) if len(self.line_numbers) > 1: line_number = self.line_numbers[9]\ if transkription_field.is_page_verso() and len(self.line_numbers) > 8\ else self.line_numbers[1] ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)\ if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)\ and LineNumber.IS_A_LINE_NUMBER(item)\ and LineNumber(raw_text_node=item).id == line_number.id ] if len(ln_nodes) > 0: matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform')) if transkription_field.is_page_verso(): transkription_field.add_line_number_area_width(matrix.getX()) elif self.svg_file is not None and isfile(self.svg_file): svg_path_tree = ET.parse(self.svg_file) namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() } svg_x = matrix.getX() svg_y = self.line_numbers[1].bottom + transkription_field.ymin\ if set_to_text_field_zero\ else self.line_numbers[1].bottom use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\ .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces) if len(use_nodes) > 0: symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '') d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces) if len(d_strings) > 0 and d_strings[0] != '': path = parse_path(d_strings[0]) xmin, xmax, ymin, ymax = path.bbox() width = xmax - xmin transkription_field.add_line_number_area_width(matrix.getX() + width) def update_page_type(self, transkription_field=None): """Adds a source to page and attaches it to page_tree. """ if self.number.endswith('r')\ or self.number.endswith('v'): self.page_type = Page.PAGE_VERSO\ if self.number.endswith('v')\ else Page.PAGE_RECTO else: if transkription_field is None: if self.source is None or not isfile(self.source): raise FileNotFoundError('Page does not have a source!') transkription_field = TranskriptionField(self.source, multipage_index=self.multipage_index) self.page_type = Page.PAGE_VERSO\ if transkription_field.is_page_verso()\ else Page.PAGE_RECTO self.page_tree.getroot().set('pageType', self.page_type) def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False): """Update styles of words and add them to their transkription_positions. Args: add_to_parents: Add styles also to word (and if not None to manuscript). partition_according_to_styles: Partition word if its transkription_positions have different styles. 
""" style_dictionary = {} if words is None: words = self.words for word in words: if len(word.word_parts) > 0: self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\ add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles) for transkription_position in word.transkription_positions: if len(transkription_position.positional_word_parts) > 0: style_class = transkription_position.positional_word_parts[0].style_class writing_process_id = -1 for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]: writing_process_id = self.fontsizekey2stage_mapping.get(font_key) style_class_key = (Style.remove_irrelevant_style_keys(style_class, self, extended_styles=create_css), writing_process_id) if create_css: if style_dictionary.get((style_class_key, word.deleted)) is None: color = None if len(word.deletion_paths) > 0: if word.deletion_paths[0].style_class is not None\ and word.deletion_paths[0].style_class != ''\ and self.style_dict.get(word.deletion_paths[0].style_class) is not None: color = Color.create_cls_from_style_object(self.style_dict.get(word.deletion_paths[0].style_class)) else: color = Color() style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\ create_css=create_css, deletion_color=color, writing_process_id=style_class_key[1] ) transkription_position.style = style_dictionary[(style_class_key, word.deleted)] #print(style_dictionary[(style_class_key, word.deleted)]) else: if style_dictionary.get(style_class_key) is None: style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css) style_dictionary[style_class_key].writing_process_id = style_class_key[1] transkription_position.style = style_dictionary[style_class_key] if add_to_parents and transkription_position.style not in word.styles: word.styles.append(transkription_position.style) if partition_according_to_styles: word.split_according_to_status('style', splits_are_parts=True) if manuscript is not None\ and add_to_parents: manuscript.update_styles(*style_dictionary.values()) def __eq__(self, other): """Returns true if self is qualitatively identical to other. """ if other is None: return False if self.page_tree is None and other.page_tree is None: return self.number == other.number if self.page_tree is None or other.page_tree is None: return False return self.page_tree.docinfo.URL == other.page_tree.docinfo.URL def __hash__(self): """Return a hash value for self. """ try: if self.page_tree is None: return hash(self.number) except AttributeError: print(self) return hash(self.number) return hash(self.page_tree.docinfo.URL) Index: svgscripts/datatypes/imprint.py =================================================================== --- svgscripts/datatypes/imprint.py (revision 0) +++ svgscripts/datatypes/imprint.py (revision 110) @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" This program can be used to extract imprints from a svg file. +""" +# Copyright (C) University of Basel 2021 {{{1 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+import re
+import sys
+from os import listdir, sep, path
+from os.path import isfile, isdir, dirname
+import lxml.etree as ET
+import warnings
+
+
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from .attachable_object import AttachableObject
+from .atypical_writing import AtypicalWriting
+from .clarification import Clarification
+from .editor_correction import EditorCorrection
+from .line_continuation import LineContinuation
+from .matrix import Matrix
+from .standoff_tag import StandoffTag
+from .text import Text
+from .transkriptionField import TranskriptionField
+from .uncertain_decipherment import UncertainDecipherment
+from .footnotes import FootnoteColumns
+
+sys.path.append('py2ttl')
+from class_spec import SemanticClass
+from xml_conform_dictionary import XMLConformDictionary
+
+UNITTESTING = False
+DEBUG = False
+
+class Imprint(SemanticClass,AttachableObject):
+    """This class represents an imprint, i.e. the reference to the printed version of the corresponding manuscript.
+    """
+    START_END_LINE_PATTERN = re.compile(r'^(\d+)(-)(\d+)(:\s*)(.*)')
+    COMMA_LINE_PATTERN = re.compile(r'^(\d+)(,)(\d+)(-)(\d+)(:\s*)(.*)')
+    LINE_PATTERN = re.compile(r'^(((\d+,)*\d+-)*\d+)(:\s)(.*)')
+    XML_TAG = 'imprint'
+    DEBUG = False
+
+    def __init__(self, reference=None, lines=None, line_list_string='', id=0):
+        self.id = id
+        self.reference = reference
+        self.lines = lines if lines is not None else []
+        self.line_list_string = line_list_string
+
+    def attach_object_to_tree(self, target_tree):
+        """Attach object to tree.
+        """
+        obj_node = self.get_or_create_node_with_id(target_tree)
+        obj_node.set('reference', self.reference)
+        if self.line_list_string != '':
+            obj_node.set('line-list-string', self.line_list_string)
+
+    @classmethod
+    def create_cls_from_node(cls, node, lines):
+        """Initialize a cls from node.
+
+        [:return:] cls
+        """
+        reference = node.get('reference')
+        line_list_string = node.get('line-list-string')\
+                if bool(node.get('line-list-string')) else ''
+        return cls(reference=reference, lines=get_lines(lines, line_list_string), line_list_string=line_list_string)
+
+    @classmethod
+    def extract_cls(cls, lines, raw_node, namespaces, id=0):
+        """Return an instance of cls by extracting information from raw svg node.
+        """
+        raw_string = ''.join(raw_node.xpath('./ns:tspan/text()', namespaces=namespaces))
+        line_match = re.match(cls.LINE_PATTERN, raw_string)
+        if line_match is not None:
+            return cls(line_match.groups()[-1], line_list_string=line_match.groups()[0], id=id)
+        else:
+            return cls(raw_string, id=id)
+
+    @classmethod
+    def get_semantic_dictionary(cls):
+        """ Creates a semantic dictionary as specified by SemanticClass.
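+
+        A sketch of how the dictionary is typically consumed (superclasses
+        may contribute further properties via the update below):
+
+            >>> properties = Imprint.get_semantic_dictionary()[Imprint.PROPERTIES_KEY]
+            >>> 'reference' in properties and 'lines' in properties
+            True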
+ """ + properties = {} + properties.update(cls.create_semantic_property_dictionary('reference', str,\ + name='imprintHasReference', label='imprint refers to the signature of the printed version of the manuscript')) + properties.update(cls.create_semantic_property_dictionary('lines', list,\ + name='imprintRefersToLines', label='the printed version of the manuscript concerns this list of lines')) + dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties } + return cls.return_dictionary_after_updating_super_classes(dictionary) + +def get_lines(lines, line_list_string='') ->list: + """Returns a list of lines that correspond to the lines that are imprinted + """ + # TODO create tln:partOfPageTextUnit for each line range + if line_list_string == '': + return [] + relevant_lines = [] + if re.match(r'(.*\d+)(,)(\d+.*)', line_list_string): + for line_list_sub_string in line_list_string.split(','): + relevant_lines += get_lines(lines, line_list_string=line_list_sub_string) + return relevant_lines + multi_line_match = re.match(r'(\d+)(-)(\d+)', line_list_string) + single_line_match = re.match(r'^\d+$', line_list_string) + if multi_line_match is not None: + start_segment = int(multi_line_match.groups()[0]) + end_segment = int(multi_line_match.groups()[2]) + return [ line for line in lines if line.id >= start_segment and line.id <= end_segment ] + elif single_line_match is not None: + return [ line for line in lines if line.id == int(single_line_match.group()) ] + return relevant_lines + +def extract_imprints(page, transkription_field=None, svg_tree=None) ->list: + """Returns a list of imprints. + """ + if page.marginals_source is not None: + svg_tree = ET.parse(page.marginals_source) + if transkription_field is None: + transkription_field = TranskriptionField(page.source) + if svg_tree is None and page.source is not None: + svg_tree = ET.parse(page.source) + if transkription_field is None: + transkription_field = TranskriptionField(svg_tree.docinfo.URL) + nodes_beneath_tf = [ item for item in filter(lambda node: Matrix.IS_BENEATH_TF(Matrix(transform_matrix_string=node.get('transform')), transkription_field),\ + svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))] + if len(nodes_beneath_tf) == 0: + return [] + x = Matrix(transform_matrix_string=nodes_beneath_tf[0].get('transform')).getX() + namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() } + imprints = [ Imprint.extract_cls(page.lines, node, namespaces, id=i) for (i, node) in enumerate([ node for node in svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap)\ + if Matrix(transform_matrix_string=node.get('transform')).getX() == x ]) ] + return imprints + + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) Index: svgscripts/datatypes/footnotes.py =================================================================== --- svgscripts/datatypes/footnotes.py (revision 109) +++ svgscripts/datatypes/footnotes.py (revision 110) @@ -1,347 +1,347 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to extract footnotes from a svg file. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}

import re
import sys
from os import listdir, sep, path
from os.path import isfile, isdir, dirname
import lxml.etree as ET
import warnings

__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

from .atypical_writing import AtypicalWriting
from .clarification import Clarification
from .editor_correction import EditorCorrection
from .line_continuation import LineContinuation
from .matrix import Matrix
from .standoff_tag import StandoffTag
from .text import Text
from .transkriptionField import TranskriptionField
from .uncertain_decipherment import UncertainDecipherment

UNITTESTING = False
DEBUG = False

class FootnoteColumns:
    """This class represents footnote columns.
    """
    REFERENCE_PATTERN = re.compile('.*(\d+-)*[0-9]+:')
    EXTENDED_REFERENCE_PATTERN = re.compile('.*(\d+(-|/))*[0-9]+:')
    REFERENCE_GROUP = re.compile('(.*\D)((\d+-)*[0-9]+:)')
    EXCEPTION = re.compile('((\d+/)+[0-9]+:)')

    def __init__(self, nsmap, nodes, bottom_values, style_dict, debug=False, skip_after=-1.0):
        self.bottom_values = bottom_values
        self.footnote_columns = []
        self.footnote_keys = {}
        self.index = 0
        self.nodes = nodes
        self.nsmap = nsmap
        self.skip_after = skip_after
        self.style_dict = style_dict
        self.debug = debug
        self._init_columns()

    def _init_columns(self):
        """Initialize footnote column positions by creating lists in self.footnote_columns
        and adding the positions as keys to self.footnote_keys, with the corresponding
        indices in self.footnote_columns as their values.
        """
        first_line_fn_nodes = sorted([ item for item in self.nodes\
                if round(Matrix(transform_matrix_string=item.get('transform')).getY(), 1) == round(self.bottom_values[0], 1)\
                and Matrix(transform_matrix_string=item.get('transform')).getX() > self.skip_after],\
                key=lambda node: Matrix(transform_matrix_string=node.get('transform')).getX())
        current_nodes = []
        for node in first_line_fn_nodes:
            matrix = Matrix(transform_matrix_string=node.get('transform'))
            if len(node.getchildren()) > 0:
                for tspan in node.findall('tspan', self.nsmap):
                    x = matrix.add2X(float(tspan.get('x')))
                    current_nodes.append({ 'x': x, 'text': tspan.text })
            elif node.text is not None:
                x = matrix.getX()
                current_nodes.append({ 'x': x, 'text': node.text })
            if re.match(self.EXTENDED_REFERENCE_PATTERN,\
                    ''.join([ item.get('text') for item in current_nodes])):
                current_nodes = self._remove_unused_texts(current_nodes)
                self.footnote_columns.append([])
                self.footnote_keys.update({ round(current_nodes[0].get('x')): len(self.footnote_columns)-1 })
                current_nodes = []
        if len(self.footnote_keys) == 0:
            raise Exception('ERROR: there are no footnote_keys')

    def _remove_unused_texts(self, nodes):
        """Remove tspan that contain text that is not a line reference.
""" threshold = 100 node_text = ''.join([ item.get('text') for item in nodes]) match = re.match(self.REFERENCE_GROUP, node_text) if match is not None and match.group(1) is not None\ and not re.match(self.EXCEPTION, node_text): unused_text = '' index = 0 for item in nodes: unused_text += item.get('text') if match.group(1).startswith(unused_text): index += 1 else: break if len(nodes) > index+1: counter = 0 has_gap = False for item in nodes[index:]: if len(nodes) > index+counter+1\ and nodes[index+counter+1].get('x')-nodes[index+counter].get('x') > threshold: index += counter+1 has_gap = True break counter += 1 if has_gap: return nodes[index+1:] return nodes[index:] return nodes def append(self, footnote): """Append footnote to a column """ self.footnote_columns[self.index].append(footnote) @classmethod def create_cls(cls, style_dict=None, page=None, transkription_field=None, svg_tree=None, svg_file=None, marginals_on_extra_page=False, skip_after=-1.0): """Returns all footnotes as a list of Text. """ if page is not None and page.source is not None and svg_file is None: svg_file = page.source\ if page.marginals_source is None\ else page.marginals_source if transkription_field is None and svg_file is not None: multipage_index = -1\ if page is None\ else page.multipage_index transkription_field = TranskriptionField(svg_file, multipage_index=multipage_index) if svg_tree is None and svg_file is not None: svg_tree = ET.parse(svg_file) if style_dict is None and page is not None: style_dict = StandoffTag.create_relevant_style_dictionary(page) if page is not None and page.marginals_source is not None: marginals_on_extra_page = True svg_tree = ET.parse(page.marginals_source) nodes_in_footnote_area = cls.EXTRACT_NODES_IN_FOOTNOTE_AREA(svg_tree, transkription_field, marginals_on_extra_page=marginals_on_extra_page) bottom_values = cls.GET_UNIQUE_BOTTOM_VALUES(nodes_in_footnote_area) if len(bottom_values) == 0: return None else: return cls(svg_tree.getroot().nsmap, nodes_in_footnote_area, bottom_values, style_dict, skip_after=skip_after) def extract_footnotes(self, contains_string='', contains_strings=None) -> list: """Returns all footnotes as a list of Text. """ left_value = -1 for bottom_value in self.bottom_values: nodes_on_line = sorted([ item for item in self.nodes\ if round(Matrix(transform_matrix_string=item.get('transform')).getY(), 1) == bottom_value\ and Matrix(transform_matrix_string=item.get('transform')).getX() > self.skip_after\ ],\ key=lambda x: Matrix(transform_matrix_string=x.get('transform')).getX()) footnote = None matrix = None for node in nodes_on_line: matrix = Matrix(transform_matrix_string=node.get('transform')) footnote, left_value = self._process_content_and_markup(node, footnote, matrix) if footnote is not None: self.append(footnote) footnotes = self.toList() if contains_strings is not None: footnotes = [ footnote for footnote in footnotes if True in [ contains_string in footnote.content for contains_string in contains_strings] ] if contains_string != '': footnotes = [ footnote for footnote in footnotes if contains_string in footnote.content ] return footnotes def get_index(self, left_value) -> int: """Return index of column for left value. """ index = -1 if round(left_value) in self.footnote_keys.keys(): index = self.footnote_keys[round(left_value)] else: for key, value in self.footnote_keys.items(): if abs(key - round(left_value)) < 2: index = value break return index def register_index(self, left_value): """Register index for next column to be used. 
""" index = self.get_index(left_value) if index > -1: self.index = index else: error_value = round(left_value) msg = f'Left value not part of columns: {error_value} -> {self.footnote_keys}' raise Exception(msg) def toList(self): """Return footnotes as a list of Text. """ footnotes = [] for footnote_list in self.footnote_columns: for footnote in footnote_list: if re.match(self.REFERENCE_PATTERN, footnote.content): footnotes.append(footnote) elif len(footnotes) > 0: footnotes[-1].join(footnote) else: print([ footnote.content for footnote in self.footnote_columns[1]]) print(self.footnote_keys) raise Exception(f'List of footnotes empty and footnote "{footnote.content}" does not match {self.REFERENCE_PATTERN.pattern}!') return footnotes def _process_content_and_markup(self, node, footnote, matrix): """Process content and markup of node. [:return:] (footnote: Text, left_value: float) """ startIndex = 0 next_text = node.text left_value = matrix.getX() items = [ item for item in node.findall('tspan', self.nsmap)] if len(items) > 0: next_text = ''.join([ item.text for item in items]) left_value = matrix.add2X(float(items[0].get('x'))) elif bool(node.get('x')): left_value = matrix.add2X(float(node.get('x'))) if footnote != None and\ ((re.match(r'.*[0-9]+:', next_text)\ and re.match(r'.*[0-9]+:', footnote.content)\ and not re.match(r'.*\d-', footnote.content))\ or (self.get_index(left_value) > -1\ and self.get_index(left_value) != self.index)): if DEBUG and re.match(r'.*[0-9]+:', next_text)\ and not re.match(r'.*[0-9]+:', footnote.content): print(footnote, next_text) self.append(footnote) footnote = None if len(items) > 0: for item in items: footnote, left_value = self._process_content_and_markup(item, footnote, matrix) else: if footnote is None: footnote = Text(content=next_text) try: self.register_index(left_value) except Exception: print(self.footnote_columns) raise Exception(f'{footnote}') else: startIndex = footnote.append(next_text) if bool(node.get('class')): - standoff_markups = StandoffTag.create_cls(startIndex, len(footnote.content), node.get('class'), style_dict=self.style_dict) + standoff_markups = StandoffTag.create_cls(startIndex, len(footnote.content)-1, node.get('class'), style_dict=self.style_dict) if len(standoff_markups) > 0: if len(footnote.standoff_markups) > 0: standoff_markups = footnote.standoff_markups[-1].join_list(standoff_markups) if len(standoff_markups) > 0: footnote.standoff_markups += standoff_markups return footnote, left_value @staticmethod def EXTRACT_NODES_IN_FOOTNOTE_AREA(svg_tree, transkription_field=None, marginals_on_extra_page=False) ->list: """Return a list of nodes that are in footnote area. 
""" if transkription_field is None and svg_tree is not None: transkription_field = TranskriptionField(svg_tree.docinfo.URL) nodes_in_footnote_area = [ item for item in filter(lambda node: Matrix.NODE_HAS_CONTENT_IN_FOOTNOTE_AREA(node, transkription_field,\ marginals_on_extra_page=marginals_on_extra_page),\ svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))] for node in nodes_in_footnote_area: if not Matrix.IS_IN_FOOTNOTE_AREA(node.get('transform'), transkription_field, marginals_on_extra_page=marginals_on_extra_page): for child in node.getchildren(): if not Matrix.IS_IN_FOOTNOTE_AREA(node.get('transform'), transkription_field, x=float(child.get('x')), marginals_on_extra_page=marginals_on_extra_page): node.remove(child) return nodes_in_footnote_area @staticmethod def GET_UNIQUE_BOTTOM_VALUES(nodes_in_footnote_area) ->list: """Return sorted list of unique bottom values. """ return sorted([ bottom_value for bottom_value in set(round(Matrix(transform_matrix_string=item.get('transform')).getY(),1) for item in nodes_in_footnote_area) ]) def extract_footnotes_as_strings(transkription_field=None, svg_tree=None, svg_file=None, contains_string='', marginals_extra=False): """Returns all footnotes as a list of strings. """ if transkription_field is None and svg_file is not None: transkription_field = TranskriptionField(svg_file) if svg_tree is None and svg_file is not None: svg_tree = ET.parse(svg_file) footnotes = [] nodes_in_footnote_area = [ item for item in filter(lambda x: Matrix.IS_IN_FOOTNOTE_AREA(x.get('transform'), transkription_field, marginals_on_extra_page=marginals_extra),\ svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))] bottom_values = sorted([ bottom_value for bottom_value in set(Matrix(transform_matrix_string=item.get('transform')).getY() for item in nodes_in_footnote_area) ]) for bottom_value in bottom_values: nodes_on_line = [ item for item in nodes_in_footnote_area if Matrix(transform_matrix_string=item.get('transform')).getY() == bottom_value ] nodes_on_line = sorted(nodes_on_line, key=lambda x: Matrix(transform_matrix_string=x.get('transform')).getX()) footnote_string = '' for node in nodes_on_line: if len(node.getchildren()) == 0: if footnote_string != '' and re.match(r'.*[0-9]+:', node.text): footnotes.append(footnote_string) footnote_string = node.text else: footnote_string += node.text else: next_string = ''.join([ item.text for item in node.findall('tspan', svg_tree.getroot().nsmap)]) if footnote_string != '' and re.match(r'.*[0-9]+:', next_string): footnotes.append(footnote_string) footnote_string = next_string else: footnote_string += next_string footnotes.append(footnote_string) if contains_string != '': footnotes = [ footnote_string for footnote_string in footnotes if contains_string in footnote_string ] return footnotes def extract_footnotes(page, transkription_field=None, svg_tree=None, svg_file=None, contains_string='', contains_strings=None, skip_after=-1.0) ->list: """Returns all footnotes as a list of Text. 
""" marginals_on_extra_page = False if page.marginals_source is not None: marginals_on_extra_page = True svg_tree = ET.parse(page.marginals_source) if transkription_field is None: transkription_field = TranskriptionField(page.source) footnote_columns = FootnoteColumns.create_cls(page=page, transkription_field=transkription_field,\ svg_tree=svg_tree, svg_file=svg_file, marginals_on_extra_page=marginals_on_extra_page, skip_after=skip_after) if footnote_columns is None: return [] return footnote_columns.extract_footnotes(contains_string=contains_string, contains_strings=contains_strings) if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: svgscripts/datatypes/simple_word.py =================================================================== --- svgscripts/datatypes/simple_word.py (revision 109) +++ svgscripts/datatypes/simple_word.py (revision 110) @@ -1,124 +1,139 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This super class can be used to represent a simple word. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import abc from lxml import etree as ET +import re import sys from .line import Line from .faksimile_position import FaksimilePosition from .transkription_position import TranskriptionPosition from .word_position import WordPosition sys.path.append('py2ttl') from class_spec import SemanticClass class SimpleWord(SemanticClass, metaclass=abc.ABCMeta): """ This class represents a simple word. """ + PUNCTUATION_PATTERN = re.compile('(^[\.\?,\!;:\-_–()“„]|[\.\?,\!;:\-_–()“„]$)') + FIND_PUNCTUATION_PATTERN = re.compile('(^[\.\?,\!;:\-_–()“„]|.*[\.\?,\!;:\-_–()“„]$)') XML_TAG = 'simple-word' XML_SUB_TAG = 'content' def __init__(self, id=0, line_number=-1, line=None, text='', deleted=False, transkription_positions=None, faksimile_positions=None): self.id = id self.text = text + self.clean_text = self._create_clean_text(text) self.line_number = line_number self.lines = [] if line is not None: self.lines.append(line) self.transkription_positions = transkription_positions if transkription_positions is not None else [] self.faksimile_positions = faksimile_positions if faksimile_positions is not None else [] def attach_word_to_tree(self, target_tree): """Attaches word to tree target_tree. 
""" if target_tree.__class__.__name__ == '_ElementTree': target_tree = target_tree.getroot() if len(target_tree.xpath('.//' + self.XML_TAG + '[@id="%s"]' % self.id)) > 0: word_node = target_tree.xpath('.//' + self.XML_TAG + '[@id="%s"]' % self.id)[0] word_node.getparent().remove(word_node) word_node = ET.SubElement(target_tree, self.XML_TAG, attrib={'id': str(self.id)}) word_node.set('text', self.text) if self.line_number > -1: word_node.set('line-number', str(self.line_number)) for id, transkription_position in enumerate(self.transkription_positions): transkription_position.id = id transkription_position.attach_object_to_tree(word_node) for faksimile_position in self.faksimile_positions: faksimile_position.attach_object_to_tree(word_node) return word_node + def _create_clean_text(self, text: str) ->str: + """Creates a text without any punctuation chars. + """ + if len(text) < 2\ + or (len(text) < 3 and re.match(self.FIND_PUNCTUATION_PATTERN, text[0]) is None)\ + or re.match(self.FIND_PUNCTUATION_PATTERN, text) is None: + return text + return self._create_clean_text(re.sub(self.PUNCTUATION_PATTERN, '', text)) + @classmethod def create_cls(cls, word_node): """Creates a cls from a (lxml.Element) node. [:return:] cls """ if word_node is not None: # init word from xml node id = int(word_node.get('id')) line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else -1 text = word_node.get('text') transkription_positions = [ TranskriptionPosition(id=id, node=node) for id, node in enumerate(word_node.findall('./' + WordPosition.TRANSKRIPTION)) ] faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('./' + WordPosition.FAKSIMILE) ] return cls(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions) else: error_msg = 'word_node has not been defined' raise Exception('Error: {}'.format(error_msg)) @classmethod def get_semantic_dictionary(cls): """ Creates and returns a semantic dictionary as specified by SemanticClass. """ dictionary = {} class_dict = cls.get_class_dictionary() properties = { 'lines': {cls.CLASS_KEY: Line,\ cls.CARDINALITY: 1,\ cls.CARDINALITY_RESTRICTION: 'minCardinality',\ cls.PROPERTY_NAME: 'wordBelongsToLine',\ cls.PROPERTY_LABEL: 'word belongs to a line',\ cls.PROPERTY_COMMENT: 'Relating a word to a line.'}} properties.update(cls.create_semantic_property_dictionary('transkription_positions', TranskriptionPosition,\ name='hasTranskriptionPosition', cardinality=1, cardinality_restriction='minCardinality')) properties.update(cls.create_semantic_property_dictionary('faksimile_positions', FaksimilePosition,\ name='hasFaksimilePosition', cardinality=1, cardinality_restriction='minCardinality')) properties.update(cls.create_semantic_property_dictionary('text', str, cardinality=1,\ subPropertyOf=cls.HOMOTYPIC_HAS_TEXT_URL_STRING)) + properties.update(cls.create_semantic_property_dictionary('clean_text', str, cardinality=1,\ + name='hasCleanText', label='text without punctuation', comment='text of word without punctuation except abbrevations')) dictionary.update({cls.CLASS_KEY: class_dict}) dictionary.update({cls.PROPERTIES_KEY: properties}) return cls.return_dictionary_after_updating_super_classes(dictionary) def init_word(self, page): """Initialize word with objects from page. 
""" if self.line_number > -1: self.lines += [ line for line in page.lines if line.id == self.line_number ] elif 'word_parts' in self.__dict__.keys() and len(self.word_parts) > 0: self.lines += [ line for line in page.lines if line.id in [ wp.line_number for wp in self.word_parts ] ] Index: svgscripts/datatypes/super_page.py =================================================================== --- svgscripts/datatypes/super_page.py (revision 109) +++ svgscripts/datatypes/super_page.py (revision 110) @@ -1,295 +1,296 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a super page. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET from os.path import isfile, basename, dirname from progress.bar import Bar from svgpathtools import svg2paths2, svg_to_paths from svgpathtools.parser import parse_path import sys import warnings from .image import Image, SVGImage from .faksimile_image import FaksimileImage from .mark_foreign_hands import MarkForeignHands from .text_connection_mark import TextConnectionMark from .text_field import TextField from .writing_process import WritingProcess class SuperPage: """ This super class represents a page. Args: xml_source_file (str): name of the xml file to be instantiated. xml_target_file (str): name of the xml file to which page info will be written. 
""" FILE_TYPE_SVG_WORD_POSITION = 'svgWordPosition' FILE_TYPE_XML_MANUSCRIPT = 'xmlManuscriptFile' ADD2Y = 7 PAGE_RECTO = 'recto' PAGE_VERSO = 'verso' STATUS_MERGED_OK = 'faksimile merged' STATUS_POSTMERGED_OK = 'words processed' UNITTESTING = False XML_TAG = 'page' def __init__(self, xml_file, title=None, page_number='', orientation='North', multipage_index=-1, page_type=PAGE_VERSO, should_xml_file_exist=False): self.properties_dictionary = {\ 'faksimile_image': (FaksimileImage.XML_TAG, None, FaksimileImage),\ 'faksimile_svgFile': ('data-source/@file', None, str),\ 'multipage_index': ('page/@multipage-index', multipage_index, int),\ 'marginals_source': ('page/@marginals-source', None, str),\ 'number': ('page/@number', str(page_number), str),\ 'orientation': ('page/@orientation', orientation, str),\ 'page_type': ('page/@pageType', page_type, str),\ 'pdfFile': ('pdf/@file', None, str),\ 'source': ('page/@source', None, str),\ 'svg_file': ('svg/@file', None, str),\ 'svg_image': (SVGImage.XML_TAG, None, SVGImage),\ 'text_field': (FaksimileImage.XML_TAG + '/' + TextField.XML_TAG, None, TextField),\ 'title': ('page/@title', title, str),\ } self.bak_file = None self.online_properties = [] + self.imprints = [] self.line_numbers = [] self.lines = [] self.mark_foreign_hands = [] self.page_tree = None self.sonderzeichen_list = [] self.style_dict = {} self.text_connection_marks = [] self.word_deletion_paths = [] self.word_insertion_marks = [] self.words = [] self.writing_processes = [] self.xml_file = xml_file if not self.is_page_source_xml_file(): msg = f'ERROR: xml_source_file {self.xml_file} is not of type "{FILE_TYPE_SVG_WORD_POSITION}"' raise Exception(msg) self._init_tree(should_xml_file_exist=should_xml_file_exist) def add_style(self, sonderzeichen_list=None, letterspacing_list=None, style_dict=None, style_node=None): """Adds a list of classes that are sonderzeichen and a style dictionary to page. 
""" self.sonderzeichen_list = sonderzeichen_list if sonderzeichen_list is not None else [] self.letterspacing_list = letterspacing_list if letterspacing_list is not None else [] self.style_dict = style_dict if style_dict is not None else {} if style_node is not None: self.style_dict = { item.get('name'): { key: value for key, value in item.attrib.items() if key != 'name' } for item in style_node.findall('.//class') } self.sonderzeichen_list = [ item.get('name') for item in style_node.findall('.//class')\ if bool(item.get('font-family')) and 'Sonderzeichen' in item.get('font-family') ] self.letterspacing_list = [ item.get('name') for item in style_node.findall('.//class')\ if bool(item.get('letterspacing-list')) ] elif bool(self.style_dict): for node in self.page_tree.xpath('//style'): node.getparent().remove(node) style_node = ET.SubElement(self.page_tree.getroot(), 'style') if len(self.sonderzeichen_list) > 0: style_node.set('Sonderzeichen', ' '.join(self.sonderzeichen_list)) if len(self.letterspacing_list) > 0: style_node.set('letterspacing-list', ' '.join(self.letterspacing_list)) for key in self.style_dict.keys(): self.style_dict[key]['name'] = key ET.SubElement(style_node, 'class', attrib=self.style_dict[key]) fontsize_dict = { key: float(value.get('font-size').replace('px','')) for key, value in self.style_dict.items() if 'font-size' in value } fontsizes = sorted(fontsize_dict.values(), reverse=True) # create a mapping between fontsizes and word stages self.fontsizekey2stage_mapping = {} for fontsize_key, value in fontsize_dict.items(): if value >= fontsizes[0]-1: self.fontsizekey2stage_mapping.update({ fontsize_key: WritingProcess.FIRST_VERSION }) elif value <= fontsizes[-1]+1: self.fontsizekey2stage_mapping.update({ fontsize_key: WritingProcess.LATER_INSERTION_AND_ADDITION }) else: self.fontsizekey2stage_mapping.update({ fontsize_key: WritingProcess.INSERTION_AND_ADDITION }) def get_biggest_fontSize4styles(self, style_set={}): """Returns biggest font size from style_dict for a set of style class names. [:returns:] (float) biggest font size OR 1 if style_dict is empty """ if bool(self.style_dict): sorted_font_sizes = sorted( (float(self.style_dict[key]['font-size'].replace('px','')) for key in style_set if bool(self.style_dict[key].get('font-size'))), reverse=True) return sorted_font_sizes[0] if len(sorted_font_sizes) > 0 else 1 else: return 1 def get_line_number(self, y): """Returns line number id for element at y. [:return:] (int) line number id or -1 """ if len(self.line_numbers) > 0: result_list = [ line_number.id for line_number in self.line_numbers if y >= line_number.top and y <= line_number.bottom ] return result_list[0] if len(result_list) > 0 else -1 else: return -1 def init_all_properties(self, overwrite=False): """Initialize all properties. """ for property_key in self.properties_dictionary.keys(): if property_key not in self.online_properties: self.init_property(property_key, overwrite=overwrite) def init_property(self, property_key, value=None, overwrite=False): """Initialize all properties. 
        Args:
            property_key: key of property in self.__dict__
            value: new value to set to property
            overwrite: whether or not to update values from xml_file (default: read only)
        """
        if value is None:
            if property_key not in self.online_properties:
                xpath, value, cls = self.properties_dictionary.get(property_key)
                if len(self.page_tree.xpath('//' + xpath)) > 0:
                    value = self.page_tree.xpath('//' + xpath)[0]
                if value is not None:
                    if cls.__module__ == 'builtins':
                        self.update_tree(value, xpath)
                        self.__dict__.update({property_key: cls(value)})
                    else:
                        value = cls(node=value)\
                                if type(value) != cls\
                                else value
                        self.__dict__.update({property_key: value})
                        self.__dict__.get(property_key).attach_object_to_tree(self.page_tree)
                else:
                    self.__dict__.update({property_key: value})
                self.online_properties.append(property_key)
        elif overwrite or property_key not in self.online_properties:
            xpath, default_value, cls = self.properties_dictionary.get(property_key)
            if cls.__module__ == 'builtins':
                self.__dict__.update({property_key: cls(value)})
                self.update_tree(value, xpath)
            else:
                self.__dict__.update({property_key: value})
                self.__dict__.get(property_key).attach_object_to_tree(self.page_tree)
            self.online_properties.append(property_key)

    def is_locked(self):
        """Return true if page is locked.
        """
        return len(self.page_tree.xpath('//metadata/lock')) > 0

    def is_page_source_xml_file(self, source_tree=None):
        """Return true if xml_file is of type FILE_TYPE_SVG_WORD_POSITION.
        """
        if not isfile(self.xml_file):
            return True
        if source_tree is None:
            source_tree = ET.parse(self.xml_file)
        return source_tree.getroot().find('metadata/type').text == self.FILE_TYPE_SVG_WORD_POSITION

    def lock(self, reference_file, message=''):
        """Lock tree such that ids of words etc. correspond to ids in reference_file,
        optionally add a message that will be shown.
        """
        if not self.is_locked():
            metadata = self.page_tree.xpath('./metadata')[0]\
                    if len(self.page_tree.xpath('./metadata')) > 0\
                    else ET.SubElement(self.page_tree.getroot(), 'metadata')
            lock = ET.SubElement(metadata, 'lock')
            ET.SubElement(lock, 'reference-file').text = reference_file
            if message != '':
                ET.SubElement(lock, 'message').text = message

    def unlock(self):
        """Remove the lock from the tree so that ids of words etc. can be updated again.
        """
        if self.is_locked():
            lock = self.page_tree.xpath('//metadata/lock')[0]
            lock.getparent().remove(lock)

    def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]):
        """Update word ids and attach them to page.page_tree.
""" if not self.is_locked(): update_function_on_word = [ update_function_on_word ]\ if type(update_function_on_word) != list\ else update_function_on_word for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG): node.getparent().remove(node) for index, word in enumerate(self.words): word.id = index for func in update_function_on_word: if callable(func): func(word) word.attach_word_to_tree(self.page_tree) for index, mark_foreign_hands in enumerate(self.mark_foreign_hands): mark_foreign_hands.id = index if MarkForeignHands in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(mark_foreign_hands) mark_foreign_hands.attach_word_to_tree(self.page_tree) for index, text_connection_mark in enumerate(self.text_connection_marks): text_connection_mark.id = index if TextConnectionMark in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(text_connection_mark) text_connection_mark.attach_word_to_tree(self.page_tree) else: print('locked') def update_property_dictionary(self, property_key, default_value): """Update properties_dictionary. """ content = self.properties_dictionary.get(property_key) if content is not None: self.properties_dictionary.update({property_key: (content[0], default_value, content[2])}) else: msg = f'ERROR: properties_dictionary does not contain a key {property_key}!' raise Exception(msg) def update_tree(self, value, xpath): """Update tree. """ node_name = dirname(xpath) node = self.page_tree.xpath('//' + node_name)[0]\ if len(self.page_tree.xpath('//' + node_name)) > 0\ else ET.SubElement(self.page_tree.getroot(), node_name) node.set(basename(xpath).replace('@', ''), str(value)) def _init_tree(self, should_xml_file_exist=False): """Initialize page_tree from xml_file if it exists. """ if isfile(self.xml_file): parser = ET.XMLParser(remove_blank_text=True) self.page_tree = ET.parse(self.xml_file, parser) elif not should_xml_file_exist: self.page_tree = ET.ElementTree(ET.Element('page')) self.page_tree.docinfo.URL = self.xml_file else: msg = f'ERROR: xml_source_file {self.xml_file} does not exist!' 
raise FileNotFoundError(msg) Index: tests_svgscripts/test_util.py =================================================================== --- tests_svgscripts/test_util.py (revision 109) +++ tests_svgscripts/test_util.py (revision 110) @@ -1,256 +1,260 @@ import unittest from os import sep, path, remove, listdir from os.path import isdir, isfile, dirname, basename import shutil import sys import lxml.etree as ET import sys import tempfile import warnings sys.path.append('svgscripts') import util from local_config import FAKSIMILE_LOCATION, PDF_READER, SVG_EDITOR, USER_ROOT_LOCATION_DICT from datatypes.faksimile import FaksimilePage from datatypes.page import Page from datatypes.page_creator import PageCreator from datatypes.positional_word_part import PositionalWordPart from datatypes.text_field import TextField from datatypes.transkriptionField import TranskriptionField from datatypes.word_position import WordPosition from datatypes.word import Word sys.path.append('shared_util') from myxmlwriter import write_pretty, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT sys.path.append('fixes') from fix_old_data import save_page class TestCopy(unittest.TestCase): def setUp(self): util.UNITTESTING = True DATADIR = path.dirname(__file__) + sep + 'test_data' self.test_dir = DATADIR self.faksimile_dir = DATADIR + sep + 'faksimile_svg' self.faksimile_file = self.faksimile_dir + sep + 'N-VII-1,5et6.svg' self.image = DATADIR + sep + 'image.jpg' self.svg_testrecord = DATADIR + sep + 'TESTRECORD.svg' self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml' self.Mp_XIV_page420 = DATADIR + sep + 'Mp_XIV_page420.xml' self.tmp_dir = tempfile.mkdtemp() def test_copy(self): tmp_image = self.tmp_dir + sep + basename(self.image) target_file = 'asdf.svg' shutil.copy(self.image, self.tmp_dir) util.copy_faksimile_svg_file(target_file, faksimile_source_file=self.faksimile_file,\ target_directory=self.tmp_dir, local_image_path=tmp_image) self.assertEqual(isfile(self.tmp_dir + sep + target_file), True) util.copy_faksimile_svg_file(faksimile_source_file=self.faksimile_file,\ target_directory=self.tmp_dir, local_image_path=tmp_image) self.assertEqual(isfile(self.tmp_dir + sep + basename(self.faksimile_file)), True) with self.assertRaises(Exception): util.copy_faksimile_svg_file() with self.assertRaises(Exception): util.copy_faksimile_svg_file(faksimile_source_file=self.faksimile_source_file) def test_copy_xml(self): old_page = Page(self.xml_file) xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir) self.assertEqual(isfile(xml_file), True) page = Page(xml_file) self.assertEqual(len(page.words), len(old_page.words)) self.assertEqual(len(page.line_numbers), 0) def test_create_highlighted_svg_file(self): target_file = self.tmp_dir + sep + basename(self.faksimile_file) tmp_image = self.tmp_dir + sep + basename(self.image) faksimile_tree = ET.parse(self.faksimile_file) namespaces = { k if k is not None else 'ns': v for k, v in faksimile_tree.getroot().nsmap.items() } node_ids = ['rect947', 'rect951', 'rect953', 'rect955', 'rect959', 'rect961', 'rect963'] highlight_color = 'blue' util.create_highlighted_svg_file(faksimile_tree, node_ids, target_directory=self.tmp_dir, highlight_color=highlight_color, namespaces=namespaces) self.assertEqual(isfile(target_file), True) new_tree = ET.parse(target_file) for node in new_tree.xpath('//ns:rect[@fill="{0}"]|//ns:path[@fill="{0}"]'.format(highlight_color), namespaces=namespaces): node_ids.remove(node.get('id')) self.assertEqual(len(node_ids), 0) def 
test_get_empty_node_ids(self): faksimile_tree = ET.parse(self.faksimile_file) faksimile_page = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)[0] empty_node_ids = util.get_empty_node_ids(faksimile_tree, faksimile_page=faksimile_page) self.assertEqual('rect1085' in empty_node_ids, True) def test_record_changes(self): new_tree = ET.parse(self.faksimile_file) old_tree = ET.parse(self.faksimile_file) empty_node_id = 'rect1085' title_node_id = 'test001' namespaces = { k if k is not None else 'ns': v for k, v in new_tree.getroot().nsmap.items() } node = new_tree.xpath('//ns:rect[@id="{0}"]'.format(empty_node_id), namespaces=namespaces)[0] title = ET.SubElement(node, 'title', attrib={ 'id': title_node_id }) title.text = 'test' new_file = self.tmp_dir + sep + 'new.svg' old_file = self.tmp_dir + sep + 'old.svg' util.copy_faksimile_svg_file(target_file=new_file, faksimile_tree=new_tree) util.copy_faksimile_svg_file(target_file=old_file, faksimile_tree=old_tree) util.record_changes(old_file, new_file, [ empty_node_id ], namespaces=namespaces) test_tree = ET.parse(old_file) self.assertEqual(len(test_tree.xpath('//ns:rect[@id="{0}"]/ns:title[@id="{1}"]'.format(empty_node_id, title_node_id), namespaces=namespaces)), 1) def test_replace_chars(self): page = Page(self.xml_file) faksimile_tree = ET.parse(self.faksimile_file) namespaces = { k if k is not None else 'ns': v for k, v in faksimile_tree.getroot().nsmap.items() } word_position = WordPosition(id='rect1159', text='„Gedächtniß"') wps, texts = util.replace_chars(page.words, [ word_position ]) self.assertEqual(texts[0].endswith('“'), True) self.assertEqual(wps[0].text.endswith('“'), True) word_position = WordPosition(id='rect1173', text='-') wps, texts = util.replace_chars(page.words, [ word_position ]) self.assertEqual(wps[0].text.endswith('–'), True) def test_mismatch_words(self): page = Page(self.xml_file) faksimile_tree = ET.parse(self.faksimile_file) faksimile_page = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)[0] page = Page('xml/N_VII_1_page174.xml') faksimile_tree = ET.parse('faksimile_svg/N-VII-1,173et174.svg') faksimile_page = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)[0] self.assertEqual('-' in [ tp.text for tp in faksimile_page.word_positions], True) wps, texts = util.replace_chars(page.words,faksimile_page.word_positions) self.assertEqual('–' in texts, True) self.assertEqual(len([ faksimile_position for faksimile_position in wps\ if faksimile_position.text == '–' ]), 4) mismatching_words, mismatching_faksimile_positions = util.get_mismatching_ids(page.words, faksimile_page.word_positions) self.assertEqual(len([word for word in mismatching_words if word.text.endswith('“') ]), 0) self.assertEqual(len([word for word in mismatching_words if word.text.endswith('–') ]), 0) def test_process_warnings(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter('default') warnings.warn('Test1: asdf') warnings.warn('Test2: asdf') status = util.process_warnings4status(w, ['Test1', 'Test2' ], 'asdf', 'OK', status_prefix='with warnings') #print(status) self.assertTrue('Test1' in status.split(':')) self.assertTrue('Test2' in status.split(':')) @unittest.skip('test uses external program, has been tested') def test_show_files(self): list_of_files = [ self.test_dir + sep + file for file in listdir(self.test_dir) if file.endswith('pdf') ][0:2] util.ExternalViewer.show_files(single_file=self.faksimile_file, list_of_files=list_of_files) def test_record_changes_to_page(self): page = 
        util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord, [ 1 ])
        old_length = len(page.words)
        self.assertEqual(page.words[1].text, 'asdf')
        self.assertEqual(page.words[1].transkription_positions[0].width, 353)
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord, [ 13 ])
        self.assertEqual(page.words[13].text, 'er')
        self.assertEqual(page.words[14].text, '=')
        self.assertEqual(len(page.words), old_length+1)
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord, [ 64 ])
        self.assertEqual(page.words[64].text, 'Simplifications-apparat')
        self.assertEqual(len(page.words[64].transkription_positions), 3)
        self.assertEqual(len(page.words), old_length-1)

    @unittest.skipUnless(__name__ == "__main__", 'tests all words')
    def test_extended__record_changes_to_page(self):
        page = Page(self.xml_file)
        old_length = len(page.words)
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord)
        self.assertEqual(page.words[1].text, 'asdf')
        self.assertEqual(page.words[13].text, 'er')
        self.assertEqual(page.words[14].text, '=')
        self.assertEqual(page.words[65].text, 'Simplifications-apparat')
        self.assertEqual(len(page.words), old_length)

    def test_copy_faksimile_update_image_location(self):
        test_dir = self.tmp_dir #FAKSIMILE_LOCATION + '/Myriam/Fertig/'
        util.copy_faksimile_update_image_location(self.faksimile_file, target_directory=test_dir)
        with self.assertWarns(UserWarning):
            util.copy_faksimile_update_image_location(self.faksimile_file, target_directory=test_dir)

    def test_record_changes_on_xml(self):
        old_page = Page(self.xml_file)
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        tree = ET.parse(xml_file)
        node = tree.xpath('//word[@id="135"]')[0]
        counter = 0
        while node.get('text') != 'gar' and counter < 5:
            counter += 1
            nextnode = node.getnext()
            node.set('text', node.get('text') + nextnode.get('text'))
            for element in nextnode.getchildren():
                node.append(element)
            nextnode.getparent().remove(nextnode)
        write_pretty(xml_element_tree=tree, file_name=xml_file,\
                script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
        new_page = util.record_changes_on_xml_file_to_page(self.xml_file, xml_file)
        self.assertEqual(len(new_page.words), len(old_page.words)-2)
        self.assertEqual(len([ word for word in new_page.words if word.text == 'gar']), 1)
        old_page = Page(self.xml_file)
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        tree = ET.parse(xml_file)
        node = tree.xpath('//word[@id="138"]')[0]
        counter = 0
        while node.get('text') != 'nichtvorkommt.' and counter < 5:
            counter += 1
            nextnode = node.getnext()
            node.set('text', node.get('text') + nextnode.get('text'))
            for element in nextnode.getchildren():
                node.append(element)
            nextnode.getparent().remove(nextnode)
        node.set('split', 'nicht vorkommt.')
        write_pretty(xml_element_tree=tree, file_name=xml_file,\
                script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
        joined_page = Page(xml_file)
        self.assertEqual(len([word for word in joined_page.words if word.text == 'nichtvorkommt.']), 1)
        self.assertEqual(len([word for word in joined_page.words if word.text == 'nichtvorkommt.'][0].split_strings), 2)
        self.assertEqual(len(joined_page.words), len(old_page.words)-1)
        new_page = util.record_changes_on_xml_file_to_page(self.xml_file, xml_file)
        self.assertEqual(len(new_page.words), len(old_page.words))
        self.assertEqual(len([word for word in new_page.words if word.text == 'vorkommt.']), 1)
        self.assertEqual(len([word for word in old_page.words if word.text == 'nicht']),\
                len([word for word in new_page.words if word.text == 'nicht']))
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        tree = ET.parse(xml_file)
        old_page = Page(xml_file)
        nodes = tree.xpath('//word[@id>="85" and @id<="87"]')
        self.assertEqual(len(nodes), 3)
        prevWordText = nodes[0].get('text')
        nodes[0].set('join', prevWordText + 'z')
        nodes[1].set('split', 'z u')
        lastWordText = nodes[2].get('text')
        nodes[2].set('join', 'u' + lastWordText)
        write_pretty(xml_element_tree=tree, file_name=xml_file,\
                script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
        joined_page = util.record_changes_on_xml_file_to_page(self.xml_file, xml_file)
        self.assertEqual(len(joined_page.words), len(old_page.words)-1)

    def test_reset_tp_with_matrix(self):
        page = Page(self.Mp_XIV_page420)
        util.reset_tp_with_matrix(page.words[0].transkription_positions)
        self.assertTrue(page.words[0].transkription_positions[0].left > 0 and page.words[0].transkription_positions[0].top > -5)
        transformed_words = [w for w in page.words if (len(w.transkription_positions) > 0 and w.transkription_positions[0].transform is not None) ]
        util.reset_tp_with_matrix(transformed_words[0].transkription_positions)
        self.assertEqual(transformed_words[0].transkription_positions[0].left, 0)
        self.assertTrue(transformed_words[0].transkription_positions[0].top < 0)

    def test_back_up(self):
        test_dir = self.tmp_dir
        page = Page(self.xml_file)
        target_file_name = util.back_up(page, self.xml_file, bak_dir=test_dir)
        self.assertEqual(isfile(target_file_name), True)
        svg_tree = ET.parse(page.svg_file)
        namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
        util.back_up_svg_file(svg_tree, namespaces)
+        """
+        page = Page('xml/Mp_XV_page79r.xml')
+        util.back_up(page, page.xml_file)
+        """

    def tearDown(self):
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        pass

if __name__ == "__main__":
    unittest.main()
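The join/split round trip exercised by test_record_changes_on_xml relies on a small convention in the word position files: a merged word node carries the concatenated 'text' of its parts, and a 'split' attribute records where record_changes_on_xml_file_to_page should cut it apart again. A minimal stand-alone sketch of the merge step (node ids and texts here are made up):

import lxml.etree as ET

tree = ET.ElementTree(ET.fromstring('<page><word id="0" text="nicht"/><word id="1" text="vorkommt."/></page>'))
node = tree.xpath('//word[@id="0"]')[0]
nextnode = node.getnext()
# Merge the two nodes: concatenate the texts, adopt the children, drop the second node.
node.set('text', node.get('text') + nextnode.get('text'))
for element in nextnode.getchildren():
    node.append(element)
nextnode.getparent().remove(nextnode)
# Record how the merged text should be re-split on the way back.
node.set('split', 'nicht vorkommt.')
assert len(tree.xpath('//word')) == 1
assert tree.xpath('//word')[0].get('text') == 'nichtvorkommt.'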
Index: tests_svgscripts/test_simple_word.py
===================================================================
--- tests_svgscripts/test_simple_word.py	(revision 109)
+++ tests_svgscripts/test_simple_word.py	(revision 110)
@@ -1,36 +1,44 @@
import unittest
from os import sep, path
from os.path import dirname, isdir
import lxml.etree as ET
+import re
import sys
sys.path.append('svgscripts')
from datatypes.matrix import Matrix
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.simple_word import SimpleWord
from datatypes.mark_foreign_hands import MarkForeignHands
from
datatypes.word import Word class TestSimpleWord(unittest.TestCase): def test_get_semanticAndDataDict(self): dictionary = SimpleWord.get_semantic_dictionary() #print(dictionary) def test_create_cls_from_word(self): word = Word(text='test') mark = MarkForeignHands.create_cls_from_word(word) self.assertEqual(mark.text, word.text) self.assertEqual(type(mark), MarkForeignHands) def test_attach(self): word = SimpleWord() word.transkription_positions.append(TranskriptionPosition(id=0)) word.transkription_positions.append(TranskriptionPosition(id=0)) tree = ET.Element('page') word.attach_word_to_tree(tree) self.assertEqual(len(tree.xpath('//' + TranskriptionPosition.XML_TAG)), 2) + def test_clean_text(self): + word = SimpleWord() + self.assertEqual(word._create_clean_text('-asdf'), 'asdf') + self.assertEqual(word._create_clean_text('(-asdf)'), 'asdf') + self.assertEqual(word._create_clean_text('(a.)'), 'a.') + self.assertEqual(word._create_clean_text('.verhehlen'), 'verhehlen') + if __name__ == "__main__": unittest.main() Index: tests_svgscripts/test_imprint.py =================================================================== --- tests_svgscripts/test_imprint.py (revision 0) +++ tests_svgscripts/test_imprint.py (revision 110) @@ -0,0 +1,57 @@ +import unittest +from os import sep, path, remove +from os.path import isdir, isfile, dirname +import shutil +import sys +import lxml.etree as ET +import warnings +import sys + +sys.path.append('svgscripts') + +import datatypes.imprint +from datatypes.imprint import Imprint, extract_imprints, get_lines, UNITTESTING, DEBUG +from datatypes.matrix import Matrix +from datatypes.page import Page +from datatypes.positional_word_part import PositionalWordPart +from datatypes.transkriptionField import TranskriptionField + +class TestExtractImprint(unittest.TestCase): + def setUp(self): + datatypes.imprint.UNITTESTING = True + DATADIR = path.dirname(__file__) + sep + 'test_data' + self.test_page = Page.create_cls(DATADIR + sep + 'Mp_XV_page79v.xml') + self.test_page.source = self.test_page.page_tree.docinfo.URL.replace('.xml', '.svg') + + def test_extract_footnotes(self): + imprints = extract_imprints(self.test_page) + self.assertEqual(len(imprints), 4) + """ + for imprint in imprints: + print(imprint.reference, imprint.start_line, imprint.end_line) + """ + + def test_attach(self): + imprints = extract_imprints(self.test_page) + tree = ET.ElementTree(ET.Element('asdf')) + for imprint in imprints: + imprint.attach_object_to_tree(tree) + tree.xpath('//asdf')[0].set('test', 'This is a Test.') + #print(ET.dump(tree.getroot())) + + def test_init_from_node(self): + for imprint in extract_imprints(self.test_page): + imprint.attach_object_to_tree(self.test_page.page_tree) + imprints = [ Imprint.create_cls_from_node(node, self.test_page.lines) for node in self.test_page.page_tree.xpath('//' + Imprint.XML_TAG) ] + self.assertEqual(len(imprints), 4) + """ + Imprint.DEBUG = True + page = Page('xml/Mp_XV_page81v.xml') + line_list_string = '21-24,30-36,65-68' + for imprint in page.imprints: + print(imprint.reference) + for line in imprint.lines: print(line.id) + """ + +if __name__ == "__main__": + unittest.main() Index: tests_svgscripts/test_process_footnotes.py =================================================================== --- tests_svgscripts/test_process_footnotes.py (revision 109) +++ tests_svgscripts/test_process_footnotes.py (revision 110) @@ -1,46 +1,54 @@ import unittest from os import sep, path, remove from os.path import isdir, isfile, dirname import shutil import 
sys import lxml.etree as ET import warnings import sys sys.path.append('svgscripts') from datatypes.footnotes import extract_footnotes +from datatypes.imprint import Imprint from datatypes.page import Page import process_footnotes -from process_footnotes import categorize_footnotes, main +from process_footnotes import categorize_footnotes, main, save_imprints class TestExtractFootnotes(unittest.TestCase): def setUp(self): process_footnotes.UNITTESTING = True DATADIR = path.dirname(__file__) + sep + 'test_data' self.test_footnote = DATADIR + sep + 'W_I_8_neu_125-01.svg' self.test_footnote_verso = DATADIR + sep + 'N_VII_1_xp5_4_page5.svg' self.test_footnote_recto = DATADIR + sep + 'N_VII_1_xp5_4_page6.svg' self.test_footnote_multi = DATADIR + sep + 'N_VII_1_xp5_4_page13.svg' self.test_footnote_multi_xml = DATADIR + sep + 'N_VII_1_page013.xml' self.test_categorize_footnote = DATADIR + sep + 'N_VII_1_page006.xml' def test_categorize_footnotes(self): page = Page(self.test_categorize_footnote) footnotes = extract_footnotes(page, svg_file=self.test_footnote_recto) categorize_footnotes(page, footnotes) words_with_comments = [ word for word in page.words if word.editor_comment is not None ] self.assertEqual(len(words_with_comments), 4) lines_with_comments = [ line for line in page.lines if len(line.editor_comments) > 0 ] self.assertEqual(len(lines_with_comments), 1) page = Page('xml/W_II_1_page141.xml') footnotes = extract_footnotes(page) categorize_footnotes(page, footnotes, debug=True) words_with_comments = [ word for word in page.words if word.editor_comment is not None ] + def test_save_imprints(self): + page = Page(self.test_categorize_footnote) + save_imprints(page) + self.assertEqual(len(page.page_tree.xpath('//' + Imprint.XML_TAG)), 2) + #print(ET.dump(page.page_tree.getroot())) + + def test_main(self): self.assertEqual(main(['xml/N_VII_1_page005.xml']), 0) if __name__ == "__main__": unittest.main() Index: fixes/fix_old_data.py =================================================================== --- fixes/fix_old_data.py (revision 109) +++ fixes/fix_old_data.py (revision 110) @@ -1,540 +1,551 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to fix old data. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see https://www.gnu.org/licenses/. 1}}}
from colorama import Fore, Style
from deprecated import deprecated
from functools import cmp_to_key
import getopt
import inspect
import lxml.etree as ET
import re
import shutil
import string
from svgpathtools import svg2paths2, svg_to_paths
from svgpathtools.path import Path as SVGPath
from svgpathtools.path import Line
import sys
import tempfile
from operator import attrgetter
import os
from os import listdir, sep, path, setpgrp, devnull
from os.path import exists, isfile, isdir, dirname, basename
from progress.bar import Bar
import warnings
sys.path.append('svgscripts')
from convert_wordPositions import HTMLConverter
from datatypes.box import Box
from datatypes.faksimile import FaksimilePage
+from datatypes.imprint import Imprint
from datatypes.archival_manuscript import ArchivalManuscriptUnity
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.matrix import Matrix
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from datatypes.positional_word_part import PositionalWordPart
from datatypes.path import Path
from datatypes.svg_image import SVGImage # assumed module path; needed by _fix_old_transkription_positions below
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.word import Word, update_transkription_position_ids
from join_faksimileAndTranskription import sort_words
from util import back_up, back_up_svg_file, copy_faksimile_svg_file, reset_tp_with_matrix
from process_files import update_svgposfile_status
+from process_footnotes import save_imprints
from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR
sys.path.append('shared_util')
from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from main_util import create_function_dictionary, get_manuscript_files

__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

UNITTESTING = False
MAX_SVG_XY_THRESHOLD = 10

#TODO: fix all svg graphical files: change xlink:href to href!!!!

def convert_old_matrix(tp, xmin, ymin) -> (Matrix, float, float):
    """Return new matrix, x and y for old transkription_position.
    """
    matrix = tp.transform.clone_transformation_matrix()
    matrix.matrix[Matrix.XINDEX] = round(tp.transform.matrix[Matrix.XINDEX] + xmin, 3)
    matrix.matrix[Matrix.YINDEX] = round(tp.transform.matrix[Matrix.YINDEX] + ymin, 3)
    x = round(tp.left - tp.transform.matrix[Matrix.XINDEX], 3)\
            if tp.left > 0\
            else 0
    y = round((tp.height-1.5)*-1, 3)
    return matrix, x, y

def save_page(page, attach_first=False, backup=False, script_name=None):
    """Write page to xml file
    """
    if backup:
        back_up(page, page.xml_file)
    if attach_first:
        page.update_and_attach_words2tree()
    if script_name is None:
        script_name = f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}'
    write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\
            script_name=script_name, file_type=FILE_TYPE_SVG_WORD_POSITION)

def page_already_changed(page) -> bool:
    """Return whether page has already been changed by a function.
    """
    return len(\
            page.page_tree.xpath(f'//metadata/modifiedBy[@script="{__file__}:{inspect.currentframe().f_back.f_code.co_name}"]')\
            ) > 0

def fix_faksimile_line_position(page, redo=False) -> bool:
    """Create a faksimile line position.
""" if not redo and page_already_changed(page): return False; update_faksimile_line_positions(page) if not UNITTESTING: save_page(page) return True def check_faksimile_positions(page, redo=False) -> bool: """Check faksimile line position. """ if len(page.page_tree.xpath('//data-source/@file')) > 0: svg_file = page.page_tree.xpath('//data-source/@file')[0] svg_tree = ET.parse(svg_file) positions_are_equal_counter = 0 page_changed = False for faksimile_page in FaksimilePage.GET_FAKSIMILEPAGES(svg_tree): if page.title == faksimile_page.title\ and page.number == faksimile_page.page_number: #print([fp.id for fp in faksimile_page.word_positions ]) for word in page.words: for fp in word.faksimile_positions: rect_fps = [ rfp for rfp in faksimile_page.word_positions if rfp.id == fp.id ] if len(rect_fps) > 0: rfp = rect_fps[0] if fp.left != rfp.left or fp.top != rfp.top: #print(f'{fp.id}: {fp.left}/{rfp.left} {fp.top}/{rfp.top}') fp.left = rfp.left fp.top = rfp.top fp.bottom = fp.top + rfp.height word.attach_word_to_tree(page.page_tree) page_changed = True else: positions_are_equal_counter += 1 print(f'{positions_are_equal_counter}/{len(page.words)} are equal') if page_changed and not UNITTESTING: save_page(page) return page_changed def fix_faksimile_positions(page, redo=False) -> bool: """Set faksimile positions to absolute values. [:return:] fixed """ if not redo and len(page.page_tree.xpath(f'//metadata/modifiedBy[@script="{__file__}"]')) > 0: return False x_min = page.text_field.xmin y_min = page.text_field.ymin for word in page.words: for fp in word.faksimile_positions: fp.left = fp.left + x_min fp.top = fp.top + y_min fp.bottom = fp.bottom + y_min word.attach_word_to_tree(page.page_tree) if not UNITTESTING: print(f'writing to {page.page_tree.docinfo.URL}') write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\ script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION) return True def _fix_tp_of_word(page, word, text_field): """Fix transkription positions ->set relative to 0,0 instead of text_field.left,text_field.top """ for tp in word.transkription_positions: tp.left += text_field.left tp.top += text_field.top reset_tp_with_matrix(word.transkription_positions) if type(word) == Word: words_in_word = word.word_parts + [ item for item in word.__dict__.items() if type(item) == Word ] for wp in words_in_word: _fix_tp_of_word(page, wp, text_field) def fix_tp_with_matrix(page, redo=False) -> bool: """Fix transkription positions with rotation matrix ->set left to 0 and top to -5. 
    [:return:] fixed
    """
    xmin = 0 if page.svg_image is None or page.svg_image.text_field is None else page.svg_image.text_field.left
    ymin = 0 if page.svg_image is None or page.svg_image.text_field is None else page.svg_image.text_field.top
    for word in page.words:
        reset_tp_with_matrix(word.transkription_positions, tr_xmin=xmin, tr_ymin=ymin)
        for wp in word.word_parts:
            reset_tp_with_matrix(wp.transkription_positions, tr_xmin=xmin, tr_ymin=ymin)
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        save_page(page, attach_first=True)
    return True

def _fix_old_transkription_positions(page, redo=False) -> bool:
    """Fix transkription positions ->set relative to 0,0 instead of text_field.left,text_field.top
    [:return:] fixed
    """
    if page.svg_image is None\
            or page.svg_image.text_field is None:
        if page.svg_image is None:
            if page.svg_file is not None:
                transkription_field = TranskriptionField(page.svg_file)
                width = round(transkription_field.documentWidth, 3)
                height = round(transkription_field.documentHeight, 3)
                page.svg_image = SVGImage(file_name=page.svg_file, width=width,\
                        height=height, text_field=transkription_field.convert_to_text_field())
                page.svg_image.attach_object_to_tree(page.page_tree)
            else:
                raise Exception(f'ERROR page {page.page_tree.docinfo.URL} does not have a svg_file!')
        elif page.svg_image.text_field is None:
            page.svg_image.text_field = TranskriptionField(page.svg_image.file_name).convert_to_text_field()
            page.svg_image.attach_object_to_tree(page.page_tree)
        for line_number in page.line_numbers:
            line_number.top += page.svg_image.text_field.top
            line_number.bottom += page.svg_image.text_field.top
            line_number.attach_object_to_tree(page.page_tree)
        for word in page.words:
            _fix_tp_of_word(page, word, page.svg_image.text_field)
        for mark in page.mark_foreign_hands:
            _fix_tp_of_word(page, mark, page.svg_image.text_field)
        for tcm in page.text_connection_marks:
            _fix_tp_of_word(page, tcm, page.svg_image.text_field)
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, attach_first=True)
        return True
    return False

def _fix_old_pwps(page, old_tps):
    """Adjust positional_word_parts to corrected transkription_positions.
    """
    for tp in old_tps:
        for pwp in tp.xpath(f'./{PositionalWordPart.XML_TAG}'):
            left = float(pwp.get('left'))
            top = float(pwp.get('top'))
            bottom = float(pwp.get('bottom'))
            pwp.set('left', str(left + page.svg_image.text_field.left))
            pwp.set('top', str(top + page.svg_image.text_field.top))
            pwp.set('bottom', str(bottom + page.svg_image.text_field.top))
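# The shift applied in _fix_old_pwps above is a plain translation: old files
# store positional word parts relative to the text field, new files store them
# relative to 0,0. A minimal stand-alone sketch of the same arithmetic on a
# fake node (tag name and values are made up):
#
#     pwp = ET.fromstring('<positional-word-part left="10.0" top="20.0" bottom="30.0"/>')
#     text_field_left, text_field_top = 37.8, 24.5
#     pwp.set('left', str(float(pwp.get('left')) + text_field_left))     # -> '47.8'
#     pwp.set('top', str(float(pwp.get('top')) + text_field_top))        # -> '44.5'
#     pwp.set('bottom', str(float(pwp.get('bottom')) + text_field_top))  # -> '54.5'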
""" for tp in old_tps: heighest_pwp = sorted(tp.xpath(f'./{PositionalWordPart.XML_TAG}'), key=lambda pwp: float(pwp.get('height')), reverse=True)[0] toppest_pwp = sorted(tp.xpath(f'./{PositionalWordPart.XML_TAG}'), key=lambda pwp: float(pwp.get('top')))[0] new_height = float(tp.get('height')) + abs(float(heighest_pwp.get('top'))-float(toppest_pwp.get('top'))) tp.set('height', str(new_height)) def fix_transkription_positions(page, redo=False) -> bool: """Fix transkription positions ->set relative to 0,0 instead of text_field.left,text_field.top [:return:] fixed """ THRESHOLD = 10 if page.svg_image is not None\ and page.svg_image.text_field is None: if not _fix_old_transkription_positions(page): return False _fix_old_pwps(page, [ pwp.getparent() for pwp in page.page_tree.xpath(f'//{PositionalWordPart.XML_TAG}[@id="0"]')\ if abs(float(pwp.get('left')) - float(pwp.getparent().get('left'))) > THRESHOLD ]) _fix_quotation_mark_tps(page, [ tp for tp in page.page_tree.xpath(f'//{TranskriptionPosition.XML_TAG}')\ if len(tp.xpath(f'./{PositionalWordPart.XML_TAG}')) > 0\ and sorted(tp.xpath(f'./{PositionalWordPart.XML_TAG}'), key=lambda pwp: float(pwp.get('height')), reverse=True)[0]\ != sorted(tp.xpath(f'./{PositionalWordPart.XML_TAG}'), key=lambda pwp: float(pwp.get('top')))[0] ]) if not UNITTESTING: print(f'writing to {page.page_tree.docinfo.URL}') save_page(page) return True def fix_styles(page, redo=False): """Remove unused styles from tree. """ if len(page.page_tree.xpath('//style')) > 1: for node in page.page_tree.xpath('//style')[1:]: node.getparent().remove(node) if not UNITTESTING: print(f'writing to {page.page_tree.docinfo.URL}') save_page(page) return True +def fix_imprints(page, redo=False): + """Remove unused styles from tree. + """ + if len(page.page_tree.xpath('//' + Imprint.XML_TAG)) == 0: + save_imprints(page) + return True + def merge_transkription_positions(page, redo=False) -> bool: """Fix transkription positions of merged words [:return:] fixed """ if not isdir(dirname(page.page_tree.docinfo.URL) + sep + MERGED_DIR)\ or not isfile(dirname(page.page_tree.docinfo.URL) + sep + MERGED_DIR + sep + basename(page.page_tree.docinfo.URL)): return False merged_page = Page(dirname(page.page_tree.docinfo.URL) + sep + MERGED_DIR + sep + basename(page.page_tree.docinfo.URL)) sync_dictionary = sync_words_linewise(merged_page.words, page.words, merged_page.line_numbers) words = [] for source_word in merged_page.words: words.append(source_word) if bool(sync_dictionary.get(source_word)): _sync_transkriptions_with_words(source_word, sync_dictionary) if source_word.text != ''.join([ t.get_text() for t in source_word.transkription_positions ]): text = ''.join([ t.get_text() for t in source_word.transkription_positions ]) print(f'{source_word.line_number}: {source_word.text} has transkription_positions with text "{text}".') response = input('Change? 
            if not response.startswith('n'):
                new_sync_dictionary = sync_words_linewise(merged_page.words, page.words,\
                        [ line for line in merged_page.line_numbers if line.id == source_word.line_number ], force_sync_on_word=source_word)
                if bool(new_sync_dictionary.get(source_word)):
                    _sync_transkriptions_with_words(source_word, new_sync_dictionary)
                else:
                    raise Exception(f'Could not find source_word {source_word.text} in {new_sync_dictionary}!')
    page.words = words
    page.update_and_attach_words2tree()
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        save_page(page)
    return True

def fix_graphical_svg_file(page, redo=False) -> bool:
    """Fix glyphs of word for which there is a /changed-word in page.page_tree
    """
    svg_tree = ET.parse(page.svg_file)
    transkription_field = TranskriptionField(page.source)
    namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
    back_up_svg_file(svg_tree, namespaces=namespaces)
    tr_xmin = transkription_field.xmin if (page.svg_image is None or page.svg_image.text_field is None) else 0
    tr_ymin = transkription_field.ymin if (page.svg_image is None or page.svg_image.text_field is None) else 0
    for deleted_word_node in page.page_tree.xpath('//deleted-word'):
        deleted_word = Word.create_cls(deleted_word_node)
        _run_function_on_nodes_for_word(svg_tree, namespaces, deleted_word, tr_xmin, tr_ymin, _set_node_attribute_to, 'visibility', 'hidden')
    for changed_word_node in page.page_tree.xpath('//changed-word'):
        changed_word = Word.create_cls(changed_word_node)
        try:
            word = [ word for word in page.words if word.id == changed_word.id and word.text == changed_word.text ][0]
            left_difference = word.transkription_positions[0].left - changed_word.transkription_positions[0].left
            _run_function_on_nodes_for_word(svg_tree, namespaces, word, tr_xmin, tr_ymin, _add_value2attribute, 'x', left_difference)
        except IndexError:
            warnings.warn(f'There is no word for changed_word {changed_word.id}: "{changed_word.text}" in {page.page_tree.docinfo.URL}!')
    copy_faksimile_svg_file(target_file=page.svg_file, faksimile_tree=svg_tree, namespaces=namespaces)
    return True

def _add_value2attribute(node, attribute, value):
    """Add left_difference to x of node.
    """
    node.set(attribute, str(float(node.get(attribute)) + value))
    node.set('changed', 'true')

def _get_nodes_with_symbol_id(svg_tree, namespaces, symbol_id, svg_x, svg_y, threshold=0.1) -> list:
    """Return nodes with symbol_id and x = svg_x and y = svg_y.
    """
    nodes = [ node for node in svg_tree.xpath(\
            f'//ns:use[@xlink:href="#{symbol_id}" and @x > {svg_x-threshold} and @x < {svg_x+threshold} and @y > {svg_y-threshold} and @y < {svg_y+threshold} ]',\
            namespaces=namespaces) if not bool(node.get('changed')) ]
    if len(nodes) == 0 and threshold < MAX_SVG_XY_THRESHOLD:
        return _get_nodes_with_symbol_id(svg_tree, namespaces, symbol_id, svg_x, svg_y, threshold=threshold+1)
    return nodes

def _run_function_on_nodes_for_word(svg_tree, namespaces, word, tr_xmin, tr_ymin, function_on_node, attribute, value):
    """Run function on nodes for words.
    """
    for tp in word.transkription_positions:
        for pwp in tp.positional_word_parts:
            symbol_id = pwp.symbol_id
            svg_x = pwp.left + tr_xmin
            svg_y = pwp.bottom + tr_ymin
            nodes = _get_nodes_with_symbol_id(svg_tree, namespaces, symbol_id, svg_x, svg_y)
            if len(nodes) > 0:
                node = nodes[0]
                function_on_node(node, attribute, value)

def _set_node_attribute_to(node, attribute, value):
    """Set attribute of node to value.
    """
    node.set(attribute, str(value))
    node.set('changed', 'true')
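# _get_nodes_with_symbol_id above widens its search window when nothing is
# found: it starts with a threshold of 0.1 and retries with threshold+1 until
# MAX_SVG_XY_THRESHOLD (10) is reached. A minimal sketch of that back-off, with
# the xpath lookup abstracted into a hypothetical match(threshold) callable:
def _example_threshold_backoff(match, threshold=0.1, max_threshold=MAX_SVG_XY_THRESHOLD):
    """Illustrative only: return the first non-empty result of match(threshold),
    widening the threshold by 1 on each retry up to max_threshold.
    """
    nodes = match(threshold)
    if len(nodes) == 0 and threshold < max_threshold:
        return _example_threshold_backoff(match, threshold=threshold+1, max_threshold=max_threshold)
    return nodes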
""" node.set(attribute, str(value)) node.set('changed', 'true') def sync_words_linewise(source_words, target_words, lines, force_sync_on_word=None) -> dict: """Sync words an create a dictionary with source_words as keys, refering to a list of corresponding words. """ result_dict = {} for word in target_words + source_words: word.processed = False for line in lines: source_words_on_line = sorted([ word for word in source_words if word.line_number == line.id ], key=lambda word: word.transkription_positions[0].left) target_words_on_line = sorted([ word for word in target_words if word.line_number == line.id ], key=lambda word: word.transkription_positions[0].left) if len(target_words_on_line) == len(source_words_on_line): _sync_same_length(result_dict, source_words_on_line, target_words_on_line, force_sync_on_word=force_sync_on_word) elif len(source_words_on_line) < len(target_words_on_line): _sync_more_target_words(result_dict, source_words_on_line, target_words_on_line, force_sync_on_word=force_sync_on_word) else: print('okey dokey') return result_dict def _force_sync_on_word(force_sync_on_word, target_words_on_line, result_dict): """Force sync on word. """ unprocessed_target_words = [t_word for t_word in target_words_on_line if not t_word.processed] if len(unprocessed_target_words) > 0: print([ (i, t_word.text) for i, t_word in enumerate(unprocessed_target_words)]) response = input(f'Please specify indices of words to sync {force_sync_on_word.text} with: [default:0-{len(unprocessed_target_words)-1}]>') indices = [ i for i in range(0, len(unprocessed_target_words)) ] if re.match(r'\d+-\d+', response): index_strings = response.split('-') indices = [ i for i in range(int(index_strings[0]), int(index_strings[1])+1) ] elif response != '': indices = [ int(i) for i in response.split(' ') ] target_words = [] for i in indices: target_words.append(unprocessed_target_words[i]) result_dict.update({ force_sync_on_word: target_words }) else: raise Exception(f'There are no unprocessed target_words for {force_sync_on_word.text} on line {force_sync_on_word.line_number}!') def _sync_transkriptions_with_words(word, sync_dictionary): """Sync transkription_positions of word with syncronized words. """ word.transkription_positions = [] for target_word in sync_dictionary[word]: word.transkription_positions += target_word.transkription_positions def _sync_more_target_words(result_dict, source_words_on_line, target_words_on_line, force_sync_on_word=None): """Sync if there are more target words. 
""" current_source_word = None for target_word in target_words_on_line: if current_source_word is not None\ and current_source_word.text.startswith(''.join([ w.text for w in result_dict[current_source_word]]) + target_word.text): result_dict[current_source_word].append(target_word) target_word.processed = True if current_source_word.text == ''.join([ w.text for w in result_dict[current_source_word]]): current_source_word = None elif len([ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text == target_word.text ]) > 0: source_word = [ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text == target_word.text ][0] target_word.processed = True source_word.processed = True result_dict.update({ source_word: [ target_word ] }) elif len([ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text.startswith(target_word.text) ]) > 0: current_source_word = [ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text.startswith(target_word.text) ][0] current_source_word.processed = True target_word.processed = True result_dict.update({ current_source_word: [ target_word ] }) else: msg = f'On line {target_word.line_number}: target_word "{target_word.text}" does not have a sibling in {[ s.text for s in source_words_on_line if not s.processed ]}' warnings.warn(msg) if force_sync_on_word is not None: _force_sync_on_word(force_sync_on_word, target_words_on_line, result_dict) def _sync_same_length(result_dict, source_words_on_line, target_words_on_line, force_sync_on_word=None): """Sync same length """ for i, word in enumerate(source_words_on_line): if word.text == target_words_on_line[i].text: word.processed = True target_words_on_line[i].processed = True result_dict.update({ word: [ target_words_on_line[i] ] }) elif len([ t_word for t_word in target_words_on_line if not t_word.processed and t_word.text == word.text ]) > 0: target_word = [ t_word for t_word in target_words_on_line if not t_word.processed and t_word.text == word.text ][0] word.processed = True target_word.processed = True result_dict.update({ word: [ target_word ] }) else: msg = f'On line {word.line_number}: source_word "{word.text}" does not have a sibling in {[ s.text for s in target_words_on_line]}' warnings.warn(msg) if force_sync_on_word is not None: _force_sync_on_word(force_sync_on_word, target_words_on_line, result_dict) def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to fix old data. svgscripts/fix_old_data.py [OPTIONS] a xml file about a manuscript, containing information about its pages. a xml file about a page, containing information about svg word positions. OPTIONS: -h|--help show help -c|--check-faksimile-positions check whether faksimile positions have been updated + -i|--fix-imprints add imprints to page -l|--faksimile-line-position create faksimile line positions -p|--faksimile-positions fix old faksimile positions -r|--redo rerun -s|--fix-graphical-svg fix use position of glyphs for words changed by 'changed-word' and 'deleted-word' in xml file. -S|--fix-styles fix use position of glyphs for words changed by 'changed-word' and 'deleted-word' in xml file. 
-t|--transkription-positions fix old transkription positions -M|--matrix fix old transkription positions with transform matrix :return: exit code (int) """ function_list = [] function_dict = create_function_dictionary(['-c', '--check-faksimile-positions'], check_faksimile_positions) function_dict = create_function_dictionary(['-l', '--faksimile-line-position'], fix_faksimile_line_position, function_dictionary=function_dict) function_dict = create_function_dictionary(['-p', '--faksimile-positions'], fix_faksimile_positions, function_dictionary=function_dict) function_dict = create_function_dictionary(['-m', '--merge-positions'], merge_transkription_positions, function_dictionary=function_dict) function_dict = create_function_dictionary(['-s', '--fix-graphical-svg'], fix_graphical_svg_file, function_dictionary=function_dict) function_dict = create_function_dictionary(['-M', '--matrix'], fix_tp_with_matrix, function_dictionary=function_dict) function_dict = create_function_dictionary(['-t', '--transkription-positions'], fix_transkription_positions, function_dictionary=function_dict) - function_dict = create_function_dictionary(['default', '-S', '--fix-styles'], fix_styles, function_dictionary=function_dict) + function_dict = create_function_dictionary(['-S', '--fix-styles'], fix_styles, function_dictionary=function_dict) + function_dict = create_function_dictionary(['default', '-i', '--fix-imprints'], fix_imprints, function_dictionary=function_dict) redo = False; try: - opts, args = getopt.getopt(argv, "hcplrmsStM", ["help", "check-faksimile-positions", "faksimile-positions", "faksimile-line-position",\ - "redo", "merge-positions", "fix-graphical-svg", "fix-styles", "transkription-positions", 'matrix' ]) + opts, args = getopt.getopt(argv, "hcplrmsStMi", ["help", "check-faksimile-positions", "faksimile-positions", "faksimile-line-position",\ + "redo", "merge-positions", "fix-graphical-svg", "fix-styles", "transkription-positions", 'matrix', 'fix-imprints' ]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 elif opt in ('-r', '--redo'): redo = True; elif opt in function_dict.keys(): function_list.append(function_dict[opt]) if len(function_list) == 0: function_list.append(function_dict['default']) if len(args) < 1: usage() return 2 exit_status = 0 for xml_file in get_manuscript_files(args): if isfile(xml_file): counters = { f.__name__: 0 for f in function_list } for current_function in function_list: status_contains = STATUS_MERGED_OK if 'faksimile' in current_function.__name__ else 'OK' for page in Page.get_pages_from_xml_file(xml_file, status_contains=status_contains): if not UNITTESTING: print(Fore.CYAN + f'Processing {page.title}, {page.number} with function {current_function.__name__} ...' 
+ Style.RESET_ALL) back_up(page, page.xml_file) counters[current_function.__name__] += 1 if current_function(page, redo=redo) else 0 if not UNITTESTING: for function_name, counter in counters.items(): print(Style.RESET_ALL + f'[{counter} pages changed by {function_name}]') else: raise FileNotFoundError('File {} does not exist!'.format(xml_file)) return exit_status if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: py2ttl/py2ttl_ontology.py =================================================================== --- py2ttl/py2ttl_ontology.py (revision 109) +++ py2ttl/py2ttl_ontology.py (revision 110) @@ -1,369 +1,371 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to convert py classes that are subclasses of class_spec.SemanticClass to a owl ontology in turtle format. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} +from datetime import datetime import getopt import importlib import importlib.util import inspect import lxml.etree as ET from os import sep, path, listdir from os.path import isfile, isdir, dirname, basename from progress.bar import Bar from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD import re import requests import sys import warnings if dirname(__file__) not in sys.path: sys.path.append(dirname(__file__)) from class_spec import SemanticClass, UnSemanticClass from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL, PROJECT_ONTOLOGY_FILE_URL from data_handler import RDFDataHandler sys.path.append('shared_util') from myxmlwriter import dict2xml __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" class Py2TTLOntologyConverter: """This class can be used convert semantic_dictionaries to a owl ontology in turtle format. 
""" UNITTESTING = False INFERRED_SUB_CLASS = RDFS.subClassOf * '*' def __init__(self, project_ontology_file=None): self.class_uri_dict = {} self.uri_mapping4cls_and_properties = {} self.project_graph = Graph() self.base_uriref = URIRef(PROJECT_URL) self.project_name = PROJECT_NAME self.ns = { self.base_uriref + '#': self.project_name } if project_ontology_file is not None and isfile(project_ontology_file): if project_ontology_file == PROJECT_ONTOLOGY_FILE: r = requests.get(PROJECT_ONTOLOGY_FILE_URL) with open(project_ontology_file, 'wb') as f: f.write(r.content) print(f'{project_ontology_file} updated from github repository') self.project_graph.parse(project_ontology_file, format="turtle") if len(self.project_graph) > 0: self.base_uriref = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False) self.ns = { uriref: ns for ns, uriref in self.project_graph.namespace_manager.namespaces() } self.project_name = self.ns.get(self.base_uriref + '#') self.project_graph.bind(self.project_name, self.base_uriref + '#') + self.project_graph.add((self.base_uriref, OWL.versionInfo, Literal(datetime.now().strftime('%Y-%m-%d')))) self.uri_mapping4cls_and_properties.update({ 'ontology': { 'project_name': self.project_name, 'project_uri': self.base_uriref + '#' }}) self.uri_mapping4cls_and_properties.update({ 'classes': {} }) def addClass2Graph(self, cls, semantic_dict=None) -> (URIRef, type): """Add a class to project_graph. :return: (cls_uri (URIRef), super_cls (cls)) """ if semantic_dict is None: semantic_dict = cls.get_semantic_dictionary() comment, label = self.get_comment_label(cls) cls_uri = URIRef(self.base_uriref + '#' + cls.__name__) self.project_graph.add((cls_uri, RDF.type, OWL.Class)) self.project_graph.add((cls_uri, RDFS.isDefinedBy, self.base_uriref)) if comment != '': self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en'))) if label != '': self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en'))) super_uri = None super_cls = None if bool(semantic_dict[SemanticClass.CLASS_KEY].get(SemanticClass.TYPE)): super_cls = semantic_dict[SemanticClass.CLASS_KEY].get(SemanticClass.TYPE) super_uri = self.createClassAndProperties(super_cls) if super_uri is not None: self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri)) if SemanticClass.SUBCLASS_OF in semantic_dict[SemanticClass.CLASS_KEY].keys()\ and len(semantic_dict[SemanticClass.CLASS_KEY][SemanticClass.SUBCLASS_OF]) > 0: for super_uri_string in semantic_dict[SemanticClass.CLASS_KEY].get(SemanticClass.SUBCLASS_OF): super_uri = URIRef(super_uri_string) if not (cls_uri, self.INFERRED_SUB_CLASS, super_uri) in self.project_graph: self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri)) return cls_uri, super_cls def addProperty2Graph(self, property_uri, domain_uri, range_uri, info_dict, property_type=OWL.ObjectProperty): """Add a property to self.project_graph. 
""" label = 'has ' + property_uri.split('#')[1].replace('has','')\ if SemanticClass.PROPERTY_LABEL not in info_dict.keys() else info_dict[SemanticClass.PROPERTY_LABEL] self.project_graph.add((property_uri, RDF.type, property_type)) self.project_graph.add((property_uri, RDFS.isDefinedBy, self.base_uriref)) self.project_graph.add((property_uri, RDFS.domain, domain_uri)) self.project_graph.add((property_uri, RDFS.range, range_uri)) if SemanticClass.PROPERTY_COMMENT in info_dict.keys(): comment = info_dict[SemanticClass.PROPERTY_COMMENT] self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en'))) self.project_graph.add((property_uri, RDFS.label, Literal(label, lang='en'))) if SemanticClass.CARDINALITY in info_dict.keys()\ and info_dict[SemanticClass.CARDINALITY] > 0: self.addRestriction2Class(domain_uri, property_uri, info_dict) def addRestriction2Class(self, cls_uri, property_uri, info_dict): """Adds restriction on property_uri to class cls_uri. """ if SemanticClass.CARDINALITY in info_dict.keys()\ and info_dict[SemanticClass.CARDINALITY] > 0: if (cls_uri, None, None) not in self.project_graph: warnings.warn('{} not in graph!'.format(cls_uri)) restriction = BNode() cardinality_restriction = URIRef(OWL + info_dict[SemanticClass.CARDINALITY_RESTRICTION])\ if SemanticClass.CARDINALITY_RESTRICTION in info_dict.keys()\ else OWL.cardinality cardinality = info_dict[SemanticClass.CARDINALITY] self.project_graph.add((cls_uri, RDFS.subClassOf, restriction)) self.project_graph.add((restriction, RDF.type, OWL.Restriction)) self.project_graph.add((restriction, OWL.onProperty, property_uri)) self.project_graph.add((restriction, cardinality_restriction, Literal(str(cardinality), datatype=XSD.nonNegativeInteger))) def create_ontology(self, datatypes_dir, target_ontology_file): """Convert all classes contained in datatypes_dir that are subclasses of class_spec.SemanticClass to rdf. :return: exit code (int) """ if isdir(datatypes_dir): semantic_classes = self.get_semantic_classes(datatypes_dir) if not Py2TTLOntologyConverter.UNITTESTING: bar = Bar('creating classes and properties', max=len(semantic_classes)) for cls in semantic_classes: self.createClassAndProperties(cls) not bool(Py2TTLOntologyConverter.UNITTESTING) and bar.next() not bool(Py2TTLOntologyConverter.UNITTESTING) and bar.finish() self.uri_mapping4cls_and_properties['ontology'].update({'ontology_file': target_ontology_file}) f = open(target_ontology_file, 'wb+') f.write(self.project_graph.serialize(format="turtle")) f.close() if not Py2TTLOntologyConverter.UNITTESTING: xml_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2' + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml') dict2xml(self.uri_mapping4cls_and_properties, xml_file) else: print('Error: dir {} does not exist!'.format(datatypes_dir)) usage return 1 return 0 def createClassAndProperties(self, cls): """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class. 
""" if not cls.__name__ in self.class_uri_dict: self.class_uri_dict.update({cls.__name__: cls}) semantic_dict = cls.get_semantic_dictionary() cls_uri, super_cls = self.addClass2Graph(cls, semantic_dict) uri_mapping4properties = {} for property_key in self._get_semantic_dictionary_keys_super_first(semantic_dict['properties']): super_semantic_dict = {} if super_cls is None else super_cls.get_semantic_dictionary() if len(super_semantic_dict) == 0 or not bool(super_semantic_dict['properties'].get(property_key)): property_dict4key = semantic_dict['properties'].get(property_key) property_cls = property_dict4key.get('class') subject_uri, property_uri = self.createProperty(cls_uri, property_key, property_cls, property_dict4key) uri_mapping4properties.update({ property_key: property_uri }) elif bool(self.uri_mapping4cls_and_properties.get('classes').get(super_cls.__name__).get('properties').get(property_key)): property_uri = self.uri_mapping4cls_and_properties['classes'][super_cls.__name__]['properties'][property_key] uri_mapping4properties.update({ property_key: property_uri}) self.uri_mapping4cls_and_properties.get('classes').update({ cls.__name__: { 'class_uri': cls_uri, 'properties': uri_mapping4properties }}) return URIRef(self.base_uriref + '#' + cls.__name__) def createProperty(self, domain_uri, property_name, range_cls, info_dict) -> (URIRef, URIRef): """Creates a owl:ObjectProperty. :return: tuple of domain_uri (rdflib.URIRef) and property_uri (rdflib.URIRef) of created property """ name = self.createPropertyName(property_name=property_name)\ if SemanticClass.PROPERTY_NAME not in info_dict.keys() else info_dict[SemanticClass.PROPERTY_NAME] property_uri = URIRef(self.base_uriref + '#' + name) inferredSubClass = RDFS.subClassOf * '*' range_uri = URIRef(self.base_uriref + '#' + range_cls.__name__) super_property_uri = None if SemanticClass.SUBPROPERTYOF in info_dict.keys(): super_property_uri = URIRef(info_dict[SemanticClass.SUBPROPERTYOF]) elif SemanticClass.SUPER_PROPERTY in info_dict.keys(): domain_uri, super_property_uri = self.createProperty(domain_uri,\ info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME),\ range_cls, info_dict[SemanticClass.SUPER_PROPERTY]) if (property_uri, None, None) not in self.project_graph: property_type = OWL.ObjectProperty if range_cls.__module__ == 'builtins': if range_cls != list: property_type = OWL.DatatypeProperty range_uri = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING.get(range_cls) if range_uri == XSD.string and property_name == 'URL': range_uri = XSD.anyURI self.addProperty2Graph(property_uri, domain_uri, range_uri, info_dict, property_type=property_type) elif not True in [\ (domain_uri, inferredSubClass, o) in self.project_graph\ for o in self.project_graph.objects(property_uri, RDFS.domain)\ ]: # if domain_uri is NOT a subclass of a cls specified by RDFS.domain if SemanticClass.CARDINALITY in info_dict.keys()\ and info_dict[SemanticClass.CARDINALITY] > 0: self.addRestriction2Class(domain_uri, property_uri, info_dict) self.project_graph.add((property_uri, RDFS.domain, domain_uri)) if super_property_uri is not None\ and (property_uri, RDFS.subPropertyOf, super_property_uri) not in self.project_graph: self.project_graph.add((property_uri, RDFS.subPropertyOf, super_property_uri)) return domain_uri, property_uri def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'): """Returns a property name. 
""" if property_name is not None: property_name = ''.join([ property_name.split('_')[0].lower() ] + [ text.capitalize() for text in property_name.split('_')[1:] ]) return prefix + property_name[0].upper() + property_name[1:] if property_name[0].islower()\ else prefix + property_name elif subject_uri is not None: property_name = subject_uri.split('#')[1] + self.createPropertyName(object_uri=object_uri, prefix=connector) return property_name[0].lower() + property_name[1:] elif object_uri is not None: return prefix + object_uri.split('#')[1] else: return prefix def get_comment_label(self, cls): """Returns comment and label from cls __doc__. """ comment = cls.__doc__.replace('\n','').lstrip() label = cls.__name__ if '.' in cls.__doc__: comment = [ text for text in cls.__doc__.split('\n') if text != '' ][0].lstrip() if '@label' in cls.__doc__: m = re.search('(@label[:]*\s)(.*[\.]*)', cls.__doc__) label_tag, label = m.groups() elif re.search('([A-Z][a-z]+)', label): m = re.search('([A-Z]\w+)([A-Z]\w+)', label) label = ' '.join([ text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != '' ]) return comment, label def get_semantic_classes(self, datatypes_dir): """Returns a list of all classes that are contained in datatypes_dir that are subclasses of class_spec.SemanticClass. :return: a list of (str_name, class) """ base_dir = dirname(dirname(__file__)) sys.path.append(base_dir) root_modul_name = datatypes_dir.replace('/','.') files = [ file.replace('.py','') for file in listdir(datatypes_dir) if file.endswith('.py') and not file.startswith('test_') and not file.startswith('_')] all_modules = [] for name in files: all_modules.append(importlib.import_module('{}.{}'.format(root_modul_name, name))) all_classes = [] for modul in all_modules: all_classes += inspect.getmembers(modul, inspect.isclass) #all_classes = sorted(set(all_classes)) all_classes = sorted(set(all_classes), key=lambda current_class: current_class[0]) semantic_classes = [ cls for name, cls in all_classes if issubclass(cls, SemanticClass)\ and not issubclass(cls, UnSemanticClass)\ and not (cls == SemanticClass)] return semantic_classes def _get_builtin_cls_keys(self, property_dict): """Returns a list of keys for classes that are builtin. """ builtin_cls_keys = [] for key in property_dict.keys(): property_cls = property_dict.get(key).get('class')\ if type(property_dict.get(key)) is dict\ else property_dict.get(key)[0] if type(property_cls) != dict\ and property_cls.__module__ == 'builtins': builtin_cls_keys.append(key) return builtin_cls_keys def _get_semantic_dictionary_keys_super_first(self, property_dict): """Sorts the keys of the property part of a semantic dictionary and returns the keys for super classes before keys of subclasses. :return: a sorted list of keys. 
""" builtin_cls_keys = self._get_builtin_cls_keys(property_dict) complex_cls_keys = [] for key in [ key for key in property_dict.keys()\ if key not in builtin_cls_keys ]: current_cls = property_dict.get(key).get('class') key_inserted = False for index, cls_key in enumerate(complex_cls_keys): potential_sub_cls = property_dict.get(cls_key).get('class') if issubclass(potential_sub_cls, current_cls): complex_cls_keys.insert(index, key) key_inserted = True break if not key_inserted: complex_cls_keys.append(key) return builtin_cls_keys + complex_cls_keys def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to convert py classes that are subclasses of class_spec.SemanticClass to owl:Class and its properties to owl:ObjectProperty. py2ttl/py2ttl_ontology.py [OPTIONS ] [optional] directory containing datatypes that are subclasses of class_spec.SemanticClass. Overwrites DATATYPES_DIR in py2ttl/config.py. OPTIONS: -h|--help: show help -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in py2ttl/config.py -t|--target=target_ontology_file target ontology ttl file, default: 'PROJECT_PREFIX-ontology_autogenerated.ttl' :return: exit code (int) """ check_config_files_exist() datatypes_dir = get_datatypes_dir() source_ontology_file = PROJECT_ONTOLOGY_FILE target_ontology_file = '' try: opts, args = getopt.getopt(argv, "hs:t:", ["help","source=", "target="]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 elif opt in ('-t', '--target'): target_ontology_file = arg elif opt in ('-s', '--source'): source_ontology_file = arg converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file) if len(args) > 0: datatypes_dir = args[0] if target_ontology_file == '': target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, converter.project_name) return converter.create_ontology(datatypes_dir, target_ontology_file) if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: py2ttl/xml_conform_dictionary.py =================================================================== --- py2ttl/xml_conform_dictionary.py (revision 109) +++ py2ttl/xml_conform_dictionary.py (revision 110) @@ -1,121 +1,124 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This represents a xml conform dictionary of data. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import inspect import re import warnings from class_spec import SemanticClass class XMLConformDictionary: """ This represents a xml conform dictionary of data. 
""" def __init__(self): self.attachables = [] self.builtins = {} self.builtin_list = {} def attach_data_to_tree(self, node): """Attach data to node """ for xml_key, value in self.builtins.items(): node.set(xml_key, value) for xml_key, value_list in self.builtin_list.items(): node.set(xml_key, ' '.join([ str(i) for i in value_list])) for attachable in self.attachables: attachable.attach_object_to_tree(node) @classmethod def create_cls_from_data_object(cls, data_object): """Create a XMLConformDictionary. """ if not issubclass(type(data_object), SemanticClass): msg = f'{type(data_object)} is not a subclass of {SemanticClass}' raise TypeError(msg) property_d = data_object.get_semantic_dictionary()[data_object.PROPERTIES_KEY] xml_d = cls() for key in property_d.keys(): value = data_object.__dict__.get(key) if value is not None and (type(value) != list or len(value) > 0): semantic_type = property_d[key][data_object.CLASS_KEY]\ if type(property_d[key]) is dict\ else property_d[key][0] if type(value) != list and semantic_type.__module__ == 'builtins': if semantic_type == bool: + if value != True and value != False: + msg = f'Value "{value}" for key "{key}" is not of type "bool"' + raise TypeError(msg) xml_d.builtins.update({key.replace('_','-'): str(value).lower()}) else: xml_d.builtins.update({key.replace('_','-'): str(value)}) elif semantic_type.__module__ != 'builtins': if type(value) != list: xml_d.attachables.append(value) else: for item in value: xml_d.attachables.append(item) else: xml_d.builtin_list.update({key.replace('_','-'): value}) return xml_d @staticmethod def CREATE_INSTANCEOF_CLASS_FROM_NODE(semantic_class, node): """Create a instance of semantic_class from node. """ if not issubclass(semantic_class, SemanticClass): msg = f'{semantic_class} is not a subclass of {SemanticClass}' raise TypeError(msg) property_d = semantic_class.get_semantic_dictionary()[semantic_class.PROPERTIES_KEY] class_instance = semantic_class() for key in property_d.keys(): semantic_type = property_d[key][semantic_class.CLASS_KEY]\ if type(property_d[key]) is dict\ else property_d[key][0] if semantic_type.__module__ == 'builtins' and semantic_type != list: value = node.get(key.replace('_','-')) if semantic_type == bool: class_instance.__dict__.update({key: (value == 'true')}) elif semantic_type != str: if re.match(r'(.*)(\s)', value): class_instance.__dict__.update({key: [ semantic_type(item) for item in value.split(' ')] }) else: class_instance.__dict__.update({key: semantic_type(value)}) else: class_instance.__dict__.update({key: value}) else: attachables = [] for sub_node in node.xpath(semantic_type.XML_TAG): sub_instance = semantic_type.create_cls_from_node(sub_node)\ if 'create_cls_from_node' in semantic_type.__dict__\ else XMLConformDictionary.CREATE_INSTANCEOF_CLASS_FROM_NODE(semantic_type, sub_node) attachables.append(sub_instance) if len(attachables) > 0: if len(attachables) > 1: class_instance.__dict__.update({key: attachables}) else: class_instance.__dict__.update({key: attachables[0]}) return class_instance Index: tests_py2ttl/test_data/mapping_dict.xml =================================================================== --- tests_py2ttl/test_data/mapping_dict.xml (revision 109) +++ tests_py2ttl/test_data/mapping_dict.xml (revision 110) @@ -1,391 +1,408 @@ tln http://www.nie.org/ontology/nietzsche# ./tln-ontology_autogenerated.ttl http://www.nie.org/ontology/nietzsche#ManuscriptUnity http://www.nie.org/ontology/nietzsche#hasTitle http://www.nie.org/ontology/nietzsche#hasManuscriptType 
Index: tests_py2ttl/test_data/mapping_dict.xml
===================================================================
--- tests_py2ttl/test_data/mapping_dict.xml (revision 109)
+++ tests_py2ttl/test_data/mapping_dict.xml (revision 110)
@@ -1,391 +1,408 @@
tln
http://www.nie.org/ontology/nietzsche#
./tln-ontology_autogenerated.ttl
http://www.nie.org/ontology/nietzsche#ManuscriptUnity
    http://www.nie.org/ontology/nietzsche#hasTitle
    http://www.nie.org/ontology/nietzsche#hasManuscriptType
    http://www.nie.org/ontology/nietzsche#hasPages
    http://www.nie.org/ontology/nietzsche#hasDescription
http://www.nie.org/ontology/nietzsche#ArchivalManuscriptUnity
    http://www.nie.org/ontology/nietzsche#hasTitle
    http://www.nie.org/ontology/nietzsche#hasManuscriptType
    http://www.nie.org/ontology/nietzsche#hasPages
    http://www.nie.org/ontology/nietzsche#hasStyles
+   http://www.nie.org/ontology/nietzsche#hasGsaSignature
    http://www.nie.org/ontology/nietzsche#hasDescription
    http://www.nie.org/ontology/nietzsche#partsBelongToReconstructedKonvolut
    http://www.nie.org/ontology/nietzsche#hasEarlierDescriptions
http://www.nie.org/ontology/nietzsche#EditorComment
    http://www.nie.org/ontology/nietzsche#isUncertain
    http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#AtypicalWriting
    http://www.nie.org/ontology/nietzsche#isUncertain
    http://www.nie.org/ontology/nietzsche#hasComment
    http://www.nie.org/ontology/nietzsche#atypicalWritingHasText
http://www.nie.org/ontology/nietzsche#Path
    http://www.nie.org/ontology/nietzsche#hasDAttribute
http://www.nie.org/ontology/nietzsche#Box
    http://www.nie.org/ontology/nietzsche#hasDAttribute
    http://www.nie.org/ontology/nietzsche#hasEarlierText
http://www.nie.org/ontology/nietzsche#Clarification
    http://www.nie.org/ontology/nietzsche#isUncertain
    http://www.nie.org/ontology/nietzsche#hasComment
    http://www.nie.org/ontology/nietzsche#clarificationHasText
http://www.nie.org/ontology/nietzsche#Color
    http://www.nie.org/ontology/nietzsche#colorHasName
    http://www.nie.org/ontology/nietzsche#hasHexadecimalValue
http://www.nie.org/ontology/nietzsche#Text
    http://www.nie.org/ontology/nietzsche#textHasContent
    http://www.nie.org/ontology/nietzsche#textHasMarkup
http://www.nie.org/ontology/nietzsche#Description
    http://www.nie.org/ontology/nietzsche#textHasContent
    http://www.nie.org/ontology/nietzsche#textHasMarkup
http://www.nie.org/ontology/nietzsche#EarlierDescription
    http://www.nie.org/ontology/nietzsche#textHasContent
    http://www.nie.org/ontology/nietzsche#hasAuthor
    http://www.nie.org/ontology/nietzsche#hasCitation
    http://www.nie.org/ontology/nietzsche#textHasMarkup
http://www.nie.org/ontology/nietzsche#EditorCorrection
    http://www.nie.org/ontology/nietzsche#isUncertain
    http://www.nie.org/ontology/nietzsche#hasComment
    http://www.nie.org/ontology/nietzsche#hasCorrectionText
http://www.nie.org/ontology/nietzsche#Image
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasFileName
    http://www.nie.org/ontology/nietzsche#hasTransform
    http://www.nie.org/ontology/nietzsche#hasPrimaryurl
    http://www.nie.org/ontology/nietzsche#hasSecondaryurl
    http://www.nie.org/ontology/nietzsche#hasTextField
http://www.nie.org/ontology/nietzsche#FaksimileImage
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasFileName
    http://www.nie.org/ontology/nietzsche#hasTransform
    http://www.nie.org/ontology/nietzsche#hasPrimaryurl
    http://www.nie.org/ontology/nietzsche#hasSecondaryurl
    http://www.nie.org/ontology/nietzsche#hasApiurl
    http://www.nie.org/ontology/nietzsche#hasThumburl
    http://www.nie.org/ontology/nietzsche#hasMediumurl
    http://www.nie.org/ontology/nietzsche#hasTextField
http://www.nie.org/ontology/nietzsche#PositionalObject
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasLeft
    http://www.nie.org/ontology/nietzsche#hasTop
    http://www.nie.org/ontology/nietzsche#hasBottom
    http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#WordPosition
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasLeft
    http://www.nie.org/ontology/nietzsche#hasTop
    http://www.nie.org/ontology/nietzsche#hasBottom
    http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#FaksimilePosition
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasLeft
    http://www.nie.org/ontology/nietzsche#hasTop
    http://www.nie.org/ontology/nietzsche#hasBottom
    http://www.nie.org/ontology/nietzsche#hasTransform
+
+http://www.nie.org/ontology/nietzsche#Imprint
+
+   http://www.nie.org/ontology/nietzsche#imprintHasReference
+   http://www.nie.org/ontology/nietzsche#imprintRefersToLines
+
+
http://www.nie.org/ontology/nietzsche#Line
    http://www.nie.org/ontology/nietzsche#lineHasNumber
    http://www.nie.org/ontology/nietzsche#lineHasBottomValueOnTranskription
    http://www.nie.org/ontology/nietzsche#lineHasTopValueOnTranskription
    http://www.nie.org/ontology/nietzsche#lineHasInnerBottomValueOnFaksimile
    http://www.nie.org/ontology/nietzsche#lineHasInnerTopValueOnFaksimile
    http://www.nie.org/ontology/nietzsche#lineHasOuterBottomValueOnFaksimile
    http://www.nie.org/ontology/nietzsche#lineHasOuterTopValueOnFaksimile
    http://www.nie.org/ontology/nietzsche#isMainLine
    http://www.nie.org/ontology/nietzsche#lineHasEditorComment
http://www.nie.org/ontology/nietzsche#LineContinuation
    http://www.nie.org/ontology/nietzsche#isUncertain
    http://www.nie.org/ontology/nietzsche#hasComment
    http://www.nie.org/ontology/nietzsche#isLineAContinuationTo
    http://www.nie.org/ontology/nietzsche#lineContinuationHasReference
http://www.nie.org/ontology/nietzsche#SimpleWord
    http://www.nie.org/ontology/nietzsche#hasText
+   http://www.nie.org/ontology/nietzsche#hasCleanText
    http://www.nie.org/ontology/nietzsche#wordBelongsToLine
    http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
    http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#SpecialWord
    http://www.nie.org/ontology/nietzsche#hasText
+   http://www.nie.org/ontology/nietzsche#hasCleanText
    http://www.nie.org/ontology/nietzsche#wordBelongsToLine
    http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
    http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
http://www.nie.org/ontology/nietzsche#MarkForeignHands
    http://www.nie.org/ontology/nietzsche#hasText
+   http://www.nie.org/ontology/nietzsche#hasCleanText
    http://www.nie.org/ontology/nietzsche#penOfForeignHands
+   http://www.nie.org/ontology/nietzsche#resolutionOfAbbreviation
    http://www.nie.org/ontology/nietzsche#wordBelongsToLine
    http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
    http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
    http://www.nie.org/ontology/nietzsche#textOfForeignHands
http://www.nie.org/ontology/nietzsche#Page
    http://www.nie.org/ontology/nietzsche#hasNumber
    http://www.nie.org/ontology/nietzsche#hasOrientation
+   http://www.nie.org/ontology/nietzsche#hasImprints
    http://www.nie.org/ontology/nietzsche#hasLines
    http://www.nie.org/ontology/nietzsche#hasMarkForeignHands
    http://www.nie.org/ontology/nietzsche#hasWords
    http://www.nie.org/ontology/nietzsche#hasWordDeletionPaths
    http://www.nie.org/ontology/nietzsche#hasWordInsertionMarks
    http://www.nie.org/ontology/nietzsche#hasFaksimileImage
    http://www.nie.org/ontology/nietzsche#pageIsOnSVGTextField
    http://www.nie.org/ontology/nietzsche#pageIsOnFaksimileTextField
    http://www.nie.org/ontology/nietzsche#hasSvgImage
http://www.nie.org/ontology/nietzsche#NonExistentPage
    http://www.nie.org/ontology/nietzsche#hasNumber
    http://www.nie.org/ontology/nietzsche#hasOrientation
+   http://www.nie.org/ontology/nietzsche#hasImprints
    http://www.nie.org/ontology/nietzsche#hasLines
    http://www.nie.org/ontology/nietzsche#hasMarkForeignHands
    http://www.nie.org/ontology/nietzsche#hasWords
    http://www.nie.org/ontology/nietzsche#hasWordDeletionPaths
    http://www.nie.org/ontology/nietzsche#hasWordInsertionMarks
    http://www.nie.org/ontology/nietzsche#hasStatus
    http://www.nie.org/ontology/nietzsche#hasFaksimileImage
    http://www.nie.org/ontology/nietzsche#pageIsOnSVGTextField
    http://www.nie.org/ontology/nietzsche#pageIsOnFaksimileTextField
    http://www.nie.org/ontology/nietzsche#hasSvgImage
http://www.nie.org/ontology/nietzsche#ReconstructedKonvolut
    http://www.nie.org/ontology/nietzsche#hasTitle
    http://www.nie.org/ontology/nietzsche#hasManuscriptType
    http://www.nie.org/ontology/nietzsche#hasPages
    http://www.nie.org/ontology/nietzsche#hasDescription
http://www.nie.org/ontology/nietzsche#Reference
    http://www.nie.org/ontology/nietzsche#firstLineOfReference
    http://www.nie.org/ontology/nietzsche#lastLineOfReference
    http://www.nie.org/ontology/nietzsche#wordReference
    http://www.nie.org/ontology/nietzsche#IsUncertain
    http://www.nie.org/ontology/nietzsche#hasTitle
    http://www.nie.org/ontology/nietzsche#hasPageNumber
http://www.nie.org/ontology/nietzsche#SVGImage
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasFileName
    http://www.nie.org/ontology/nietzsche#hasTransform
    http://www.nie.org/ontology/nietzsche#hasPrimaryurl
    http://www.nie.org/ontology/nietzsche#hasSecondaryurl
    http://www.nie.org/ontology/nietzsche#hasTextField
http://www.nie.org/ontology/nietzsche#StandoffTag
    http://www.nie.org/ontology/nietzsche#standoffTagHasStartIndex
    http://www.nie.org/ontology/nietzsche#standoffTagHasEndIndex
    http://www.nie.org/ontology/nietzsche#standoffTagHasCSS
http://www.nie.org/ontology/nietzsche#TextConnectionMark
    http://www.nie.org/ontology/nietzsche#hasText
+   http://www.nie.org/ontology/nietzsche#hasCleanText
    http://www.nie.org/ontology/nietzsche#wordBelongsToLine
    http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
    http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
    http://www.nie.org/ontology/nietzsche#textConnectionMarkHasTextSource
http://www.nie.org/ontology/nietzsche#TextField
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasLeft
    http://www.nie.org/ontology/nietzsche#hasTop
    http://www.nie.org/ontology/nietzsche#hasBottom
    http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#TranskriptionPosition
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasLeft
    http://www.nie.org/ontology/nietzsche#hasTop
    http://www.nie.org/ontology/nietzsche#hasBottom
    http://www.nie.org/ontology/nietzsche#hasTransform
http://www.nie.org/ontology/nietzsche#UncertainDecipherment
    http://www.nie.org/ontology/nietzsche#isUncertain
    http://www.nie.org/ontology/nietzsche#hasComment
http://www.nie.org/ontology/nietzsche#Word
    http://www.nie.org/ontology/nietzsche#hasText
+   http://www.nie.org/ontology/nietzsche#hasCleanText
    http://www.nie.org/ontology/nietzsche#hasEditedText
+   http://www.nie.org/ontology/nietzsche#hasCleanEditedText
    http://www.nie.org/ontology/nietzsche#wordHasWordParts
    http://www.nie.org/ontology/nietzsche#wordBelongsToLine
    http://www.nie.org/ontology/nietzsche#hasTranskriptionPosition
    http://www.nie.org/ontology/nietzsche#hasFaksimilePosition
    http://www.nie.org/ontology/nietzsche#wordHasStyle
    http://www.nie.org/ontology/nietzsche#overwritesWord
    http://www.nie.org/ontology/nietzsche#isTransformationOfWord
    http://www.nie.org/ontology/nietzsche#isExtensionOfWord
    http://www.nie.org/ontology/nietzsche#isDeletionOfWord
    http://www.nie.org/ontology/nietzsche#isClarificationOfWord
    http://www.nie.org/ontology/nietzsche#wordHasEarlierVersion
    http://www.nie.org/ontology/nietzsche#wordHasCorrection
    http://www.nie.org/ontology/nietzsche#wordIsDeletedByPath
    http://www.nie.org/ontology/nietzsche#wordHasEditorComment
http://www.nie.org/ontology/nietzsche#WordDeletionPath
    http://www.nie.org/ontology/nietzsche#hasDAttribute
http://www.nie.org/ontology/nietzsche#WordInsertionMark
    http://www.nie.org/ontology/nietzsche#hasHeight
    http://www.nie.org/ontology/nietzsche#hasWidth
    http://www.nie.org/ontology/nietzsche#hasLeft
    http://www.nie.org/ontology/nietzsche#hasTop
    http://www.nie.org/ontology/nietzsche#hasBottom
    http://www.nie.org/ontology/nietzsche#hasTransform
    http://www.nie.org/ontology/nietzsche#hasMarkType
    http://www.nie.org/ontology/nietzsche#hasSymbolId
    http://www.nie.org/ontology/nietzsche#hasNextWord
    http://www.nie.org/ontology/nietzsche#hasPreviousWord
    http://www.nie.org/ontology/nietzsche#wordInsertionMarkBelongsToLine
xml-dictionary
-2021-02-26 15:46:21
+2021-08-23 09:52:15
Index: Friedrich-Nietzsche-late-work-ontology.ttl
===================================================================
--- Friedrich-Nietzsche-late-work-ontology.ttl (revision 109)
+++ Friedrich-Nietzsche-late-work-ontology.ttl (revision 110)
@@ -1,160 +1,167 @@
@prefix dct: .
-@prefix document: .
+@prefix document: .
@prefix homotypic: .
@prefix stoff: .
@prefix text: .
@prefix owl: .
@prefix rdfs: .
@prefix rdf: .
@prefix skos: .
@prefix xsd: .
@prefix tln: .

a owl:Ontology;
    dct:license ;
    dct:title "An ontology about the collected late works of Friedrich Nietzsche"@en;
    dct:description """Formal description of specific concepts in the scientific study of Friedrich Nietzsche's late work."""@en;
    dct:creator "Dominique Steinbach, tool coordinator/software developer, NIE-INE/digital edition of der späte Nietzsche, Basel University, Switzerland"@en;
    dct:contributor "Christian Steiner, software developer, digital edition of der späte Nietzsche, University of Basel, Switzerland"@en;
    dct:publisher "Basel University, Switzerland"@en.

tln:TextGenesis a owl:Class ;
    rdfs:label "identifies a genetic order of text versions"@en ;
    rdfs:comment "Identifies a genetic order of text versions, i.e. groups text units as earlier and later versions of each other."@en ;
    rdfs:isDefinedBy .

tln:IdentifiedTextVersion a owl:Class ;
    rdfs:label "identifies a list of text unities as a text version"@en ;
    rdfs:comment "Identification of a list of text unities (e.g. pages or parts of pages) as a text version for which there is an earlier or later version."@en ;
    rdfs:isDefinedBy .
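Class declarations like tln:TextGenesis above can also be produced programmatically. A minimal sketch with rdflib, which is an illustrative assumption here, not necessarily what the project's py2ttl converter uses internally:

# Sketch: emit an owl:Class declaration for tln:TextGenesis with rdflib.
from rdflib import Graph, Literal, Namespace
from rdflib.namespace import OWL, RDF, RDFS

TLN = Namespace('http://www.nie.org/ontology/nietzsche#')

g = Graph()
g.bind('tln', TLN)
g.add((TLN.TextGenesis, RDF.type, OWL.Class))
g.add((TLN.TextGenesis, RDFS.label,
       Literal('identifies a genetic order of text versions', lang='en')))
print(g.serialize(format='turtle'))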
tln:PartOfPageTextUnit a owl:Class ;
    rdfs:label "identifies a part of a page as a text unity"@en ;
    rdfs:comment "Identification of a part of a page as a text unity."@en ;
    rdfs:isDefinedBy ;
    rdfs:subClassOf [ a owl:Restriction ;
            owl:cardinality "1"^^xsd:nonNegativeInteger ;
            owl:onProperty tln:belongsToPage ],
        [ a owl:Restriction ;
            owl:cardinality "1"^^xsd:nonNegativeInteger ;
            owl:onProperty tln:startLine ],
        [ a owl:Restriction ;
            owl:cardinality "1"^^xsd:nonNegativeInteger ;
            owl:onProperty tln:endLine ] .

tln:ExternalTextUnit a owl:Class ;
    rdfs:label "a text unit that has been published externally to the digital edition"@en ;
    rdfs:comment "A text unit that has been published externally to the digital edition."@en ;
    rdfs:isDefinedBy ;
    rdfs:subClassOf tln:IdentifiedTextVersion .

tln:Page a owl:Class ;
    rdfs:subClassOf document:Page .

tln:belongsToPage a owl:ObjectProperty ;
    rdfs:label "relates a part of a page with the page it is a part of"@en ;
    rdfs:comment "Relates a part of a page with the page it is a part of."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:PartOfPageTextUnit ;
    rdfs:range tln:Page .

tln:startLine a owl:ObjectProperty ;
    rdfs:label "relates a part of a page with the line it starts with"@en ;
    rdfs:comment "Relates a part of a page with the line it starts with."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:PartOfPageTextUnit ;
    rdfs:range tln:Line .

tln:endLine a owl:ObjectProperty ;
    rdfs:label "relates a part of a page with the line it ends with"@en ;
    rdfs:comment "Relates a part of a page with the line it ends with."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:PartOfPageTextUnit ;
    rdfs:range tln:Line .

tln:identifiesAsVersion a owl:ObjectProperty ;
    rdfs:label "groups a list of text unities together as an identified text version"@en ;
    rdfs:comment "Groups a list of text unities together as an identified text version for which there is an earlier or later version."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:IdentifiedTextVersion ;
    rdfs:range rdf:List .

tln:hasGeneticOrder a owl:ObjectProperty ;
    rdfs:label "relates a list of text versions to an identified genetic order"@en ;
    rdfs:comment "Relates a list of text versions to an identified genetic order. The position in the list determines the version of a text unit."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:TextGenesis ;
    rdfs:range rdf:List .

tln:textUnitHasTitle a owl:ObjectProperty ;
    rdfs:label "relates an externally published text unit with a title"@en ;
    rdfs:comment "Relates an externally published text unit with a title by which it can be identified."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:ExternalTextUnit ;
    rdfs:range xsd:string .

tln:textUnitHasUrl a owl:ObjectProperty ;
    rdfs:label "relates an externally published text unit with a URL"@en ;
    rdfs:comment "Relates an externally published text unit with a URL by which it can be visited."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:ExternalTextUnit ;
    rdfs:range xsd:anyURI .

tln:hasImage a owl:ObjectProperty ;
    rdfs:label "relates a page to an image"@en ;
    rdfs:comment "Relates a page to an image that has a textfield that specifies the area where the writing that constitutes the page can be found."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:Page ;
    rdfs:range tln:Image .

tln:hasUrl a owl:DatatypeProperty ;
    rdfs:label "has Url"@en ;
    rdfs:domain tln:Image ;
    rdfs:isDefinedBy ;
    rdfs:range xsd:anyURI .
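The anonymous owl:Restriction pattern that tln:PartOfPageTextUnit uses above (each restriction is a blank node attached via rdfs:subClassOf) looks like this when built in code. Again an rdflib sketch under the assumption that rdflib is acceptable tooling:

# Sketch: restrict tln:belongsToPage to exactly one value on PartOfPageTextUnit.
from rdflib import BNode, Graph, Literal, Namespace
from rdflib.namespace import OWL, RDF, RDFS, XSD

TLN = Namespace('http://www.nie.org/ontology/nietzsche#')
g = Graph()
restriction = BNode()  # the anonymous [ a owl:Restriction ; ... ] node
g.add((restriction, RDF.type, OWL.Restriction))
g.add((restriction, OWL.cardinality, Literal('1', datatype=XSD.nonNegativeInteger)))
g.add((restriction, OWL.onProperty, TLN.belongsToPage))
g.add((TLN.PartOfPageTextUnit, RDFS.subClassOf, restriction))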
-tln:inheritOverwritesWord a owl:ObjectProperty ;
-    rdfs:subPropertyOf tln:overwritesWord;
-    rdfs:label "word overwrites word (inherited from tln:wordHasCorrection)"@en ;
-    rdfs:comment "The author has used this word in order to overwrite that word."@en ;
-    rdfs:isDefinedBy ;
-    owl:propertyChainAxiom ( tln:wordHasCorrection tln:overwritesWord ).
+#tln:inheritOverwritesWord a owl:ObjectProperty ;
+#    rdfs:subPropertyOf tln:overwritesWord;
+#    rdfs:label "word overwrites word (inherited from tln:wordHasCorrection)"@en ;
+#    rdfs:comment "The author has used this word in order to overwrite that word."@en ;
+#    rdfs:isDefinedBy ;
+#    owl:propertyChainAxiom ( tln:wordHasCorrection tln:overwritesWord ).
+
+tln:hasStandoffMarkup4PartThatOverwritesWord a owl:ObjectProperty ;
+    rdfs:label "word has standoff markup for the part that overwrites a word"@en ;
+    rdfs:comment "Word has standoff markup that highlights the part of its text that overwrites a word."@en ;
+    rdfs:isDefinedBy ;
+    rdfs:domain tln:Word ;
+    rdfs:range stoff:StandoffMarkup .

tln:lineContinuesOn a owl:ObjectProperty ;
    rdfs:label "writing from subject line continues on object line"@en ;
    rdfs:comment "The writing that ends on the subject line continues on the object line."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:Line ;
    rdfs:range tln:Line .

tln:pageIsOnTextField a owl:ObjectProperty ;
    rdfs:label "page is on text field"@en ;
    rdfs:comment "The writing that is referred to as subject can be found on the object."@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:Page ;
    rdfs:range tln:TextField .

tln:writingContinuesWithWord a owl:ObjectProperty ;
    rdfs:label "writing continues with next word"@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:Word ;
    rdfs:range tln:Word .

tln:selectableWordProperty a owl:ObjectProperty ;
    rdfs:label "a property of a word for which it can be selected"@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:Word .

tln:cardinalityGreaterOne a rdf:Property ;
    rdfs:label "whether a tln:selectableWordProperty can have a cardinality greater than one"@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:selectableWordProperty ;
    rdfs:range xsd:boolean .

tln:suggestedMaxCardinality a rdf:Property ;
    rdfs:label "the suggested max cardinality of a tln:selectableWordProperty on a word"@en ;
    rdfs:isDefinedBy ;
    rdfs:domain tln:selectableWordProperty ;
    rdfs:range xsd:nonNegativeInteger .
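What the now-disabled owl:propertyChainAxiom expressed: if a word W has a correction C (tln:wordHasCorrection) and C overwrites a word O (tln:overwritesWord), a reasoner would infer that W overwrites O. A minimal sketch of that inference in plain Python, with a hypothetical Word class standing in for the reasoner and the ontology's terms:

# Sketch: the wordHasCorrection o overwritesWord -> overwritesWord chain.
class Word:
    def __init__(self, text, correction=None, overwrites=None):
        self.text = text
        self.correction = correction    # tln:wordHasCorrection
        self.overwrites = overwrites    # tln:overwritesWord

overwritten = Word('alt')
correction = Word('neu', overwrites=overwritten)
word = Word('neu', correction=correction)

# materialize the chained property on the corrected word
if word.correction is not None and word.correction.overwrites is not None:
    word.overwrites = word.correction.overwrites
assert word.overwrites is overwritten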