Index: svgscripts/extractWordPosition.py =================================================================== --- svgscripts/extractWordPosition.py (revision 110) +++ svgscripts/extractWordPosition.py (revision 111) @@ -1,562 +1,710 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to extract the position of the words in an svg file and write them to an xml file. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} import inspect import getopt from lxml import etree as ET from os import sep, listdir, mkdir, path from os.path import exists, isfile, isdir from progress.bar import Bar import re import sys import warnings from datatypes.lineNumber import LineNumber from datatypes.matrix import Matrix from datatypes.page_creator import PageCreator, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT from datatypes.pdf import PDFText +from datatypes.positional_word_part import PositionalWordPart from datatypes.transkriptionField import TranskriptionField from datatypes.transkription_position import TranskriptionPosition from datatypes.word import Word from datatypes.word_insertion_mark import WordInsertionMark from util import process_warnings4status, reset_tp_with_matrix sys.path.append('shared_util') from myxmlwriter import write_pretty __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" class Extractor: """ This class can be used to extract the word positions in an svg file and write them to an xml file. Args: [xml_dir (str): target directory] [title (str): title of document] [manuscript_file (str): xml file containing information about the archival unit to which the current page belongs] """ UNITTESTING = False SONDERZEICHEN_LIST = [ 'A', 'B', '{', '}' ] SET_POSITIONS_TO_TEXTFIELD_0_0 = False def __init__(self, xml_dir=None, title=None, manuscript_file=None, compare2pdf=False): if bool(xml_dir): self.xml_dir = xml_dir not isdir(self.xml_dir) and mkdir(self.xml_dir) else: self.xml_dir = 'xml' if(isdir('xml')) else '' self.latest_status = None self.compare2pdf = compare2pdf self.xml_dir = self.xml_dir + sep if(bool(self.xml_dir)) else '' self.title = title self.manuscript_file = manuscript_file self.manuscript_tree = None self.svg_tree = None if not bool(self.title) and bool(self.manuscript_file) and isfile(self.manuscript_file): self.manuscript_tree = ET.parse(self.manuscript_file) self.title = self.manuscript_tree.getroot().get('title') elif bool(self.manuscript_file): raise FileNotFoundError('File "{}" does not exist!'.format(self.manuscript_file)) elif bool(self.title): self.update_title_and_manuscript(self.title, False) + def _get_pwps_break_points(self, page, pwps) ->list: + """Return a list of break points from pwps.
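+
+        A minimal standalone sketch of the gap heuristic used below (the left
+        positions are made up; in real runs they come from PositionalWordPart.left):
+
+            lefts = [10.0, 14.5, 40.2, 44.0]
+            THRESHOLDX = 20
+            break_points, last_x = [], -1
+            for i, x in enumerate(lefts):
+                if last_x > -1 and x - last_x > THRESHOLDX:
+                    break_points.append((i, i))   # split between index 1 and 2
+                last_x = x
+            # break_points == [(2, 2)]: pwps[0:2] is a line number, pwps[2:] the word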
+ """ + break_points = [] + if(len(page.sonderzeichen_list) > 0): # check for Sonderzeichen and special chars -> mark for word insertion, create break points + for Sonderzeichen in self.SONDERZEICHEN_LIST: + sonderzeichen_pwps = [ pwp for pwp in pwps if pwp.text == Sonderzeichen and any(sz in pwp.style_class for sz in page.sonderzeichen_list) ] + if len(sonderzeichen_pwps) > 0: + break_points += [ (endPoint, endPoint + 1) for endPoint in [i for i, e in enumerate(sonderzeichen_pwps) ]] + for pwp in sonderzeichen_pwps: + wim_index = len(page.word_insertion_marks) + wim = WordInsertionMark(id=wim_index, x=pwp.left, y=pwp.top-pwp.height, height=pwp.height, width=pwp.width,\ + line_number=page.get_line_number(pwp.top-pwp.height-1), mark_type=Sonderzeichen) + page.word_insertion_marks.append(wim) + if(bool(re.search(r'\d[A-Za-z]', self.get_word_from_pwps(pwps)))): # case: digits from line number and chars from words -> create break points + THRESHOLDX = 20 # Threshold between line number and text + last_x = -1 + for i, x in enumerate([float(pwp.left) for pwp in pwps]): + if(last_x > -1 and (x - last_x > THRESHOLDX)): + break_points.append((i, i)) + last_x = x + return break_points + def _get_break_points(self, page, word_part_objs, transkription_field=None) ->list: """Return a list of break points from word_part_objs. """ break_points = [] if(len(page.sonderzeichen_list) > 0): # check for Sonderzeichen and special chars -> mark for word insertion, create break points for Sonderzeichen in self.SONDERZEICHEN_LIST: contains_Sonderzeichen = [ dict['text'] == Sonderzeichen and any(sz in dict['class'] for sz in page.sonderzeichen_list) for dict in word_part_objs ] if True in contains_Sonderzeichen: break_points += [ (endPoint, endPoint + 1) for endPoint in [i for i, e in enumerate(contains_Sonderzeichen) if e == True ]] for sz_point in [i for i, e in break_points]: wim_index = len(page.word_insertion_marks) x = float(word_part_objs[sz_point]['x']) y = float(word_part_objs[sz_point]['y']) if page.svg_file is not None and isfile(page.svg_file)\ and (not self.SET_POSITIONS_TO_TEXTFIELD_0_0 or transkription_field is not None): svg_path_tree = ET.parse(page.svg_file) namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() } xmin = 0 if not self.SET_POSITIONS_TO_TEXTFIELD_0_0 else transkription_field.xmin ymin = 0 if not self.SET_POSITIONS_TO_TEXTFIELD_0_0 else transkription_field.ymin wim = WordInsertionMark.CREATE_WORD_INSERTION_MARK(svg_path_tree, namespaces, id=wim_index, x=x, y=y, xmin=xmin, ymin=ymin,\ line_number=page.get_line_number(y-1), mark_type=Sonderzeichen) page.word_insertion_marks.append(wim) if(bool(re.search(r'\d[A-Za-z]', self.get_word_from_part_obj(word_part_objs)))): # case: digits from line number and chars from words -> create break points THRESHOLDX = 20 # Threshold between line number and text last_x = -1 for i, x in enumerate([float(dict['x']) for dict in word_part_objs]): if(last_x > -1 and (x - last_x > THRESHOLDX)): break_points.append((i, i)) last_x = x return break_points + def _process_pwps_break_points(self, break_points, page, index, pwps) ->int: + """Process break points on pwps and return new index. 
+ """ + from_index = 0 + debug_msg = 'process break points' + for end_point, next_from_index in break_points: + new_pwps = pwps[from_index:end_point] + from_index = next_from_index + index = self.create_word_from_pwps(page, index, new_pwps, debug_msg=debug_msg) + if from_index > 0 and from_index < len(pwps): + new_pwps = pwps[from_index:] + index = self.create_word_from_pwps(page, index, new_pwps, debug_msg=debug_msg + ' ... end point') + if len(page.words) > 1\ + and re.match(r'[^\w\s]', page.words[-1].text): + last_word = page.words.pop() + page.words[-1].join(last_word) + return last_word.id + return index + def _process_break_points(self, break_points, page, index, word_part_objs, endSign, endX, matrix=None, debug_msg=None, transkription_field=None) ->int: """Process break points on word_part_objs and return new index. """ from_index = 0 for end_point, next_from_index in break_points: new_word_part_objs = word_part_objs[from_index:end_point] new_endX = word_part_objs[end_point]['x'] from_index = next_from_index index = self.add_word(page, index, new_word_part_objs, None, new_endX, matrix=matrix, debug_msg=debug_msg, transkription_field=transkription_field) if from_index > 0 and from_index < len(word_part_objs): new_word_part_objs = word_part_objs[from_index:] index = self.add_word(page, index, new_word_part_objs, endSign, endX, matrix=matrix, debug_msg=debug_msg, transkription_field=transkription_field) return index def add_word(self, page, index, word_part_objs, endSign, endX, matrix=None, debug_msg=None, transkription_field=None) ->int: """Creates transkription_positions and a new word from word_part_objs (i.e. a list of dictionaries about parts of this word). If word contains a Sonderzeichen as specified by self.SONDERZEICHEN_LIST, word_part_objs will be split and several words are created. 
:returns: the new word counter (int) """ break_points = self._get_break_points(page, word_part_objs, transkription_field=transkription_field) if(len(break_points) > 0): # if there are break points -> split word_part_obj and add the corresponding words return self._process_break_points(break_points, page, index, word_part_objs, endSign, endX, matrix=matrix, debug_msg=debug_msg, transkription_field=transkription_field) else: if len(word_part_objs) > 0: provide_tf = None if not self.SET_POSITIONS_TO_TEXTFIELD_0_0 else transkription_field transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, matrix=matrix,\ debug_msg_string=debug_msg, transkription_field=provide_tf, svg_path_tree=self.svg_tree) text = self.get_word_from_part_obj(word_part_objs) line_number = page.get_line_number((transkription_positions[0].bottom+transkription_positions[0].top)/2) if line_number == -1: if transkription_positions[0].transform is not None: line_number = page.get_line_number(transkription_positions[0].transform.getY()) if line_number == -1 and len(page.words) > 0: lastWord = page.words[-1] lastWord_lastTP = lastWord.transkription_positions[-1] lastTP = transkription_positions[-1] if transkription_positions[0].left > lastWord_lastTP.left\ and abs(lastWord_lastTP.bottom-lastTP.bottom) < lastTP.height/2: line_number = lastWord.line_number else: line_number = lastWord.line_number+1 #reset_tp_with_matrix(transkription_positions) newWord = Word(id=index, text=text, line_number=line_number, transkription_positions=transkription_positions) page.words.append(newWord) return int(index) + 1 else: return int(index) + def create_word_from_pwps(self, page, index, pwps, debug_msg=None) ->int: + """Creates transkription_positions and a new word from pwps (i.e. a list of PositionalWordPart). + If word contains a Sonderzeichen as specified by self.SONDERZEICHEN_LIST, pwps will be split and several words are created. 
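+
+        A hedged usage sketch (assumes pwps were built beforehand via
+        PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST for one word):
+
+            index = self.create_word_from_pwps(page, index, pwps, debug_msg='example')
+            # expected: one new Word appended to page.words and index incremented,
+            # or several words if pwps contained break points
+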
+ + :returns: the new word counter (int) + """ + break_points = self._get_pwps_break_points(page, pwps) + if(len(break_points) > 0): # if there are break points -> split pwps and add the corresponding words + return self._process_pwps_break_points(break_points, page, index, pwps) + else: + if len(pwps) > 0: + transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps, debug_msg_string=debug_msg) + text = self.get_word_from_pwps(pwps) + line_number = page.get_line_number((transkription_positions[0].bottom+transkription_positions[0].top)/2) + if line_number == -1: + if transkription_positions[0].transform is not None: + line_number = page.get_line_number(transkription_positions[0].transform.getY()) + if line_number == -1 and len(page.words) > 0: + lastWord = page.words[-1] + lastWord_lastTP = lastWord.transkription_positions[-1] + lastTP = transkription_positions[-1] + if transkription_positions[0].left > lastWord_lastTP.left\ + and abs(lastWord_lastTP.bottom-lastTP.bottom) < lastTP.height/2: + line_number = lastWord.line_number + else: + line_number = lastWord.line_number+1 + #reset_tp_with_matrix(transkription_positions) + newWord = Word(id=index, text=text, line_number=line_number, transkription_positions=transkription_positions) + page.words.append(newWord) + return int(index) + 1 + else: + return int(index) + def extractAndWriteInformation(self, file_name, page_number=None, xml_target_file=None, svg_file=None, pdfFile=None, record_warnings=False, warning_filter='default', multipage_index=-1, marginals_page=None): """Extracts information about positions of text elements and writes them to a xml file. """ if isfile(file_name): if not bool(xml_target_file): xml_target_file = self.get_file_name(file_name, page_number) if bool(self.xml_dir) and not bool(path.dirname(xml_target_file)): xml_target_file = path.dirname(self.xml_dir) + sep + xml_target_file exit_status = 0 with warnings.catch_warnings(record=record_warnings) as w: warnings.simplefilter(warning_filter) page = self.extract_information(file_name, page_number=page_number, xml_target_file=xml_target_file, svg_file=svg_file, pdfFile=pdfFile,\ multipage_index=multipage_index, marginals_page=marginals_page) status_message = process_warnings4status(w, [ PageCreator.WARNING_MISSING_USE_NODE4PWP, PageCreator.WARNING_MISSING_GLYPH_ID4WIM ],\ '', 'OK', 'with warnings') if status_message != 'OK': self.latest_status = status_message exit_status = 1 else: self.latest_status = None page.page_tree.getroot().set('status', status_message) write_pretty(xml_element_tree=page.page_tree, file_name=xml_target_file, script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION) return exit_status else: raise FileNotFoundError('\"{}\" is not an existing file!'.format(file_name)) def extract_information(self, file_name, page_number=None, xml_target_file=None, svg_file=None, pdfFile=None, multipage_index=-1, marginals_page=None) -> PageCreator: """Extracts information about positions of text elements. 
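        A minimal usage sketch (the file and page names are hypothetical):

            extractor = Extractor(xml_dir='xml')
            page = extractor.extract_information('svg/W_II_page013.svg', page_number='13')
            # page.words, page.line_numbers and page.word_insertion_marks are populated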
""" if isfile(file_name): if not bool(xml_target_file): xml_target_file = self.get_file_name(file_name, page_number) if bool(self.xml_dir) and not bool(path.dirname(xml_target_file)): xml_target_file = path.dirname(self.xml_dir) + sep + xml_target_file transkription_field = TranskriptionField(file_name, multipage_index=multipage_index) text_field = transkription_field.convert_to_text_field() self.svg_tree = ET.parse(file_name) page = PageCreator(xml_target_file, title=self.title, multipage_index=multipage_index,\ page_number=page_number, pdfFile=pdfFile, svg_file=svg_file,\ svg_text_field=text_field, source=file_name, marginals_source=marginals_page) sonderzeichen_list, letterspacing_list, style_dict = self.get_style(self.svg_tree.getroot()) page.add_style(sonderzeichen_list=sonderzeichen_list, letterspacing_list=letterspacing_list, style_dict=style_dict) page.init_line_numbers(LineNumber.extract_line_numbers(self.svg_tree, transkription_field, set_to_text_field_zero=self.SET_POSITIONS_TO_TEXTFIELD_0_0),\ transkription_field.ymax) - self.extract_word_position(self.svg_tree, page, transkription_field=transkription_field) + self.improved_extract_word_position(self.svg_tree, page, transkription_field=transkription_field) page.create_writing_processes_and_attach2tree() page.update_and_attach_words2tree() for word_insertion_mark in page.word_insertion_marks: # it is not clear if we really need to know this alternative word ordering. See 'TODO.md' #word_insertion_mark.inserted_words = self.find_inserted_words(page.page_tree, word_insertion_mark) word_insertion_mark.attach_object_to_tree(page.page_tree) return page else: raise FileNotFoundError('\"{}\" is not an existing file!'.format(file_name)) + def improved_extract_word_position(self, svg_tree, page, transkription_field=None): + """Extracts word positions. + """ + if page.svg_file is None or not isfile(page.svg_file): + warnings.warn('There is no page.svg_file or it does not exist ... 
using old function "extract_word_position"!') + self.extract_word_position(svg_tree, page, transkription_field=transkription_field) + else: + svg_path_tree = ET.parse(page.svg_file) + namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() } + counter = 0 + word_part_obj = [] + pwps = [] + endSign = '%' + last_matrix = None + MAXBOTTOMDIFF = 5 + MAXXDIFF = 11 + INTERCHARSPACE = 1.1 + if not Extractor.UNITTESTING: + bar = Bar('(improved) extracting word positions from text_item', max=len([*self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field)])) + for text_item in self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field): + current_matrix = Matrix(text_item.get('transform')) + # check for line breaks + if last_matrix is not None and len(pwps) > 0 and (\ + (current_matrix.getX() > pwps[-1].left+pwps[-1].width + INTERCHARSPACE or last_matrix.getX()-current_matrix.getX() > MAXXDIFF) or\ + (abs(current_matrix.getY() - last_matrix.getY()) > MAXBOTTOMDIFF)): + endSign = '%' + if(self.get_word_from_pwps(pwps) != ''): + debug_msg = 'check for line breaks, diffx: {}, diffy: {}, current_matrix: {}, last_matrix: {}'.format(\ + round(current_matrix.getX() - (pwps[-1].left+pwps[-1].width), 3), round(abs(current_matrix.getY() - last_matrix.getY()), 3),\ + current_matrix.toString(), last_matrix.toString()) + counter = self.create_word_from_pwps(page, counter, pwps, debug_msg=debug_msg) + pwps = [] + endX = current_matrix.getX() + if(len(text_item.findall(".//tspan", svg_tree.getroot().nsmap)) < 1): # case: TEXT + if(bool(text_item.text) and not bool(re.search(r'^\s*$', text_item.text))): + x = current_matrix.getX() if not current_matrix.isRotationMatrix() else 0.0 + y = current_matrix.getY() if not current_matrix.isRotationMatrix() else 0.0 + pwps += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST({ "text": text_item.text, "x": x, "y": y, "class": text_item.get('class'), "matrix": current_matrix},\ + svg_path_tree, namespaces, page=page) + else: + if(self.get_word_from_pwps(pwps) != ''): + counter = self.create_word_from_pwps(page, counter, pwps, debug_msg="text: next string empty") + pwps = [] + for tspan_item in text_item.findall(".//tspan", svg_tree.getroot().nsmap): # case: TEXT + endX = current_matrix.add2X(tspan_item.get('x')) + if(tspan_item.text != None and tspan_item.text != '' and not bool(re.search(r'^\s*$', tspan_item.text))): + y = current_matrix.add2Y(tspan_item.get('y')) + pwps += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST({ "text": tspan_item.text, "x": endX, "y": y, "class": tspan_item.get('class'),\ + "matrix": current_matrix }, svg_path_tree, namespaces, page=page) + if len(set(page.letterspacing_list) & set(tspan_item.get('class').split(' '))) > 0: + """text_item has letterspacing class + (set s & set t = new set with elements common to s and t) + """ + if(self.get_word_from_pwps(pwps) != ''): + counter = self.create_word_from_pwps(page, counter, pwps, debug_msg="letterspacing class") + pwps = [] + else: + if(self.get_word_from_pwps(pwps) != ''): + counter = self.create_word_from_pwps(page, counter, pwps, debug_msg="tspan: next string empty") + pwps = [] + last_matrix = current_matrix + not bool(Extractor.UNITTESTING) and bar.next() + if(self.get_word_from_pwps(pwps) != ''): + counter = self.create_word_from_pwps(page, counter, pwps, debug_msg='end of loop') + pwps = [] + not bool(Extractor.UNITTESTING) and bar.finish() + def extract_word_position(self, svg_tree, page, 
transkription_field=None): """Extracts word positions. """ counter = 0 word_part_obj = [] endSign = '%' last_matrix = None MAXBOTTOMDIFF = 5 MAXXDIFF = 6 if not Extractor.UNITTESTING: bar = Bar('extracting word positions from text_item', max=len([*self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field)])) for text_item in self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field): provide_tf = None if not self.SET_POSITIONS_TO_TEXTFIELD_0_0 else transkription_field current_matrix = Matrix(text_item.get('transform'), transkription_field=provide_tf) # check for line breaks if (last_matrix is not None and len(word_part_obj) > 0 and (\ Matrix.DO_CONVERSION_FACTORS_DIFFER(last_matrix, current_matrix) or\ (abs(current_matrix.getY() - last_matrix.getY()) > MAXBOTTOMDIFF) or\ (abs(current_matrix.getX() - word_part_obj[len(word_part_obj)-1]['x']) > MAXXDIFF)))\ or (len(word_part_obj) > 0 and self.get_word_object_multi_char_x(word_part_obj[0]) > current_matrix.getX()): endSign = '%' if(self.get_word_from_part_obj(word_part_obj) != ''): debug_msg = 'check for line breaks, diffx: {}, diffy: {}, diff_conversion_matrix: {}'.format(\ round(abs(current_matrix.getX() - word_part_obj[len(word_part_obj)-1]['x']), 3), round(abs(current_matrix.getY() - last_matrix.getY()), 3),\ str(Matrix.DO_CONVERSION_FACTORS_DIFFER(last_matrix, current_matrix))) counter = self.add_word(page, counter, word_part_obj, endSign, endX, matrix=last_matrix, debug_msg=debug_msg, transkription_field=transkription_field) word_part_obj = [] endX = current_matrix.getX() if(len(text_item.findall(".//tspan", svg_tree.getroot().nsmap)) < 1): # case: TEXT if(bool(text_item.text) and not bool(re.search(r'^\s*$', text_item.text))): word_part_obj.append( { "text": text_item.text, "x": current_matrix.getX(), "y": current_matrix.getY(), "class": text_item.get('class'), "matrix": current_matrix} ) else: endSign = text_item.text if(self.get_word_from_part_obj(word_part_obj) != ''): counter = self.add_word(page, counter, word_part_obj, endSign, endX, matrix=last_matrix, debug_msg='svg/text/\s', transkription_field=transkription_field) word_part_obj = [] endSign = '%' for tspan_item in text_item.findall(".//tspan", svg_tree.getroot().nsmap): # case: TEXT endX = current_matrix.add2X(tspan_item.get('x')) if(tspan_item.text != None and tspan_item.text != '' and not bool(re.search(r'^\s*$', tspan_item.text))): y = current_matrix.add2Y(tspan_item.get('y')) word_part_obj.append( { "text": tspan_item.text, "x": endX, "y": y, "class": tspan_item.get('class'), "matrix": current_matrix }) if len(set(page.letterspacing_list) & set(tspan_item.get('class').split(' '))) > 0: """text_item has letterspacing class (set s & set t = new set with elements common to s and t) """ endSign = '%' if(self.get_word_from_part_obj(word_part_obj) != ''): counter = self.add_word(page, counter, word_part_obj, endSign, endX, matrix=current_matrix,\ debug_msg='tspan with letterspacing', transkription_field=transkription_field) word_part_obj = [] else: endSign = tspan_item.text if(self.get_word_from_part_obj(word_part_obj) != ''): counter = self.add_word(page, counter, word_part_obj, endSign, endX, matrix=current_matrix,\ debug_msg='svg/text/tspan/\s', transkription_field=transkription_field) word_part_obj = [] endSign = '%' last_matrix = current_matrix not bool(Extractor.UNITTESTING) and bar.next() if(self.get_word_from_part_obj(word_part_obj) != ''): counter = self.add_word(page, counter, word_part_obj, endSign, endX, 
matrix=current_matrix, debug_msg='end of loop',\ transkription_field=transkription_field) word_part_obj = [] endSign = '%' not bool(Extractor.UNITTESTING) and bar.finish() def find_inserted_words_by_position(self, target_tree, x, y): """Returns an Array with the words that are inserted above the x, y position or [] if not found. """ warnings.warn('Function "find_inserted_words_by_position" does not work and it is not clear whether we need this.') MINY = 31.0 MAXY = 10.0 DIFFX = 9.0 if(len(target_tree.getroot().xpath('//word[@id]')) > 0): result_list = [] minus2left = 20.0 minus2top = 19.0 while len(result_list) == 0 and minus2top < MINY and minus2left > DIFFX : result_list = [ Word.CREATE_WORD(item) for item in target_tree.getroot().xpath(\ '//word[@top>{0} and @top<{1} and @left>{2} and @left<{3}]'.format(y - minus2top, y - MAXY, x - minus2left, x + DIFFX)) ] minus2left -= 1 minus2top += 1 if len(result_list) > 0: result_bottom = result_list[len(result_list)-1].bottom result_left_min = result_list[len(result_list)-1].left + result_list[len(result_list)-1].width for item in target_tree.getroot().xpath('//word[@bottom={0} and @left>{1}]'.format(result_bottom, result_left_min)): result_left_min = result_list[len(result_list)-1].left + result_list[len(result_list)-1].width result_left_max = result_left_min + DIFFX if float(item.get('left')) - result_left_max < DIFFX: result_list.append(Word.CREATE_WORD(item)) else: break return result_list else: return [] def find_inserted_words(self, target_tree, word_insertion_mark): """Returns an Array with the words that are inserted above/underneath the word_insertion_mark. """ warnings.warn('Function "find_inserted_words" does not work and it is not clear whether we need this.') if word_insertion_mark.line_number < 2 or word_insertion_mark.line_number % 2 == 1: return self.find_inserted_words_by_position(target_tree, word_insertion_mark.x, word_insertion_mark.y) if(len(target_tree.getroot().xpath('//word[@id]')) > 0): MINY = 31.0 MAXY = 10.0 DIFFX = 9.0 result_list = [] x = word_insertion_mark.x y = word_insertion_mark.y if word_insertion_mark.mark_type != 'B': # all insertions that are above the current line line_number = word_insertion_mark.line_number - 1 words_on_line = [ Word.CREATE_WORD(item) for item in target_tree.getroot().xpath(\ '//word[@line-number={0}]'.format(line_number)) ] if len(words_on_line) > 0: minus2top = 1.0 while len(result_list) == 0 and minus2top < MINY: for word in words_on_line: for transkription_position in word.transkription_positions: if transkription_position.top > y - minus2top\ and transkription_position.left > x - DIFFX\ and transkription_position.left < x + DIFFX: result_list.append(word) break minus2top += 1 elif word_insertion_mark.mark_type == 'B': # B means insertion is underneath the current line line_number = word_insertion_mark.line_number + 1 words_on_line = [ Word.CREATE_WORD(item) for item in target_tree.getroot().xpath(\ '//word[@line-number={0}]'.format(line_number)) ] if len(words_on_line) > 0: plus2top = 1.0 while len(result_list) == 0 and plus2top < MINY : for word in words_on_line: for transkription_position in word.transkription_positions: if transkription_position.top > y + plus2top\ and transkription_position.left > x - DIFFX\ and transkription_position.left < x + DIFFX: result_list.append(word) break plus2top += 1 if len(result_list) > 0: # now, collect more words that are right of already collected words result_bottom = result_list[len(result_list)-1].transkription_positions[0].bottom 
result_left_min = result_list[len(result_list)-1].transkription_positions[0].left\ + result_list[len(result_list)-1].transkription_positions[0].width for item in target_tree.getroot().xpath(\ '//word[@line-number={0} and @bottom>{1} and @bottom<{2} and @left>{3}]'.format(line_number, result_bottom-5, result_bottom+5, result_left_min)): result_left_min = result_list[len(result_list)-1].transkription_positions[0].left\ + result_list[len(result_list)-1].transkription_positions[0].width result_left_max = result_left_min + DIFFX if float(item.get('left')) - result_left_max < DIFFX: result_list.append(Word.CREATE_WORD(item)) else: break return result_list else: return [] def get_file_name(self, file_name, page_number=None): """Returns the file_name of the target xml file. """ dir_name = path.dirname(self.xml_dir) + sep if(bool(self.xml_dir)) else '' if bool(self.title): return dir_name + self.title.replace(' ', '_') + '_page' + self.get_page_number(file_name, page_number=page_number) + '.xml' else: return '{}{}'.format(dir_name, path.basename(file_name).replace('.svg', '.xml')) def get_page_number(self, file_name, page_number=None): """ Returns page number as a string (with leading zero(s) if len(page_number) < 3). """ if not bool(page_number) and bool(re.search(r'\d', file_name)): """if page_number=None and filename contains digits, then split filename into its parts that contain only digits, remove empty strings and return the last part containing only digits. """ page_number = list(filter(lambda x: x != '', re.split(r'\D+', file_name))).pop() if bool(page_number): leading_zeros = '00' if(len(page_number) == 1) else '0' if(len(page_number) == 2) else '' return leading_zeros + str(page_number) else: return '' def get_style(self, etree_root): """Returns the style specification as a dictionary. :returns: sonderzeichen_list: list of keys for classes that are 'Sonderzeichen' style_dict: dictionary: key = class name (str), value = style specification (dictionary) """ style_dict = {} sonderzeichen_list = [] letterspacing_list = [] style = etree_root.find('style', etree_root.nsmap) if style is not None: for style_item in list(filter(lambda x: x != '', style.text.split("\n\t"))): style_key = style_item.split('{')[0].replace('.', '') style_value_dict = { item.split(':')[0]: item.split(':')[1].replace('\'','') \ for item in list(filter(lambda x: x!= '', style_item.split('{')[1].replace('}', '').replace('\n','').split(';')))} style_dict[style_key] = style_value_dict if bool(style_value_dict.get('font-family')) and 'Sonderzeichen' in style_value_dict.get('font-family'): sonderzeichen_list.append(style_key) if bool(style_value_dict.get('letter-spacing')): letterspacing_list.append(style_key) return sonderzeichen_list, letterspacing_list, style_dict def get_text_items(self, tree_root, transkription_field=None): """Returns all text elements with a matrix or (if transkription_field is specified) all text elements that are located inside the transkription field. """ if transkription_field is not None: return filter(lambda x: Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(transkription_field, text_node=x),\ tree_root.iterfind(".//text", tree_root.nsmap)) else: return tree_root.iterfind(".//text", tree_root.nsmap) + def get_word_from_pwps(self, pwps): + """Extracts all 'text' from a list of PositionalWordPart objects and concatenates it into a string. + """ + return ''.join([ pwp.text for pwp in pwps ]) + def get_word_from_part_obj(self, word_part_obj): """Extracts all 'text' from a list of dictionaries and concatenates it into a string.
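        Example (only the 'text' key matters; other keys are ignored):

            >>> Extractor().get_word_from_part_obj([{'text': 'Nach'}, {'text': 'lass'}])
            'Nachlass'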
""" return ''.join([ dict['text'] for dict in word_part_obj]) def get_word_object_multi_char_x(self, word_part_obj_dict): """Returns the x of the last char of word_part_object. TODO: get real widths from svg_file!!! """ WIDTHFACTOR = 2.6 return word_part_obj_dict['x'] if len(word_part_obj_dict['text']) < 2 else word_part_obj_dict['x'] + len(word_part_obj_dict['text']) * WIDTHFACTOR def update_title_and_manuscript(self, title, update_manuscript=True): """Updates title and manuscript. """ self.title = title if update_manuscript or not bool(self.manuscript_file): self.manuscript_file = self.xml_dir + self.title.replace(' ', '_') + '.xml' if not isfile(self.manuscript_file): self.manuscript_tree = ET.ElementTree(ET.Element('manuscript', attrib={"title": self.title})) write_pretty(xml_element_tree=self.manuscript_tree, file_name=self.manuscript_file, script_name=__file__, file_type='xmlManuscriptFile') def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to extract the position of the words in a svg file and write them to a xml file. svgscripts/extractWordPosition.py [OPTIONS] svg file OR xml target file containing file name of svg file as "/page/@source". directory containing svg files OPTIONS: -h|--help: show help -c|--compare-to-pdf compare words to pdf and autocorrect -d|--xml-dir=xmlDir: target directory for the xml output file(s) -m|--manuscript-file: xml file containing information about the archival order to which the current page(s) belong(s) -p|--page=pageNumber: page number of the current page. For use with _one_ file only. -P|--PDF=pdfFile: pdf file - used for word correction -s|--svg=svgFile: svg web file -t|--title=title: title of the manuscript to which the current page(s) belong(s) -x|--xml-target-file=xmlOutputFile: xml target file :return: exit code (int) """ compare2pdf = True manuscript_file = None page_number = None pdfFile = None svg_file = None title = None xml_target_file = None xml_dir = ".{}xml".format(sep) try: opts, args = getopt.getopt(argv, "hcd:m:t:p:s:x:P:", ["help", "compare-to-pdf", "xml-dir=", "manuscript-file=", "title=", "page=", "svg=", "xml-target-file=", "PDF="]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help') or not args: usage() return 0 elif opt in ('-c', '--compare-to-pdf'): compare2pdf = True elif opt in ('-d', '--xml-dir'): xml_dir = arg elif opt in ('-m', '--manuscript-file'): manuscript_file = arg elif opt in ('-t', '--title'): title = arg elif opt in ('-p', '--page'): page_number = str(arg) elif opt in ('-s', '--svg'): svg_file = arg elif opt in ('-P', '--PDF'): pdfFile = arg elif opt in ('-x', '--xml-target-file'): xml_target_file = str(arg) files_to_process = list() for arg in args: if isfile(arg): files_to_process.append(arg) elif isdir(arg): files_to_process = files_to_process + list(filter(lambda file: '.svg' in file, listdir(arg))) else: print("'{}' does not exist!".format(arg)) return 2 if len(files_to_process) < 1 or args[0].endswith('xml'): if xml_target_file is None: xml_target_file = args[0] if len(args) > 0 else None if xml_target_file is not None and isfile(xml_target_file): target_file_tree = ET.parse(xml_target_file) file_name = target_file_tree.getroot().get('source') title = target_file_tree.getroot().get('title') if title is None else title page_number = target_file_tree.getroot().get('number') if page_number is None else page_number if svg_file is None: if len(target_file_tree.xpath('//svg-image')) > 0: 
svg_file = target_file_tree.xpath('.//svg-image/@file-name')[0]\ if len(target_file_tree.xpath('.//svg-image/@file-name')) > 0 else None else: svg_file = target_file_tree.xpath('.//svg/@file')[0]\ if len(target_file_tree.xpath('.//svg/@file')) > 0 else None files_to_process.insert(0, file_name) if xml_target_file in files_to_process: files_to_process.remove(xml_target_file) else: usage() return 2 if len(files_to_process) > 1 and (bool(page_number) or bool(xml_target_file) or bool(pdfFile) or bool(svg_file)): print("ERROR: too many input files: options --PDF, --page, --svg and --xml-target-file presuppose only one input file!") usage() return 2 extractor = Extractor(xml_dir=xml_dir, title=title, manuscript_file=manuscript_file, compare2pdf=compare2pdf) for file in files_to_process: extractor.extractAndWriteInformation(file, page_number=page_number, xml_target_file=xml_target_file, pdfFile=pdfFile, svg_file=svg_file) return 0 if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: svgscripts/datatypes/clarification.py =================================================================== --- svgscripts/datatypes/clarification.py (revision 110) +++ svgscripts/datatypes/clarification.py (revision 111) @@ -1,51 +1,44 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a word clarification. """ # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET import re import sys from .editor_comment import EditorComment from .text import Text class Clarification(EditorComment): """ This class represents a word clarification. """ def __init__(self, text=None): super(Clarification, self).__init__(is_uncertain=False) self.text = text - @classmethod - def get_semantic_dictionary(cls): - """ Creates a semantic dictionary as specified by SemanticClass. - """ - dictionary = super(Clarification,cls).get_semantic_dictionary() - dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text', Text, name='clarificationHasText', cardinality=1)) - return cls.return_dictionary_after_updating_super_classes(dictionary) Index: svgscripts/datatypes/atypical_writing.py =================================================================== --- svgscripts/datatypes/atypical_writing.py (revision 110) +++ svgscripts/datatypes/atypical_writing.py (revision 111) @@ -1,52 +1,43 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent an atypical writing by the author.
""" # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET import re import sys from .editor_comment import EditorComment from .text import Text class AtypicalWriting(EditorComment): """ This class represents a a atpycial writing by the author. """ def __init__(self, text=None): super(AtypicalWriting, self).__init__(is_uncertain=False) self.text = text - - @classmethod - def get_semantic_dictionary(cls): - """ Creates a semantic dictionary as specified by SemanticClass. - """ - dictionary = super(AtypicalWriting,cls).get_semantic_dictionary() - dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('text', Text, name='atypicalWritingHasText')) - return cls.return_dictionary_after_updating_super_classes(dictionary) - Index: svgscripts/datatypes/word.py =================================================================== --- svgscripts/datatypes/word.py (revision 110) +++ svgscripts/datatypes/word.py (revision 111) @@ -1,913 +1,919 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a word. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import copy import inspect from lxml import etree as ET from operator import attrgetter import re import string import sys import warnings from .box import Box from .editor_comment import EditorComment from .matrix import Matrix from .path import Path from .simple_word import SimpleWord from .style import Style from .word_deletion_path import WordDeletionPath from .word_position import WordPosition from .transkription_position import TranskriptionPosition from .writing_process import WritingProcess SINGLE_PUNCTUATION_PATTERN = r"^[{}–]$".format(string.punctuation) def execute_function_on_parts(word_parts, func_name): """Execute function on parts and add those parts instead of original word to word_parts. 
:return: new word_parts, output from func """ copy_parts = word_parts[:] for word in word_parts: output = eval('word.{0}()'.format(func_name)) if len(word.word_parts) > 0: for part_word in word.word_parts: copy_parts.insert(copy_parts.index(word), part_word) copy_parts.remove(word) word.word_parts = [] return copy_parts, output def update_transkription_position_ids(word): """Update transkription_position' ids according to index. """ word_part_ids = [ wp.id for wp in word.word_parts ] if len(word_part_ids) != len(set(word_part_ids)): for id, wp in enumerate(word.word_parts): wp.id = id for index, transkription_position in enumerate(sorted(word.transkription_positions, key=attrgetter('left'))): transkription_position.id = index transkription_position.has_box = None transkription_position.deleted = False class Word(SimpleWord): """ This class represents a word. """ COPY_PROPERTY_KEY = [ 'line_number', 'deleted', 'writing_process_id' ] APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS = { 'style': 'styles' } DATA = 'debug-data' RDFS_SUBCLASSOF_LIST = ['https://www.e-editiones.ch/ontology/text#HandwrittenText'] XML_TAG = 'word' XML_EARLIER_VERSION = 'earlier-version' XML_OVERWRITES = 'overwrites' XML_CORRECTION_DICT = { 'isClarificationOfWord': 'clarifiesWord',\ 'isDeletionOfWord': 'deletesEarlierPart',\ 'isExtensionOfWord': 'extendsEarlierVersion',\ 'isTransformationOfWord': 'transformsEarlierPart' } def __init__(self, id=0, text='', line_number=-1, deleted=False, transkription_positions=None, faksimile_positions=None, word_part_objs=None, word_parts=None, writing_process_id=-1, earlier_version=None, box_paths=None, styles=None): super(Word,self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions) self.corrections = [] self.clean_edited_text = None self.deleted = deleted self.deletion_paths = [] self.deletion_paths_near_word = [] self.debug_container = {} self.debug_msg = None self.earlier_version = earlier_version self.edited_text = None - self.editor_comment = None + #self.editor_comment = None + self.editor_comments = [] self.isClarificationOfWord = None self.isDeletionOfWord = None self.isExtensionOfWord = None self.isTransformationOfWord = None if len(self.text) == 0 and len(''.join([ tp.get_text() for tp in self.transkription_positions if type(tp) == TranskriptionPosition ])) > 0: self.text = ''.join([ tp.get_text() for tp in self.transkription_positions ]) self.overwrites_word = None self.process_flags = [] self.styles = styles\ if styles is not None\ else [] self.verified = None self.writing_process_id = writing_process_id self.writing_processes = [] self.word_insertion_mark = None self.word_box = None self.word_parts = word_parts if word_parts is not None else [] self.word_part_objs = word_part_objs if word_part_objs is not None else [] def add_deletion_paths(self, deletion_paths, tr_xmin=0.0, tr_ymin=0.0): """Add a word deletion path to word. 
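        A hedged usage sketch (deletion_paths as extracted elsewhere from the
        page's SVG file; the offsets are the transkription field's origin and
        are hypothetical values here):

            word.add_deletion_paths(deletion_paths, tr_xmin=37.0, tr_ymin=24.0)
            # for a word with word.deleted == True, word.deletion_paths now holds
            # the paths that intersect the word's transkription positions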
""" if len(self.word_parts) > 0: for part in self.word_parts: part.add_deletion_paths(deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) elif self.deleted: index = 0 while len(self.deletion_paths) == 0 and index < len(self.transkription_positions): include_pwps = (len(self.transkription_positions[index].positional_word_parts) > 0 and abs(self.transkription_positions[index].left-self.transkription_positions[index].positional_word_parts[0].left) < 10) word_path = Path.create_path_from_transkription_position(self.transkription_positions[index],\ tr_xmin=tr_xmin, tr_ymin=tr_ymin, include_pwps=include_pwps) self.deletion_paths += [ deletion_path for deletion_path in deletion_paths\ if not Path.is_path_contained(self.deletion_paths, deletion_path)\ and deletion_path.do_paths_intersect(word_path) ] index += 1 def attach_word_to_tree(self, target_tree): """Attaches word to tree target_tree. """ word_node = super(Word,self).attach_word_to_tree(target_tree) if self.deleted is not None: word_node.set('deleted', str(self.deleted).lower()) if self.verified is not None: word_node.set('verified', str(self.verified).lower()) if self.edited_text is not None: word_node.set('edited-text', self.edited_text) - if self.editor_comment is not None: - self.editor_comment.attach_object_to_tree(word_node) + #if self.editor_comment is not None: + # self.editor_comment.attach_object_to_tree(word_node) + for editor_comment in self.editor_comments: + editor_comment.attach_object_to_tree(word_node) if self.writing_process_id > -1: word_node.set('writing-process-id', str(self.writing_process_id)) if len(self.process_flags) > 0: word_node.set('process-flags', ' '.join(self.process_flags)) for index, word_part in enumerate(self.word_parts): word_part.id = index word_part.attach_word_to_tree(word_node) if self.earlier_version is not None: earlier_node = ET.SubElement(word_node, self.XML_EARLIER_VERSION) self.earlier_version.attach_word_to_tree(earlier_node) if self.overwrites_word is not None\ and len(self.overwrites_word.transkription_positions) > 0: overwrite_node = ET.SubElement(word_node, self.XML_OVERWRITES) self.overwrites_word.attach_word_to_tree(overwrite_node) if self.word_box is not None: self.word_box.attach_object_to_tree(word_node) if len(self.corrections) > 0: word_node.set('corrections', ' '.join(set([ str(word.id) for word in self.corrections ]))) for deletion_id, deletion_path in enumerate(self.deletion_paths): deletion_path.id = deletion_id deletion_path.tag = WordDeletionPath.XML_TAG deletion_path.attach_object_to_tree(word_node) for key in self.XML_CORRECTION_DICT.keys(): if self.__dict__[key] is not None: word_node.set(self.XML_CORRECTION_DICT[key], 'true') return word_node def belongs_to_multiple_writing_processes(self, include_parts=False): """Returns true if transkription_positions belong to different WritingProcesses. """ if len(self.word_parts) > 0 and include_parts: return len(set(word.writing_process_id for word in self.word_parts)) > 1 return len(set(tp.writing_process_id for tp in self.transkription_positions )) > 1 def set_parent_word_writing_process_id(self): """Set writing_process_id for parent word. 
""" ids = set(word.transkription_positions[0].style for word in self.word_parts\ if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None) if len(ids) > 1: self.writing_process_id = max([style.writing_process_id for style in ids]) if len(set(word.transkription_positions[0].style.create_a_copy_wo_writing_process_id()\ for word in self.word_parts\ if len(word.transkription_positions) > 0 and word.transkription_positions[0].style is not None))\ > 1: self.writing_process_id += 1 @classmethod def create_cls(cls, word_node): """Creates a word from a (lxml.Element) node. [:return:] Word """ cls = super(Word,cls).create_cls(word_node) cls.writing_process_id = int(word_node.get('writing-process-id')) if bool(word_node.get('writing-process-id')) else -1 cls.split_strings = None cls.join_string = word_node.get('join') if bool(word_node.get('split')): cls.split_strings = word_node.get('split').split(' ') if ''.join(cls.split_strings) != cls.text: error_msg = 'Error in file {0}: word with id="{1}" has split attributes that do not correspond to its text attribute!\n'.\ format(word_node.getroottree().docinfo.URL, str(cls.id))\ + 'Split attributes: "{0}".\n'.format(' '.join(cls.split_strings))\ + 'Text attribute: "{0}".\n'.format(cls.text) raise Exception(error_msg) cls.verified = word_node.get('verified') == 'true'\ if bool(word_node.get('verified')) else None cls.deleted = word_node.get('deleted') == 'true'\ if bool(word_node.get('deleted')) else None cls.edited_text = word_node.get('edited-text') if cls.edited_text is not None: cls.clean_edited_text = cls._create_clean_text(cls.edited_text) + cls.editor_comments = [ EditorComment.create_cls_from_node(node) for node in word_node.xpath('./' + EditorComment.XML_TAG) ] + """ cls.editor_comment = [ EditorComment.create_cls_from_node(node) for node in word_node.xpath('./' + EditorComment.XML_TAG) ][0]\ if len([ node for node in word_node.xpath('./' + EditorComment.XML_TAG) ]) > 0 else None + """ cls.word_parts = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_TAG) ] if bool(word_node.get('corrections')): for index in [ int(i) for i in word_node.get('corrections').split(' ') ]: if index < len(cls.word_parts): cls.corrections.append(cls.word_parts[index]) cls.earlier_version = None if len(word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG)) > 0: cls.earlier_version = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_EARLIER_VERSION + '/' + cls.XML_TAG) ][0] for key_value in cls.XML_CORRECTION_DICT.values(): if word_node.get(key_value) == 'true': cls.__dict__[key_value] = True if cls.earlier_version is not None: for word_part in cls.word_parts: for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Part') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]\ and len(cls.word_parts) <= len(cls.earlier_version.word_parts): try: word_part.__dict__[key] = cls.earlier_version.word_parts[word_part.id] except Exception: msg = f'{cls.id} {cls.text}: {word_part.id}' raise Exception(msg) for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('EarlierVersion') ]: if cls.XML_CORRECTION_DICT[key] in word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]: word_part.__dict__[key] = cls.earlier_version for key in [ key for key, value in cls.XML_CORRECTION_DICT.items() if value.endswith('Word') ]: if cls.XML_CORRECTION_DICT[key] in 
word_part.__dict__.keys() and word_part.__dict__[cls.XML_CORRECTION_DICT[key]]: word_part.__dict__[key] = cls cls.overwrites_word = [ cls.create_cls(node) for node in word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)][0]\ if len(word_node.xpath('./' + cls.XML_OVERWRITES + '/' + cls.XML_TAG)) > 0\ else None cls.word_box = [ Box(node=node) for node in word_node.xpath('./' + Box.XML_TAG) ][0]\ if len(word_node.xpath('./' + Box.XML_TAG)) > 0\ else None cls.deletion_paths = [ Path(node=node) for node in word_node.xpath(f'./{WordDeletionPath.XML_TAG}') ] cls.process_flags = word_node.get('process-flags').split(' ')\ if bool(word_node.get('process-flags'))\ else [] return cls @classmethod def join_words(cls, list_of_words, add_white_space_between_words=False): """Creates a word from a list of words. [:return:] Word """ if len(list_of_words) > 1: deleted = True in [ word.deleted for word in list_of_words ]\ and len(set([ word.deleted for word in list_of_words ])) == 1 line_number = list_of_words[0].line_number\ if len(set([ word.line_number for word in list_of_words ])) == 1\ else -1 faksimile_positions = [] for word in list_of_words: if len(word.word_parts) > 0: faksimile_positions += word.faksimile_positions index = list_of_words.index(word) list_of_words.remove(word) for part_word in reversed(word.word_parts): list_of_words.insert(index, part_word) new_word_text = ''.join([word.text for word in list_of_words])\ if not add_white_space_between_words\ else ' '.join([word.text for word in list_of_words]) new_word = cls(id=list_of_words[0].id, text=new_word_text, faksimile_positions=faksimile_positions,\ line_number=line_number, deleted=deleted, word_parts=list_of_words) if True in [ word.text.endswith('-') or word.text.endswith('=') for word in new_word.word_parts[:-1]]: change_text = [ word.text for word in new_word.word_parts[:-1] if word.text.endswith('-') or word.text.endswith('=') ][0] new_word.edited_text = new_word.text.replace(change_text, change_text[:-1]) for id, word in enumerate(new_word.word_parts): word.id = id return new_word if len(list_of_words) > 0: return list_of_words[0] else: return None def create_earlier_version(self, root_word=None, id=0): """Create an earlier version of word. 
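        A hedged usage sketch (normally driven by create_correction_history):

            earlier = word.create_earlier_version()
            # earlier is a new Word approximating the state before deletions and
            # overwrites; corrected parts are collected in word.corrections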
""" if root_word is None: root_word = self root_word.set_parent_word_writing_process_id() word_parts = [] non_single_punctuation_word_parts = [ word_part for word_part in self.word_parts\ if not re.match(SINGLE_PUNCTUATION_PATTERN, word_part.text) ] non_single_punctuation_word_parts_length = len(non_single_punctuation_word_parts) if non_single_punctuation_word_parts_length > 0\ and len([ word_part for word_part in non_single_punctuation_word_parts\ if word_part.deleted ])\ == non_single_punctuation_word_parts_length: self.deleted = True for word_part in non_single_punctuation_word_parts: word_part.deleted = False for id, word_part in enumerate(self.word_parts): earlierWordPart = word_part.create_earlier_version(root_word=root_word, id=id) if word_part.deleted: word_part.isDeletionOfWord = earlierWordPart word_parts.append(earlierWordPart) if word_part not in self.corrections: self.corrections.append(word_part) elif word_part.overwrites_word is not None\ and ((len(word_part.transkription_positions) > 0\ and word_part.overwrites_word.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style\ != word_part.overwrites_word.transkription_positions[0].style) or word_part.word_box.earlier_version): word_part.overwrites_word.id = word_part.id word_parts.append(word_part.overwrites_word) word_part.isTransformationOfWord = word_part.overwrites_word #print(f'transform: {self.text}') if word_part not in self.corrections: self.corrections.append(word_part) elif root_word.writing_process_id > -1\ and (len(word_part.transkription_positions) > 0\ and word_part.transkription_positions[0].style is not None\ and word_part.transkription_positions[0].style.writing_process_id\ == root_word.writing_process_id): word_part.extendsEarlierVersion = True #print('extends') if word_part not in self.corrections: self.corrections.append(word_part) else: if word_part.deleted: word_part.isDeletionOfWord = earlierWordPart word_parts.append(earlierWordPart) if word_part not in self.corrections: self.corrections.append(word_part) else: #print(f'default: {self.text}') word_parts.append(earlierWordPart) text = ''.join([ word.text for word in word_parts ])\ if len(word_parts) > 0\ else self.text if len(word_parts) == 1: self.transkription_positions += word_parts[0].transkription_positions self.faksimile_positions += word_parts[0].faksimile_positions word_parts = [] new_transkription_positions = copy.deepcopy(self.transkription_positions) if len(self.transkription_positions) > 0\ and self.transkription_positions[0].style is not None: writing_process_id = self.transkription_positions[0].style.writing_process_id for new_tp in new_transkription_positions: new_tp.style.writing_process_id = writing_process_id return Word(id=id, text=text, transkription_positions=new_transkription_positions,\ faksimile_positions=self.faksimile_positions, line_number=self.line_number,\ word_parts=word_parts) def create_correction_history(self, page=None, box_style=None): """Create correction history. 
""" if self.word_box is not None: manuscript = self.transkription_positions[0].style.manuscript\ if len(self.transkription_positions) > 0\ and self.transkription_positions[0].style is not None\ else None style = Style() if box_style is not None: style = box_style if page is not None: style = Style.create_cls(page, self.word_box.text_style_class, manuscript=manuscript) for font_key in [ font_key for font_key in self.word_box.text_style_class.split(' ') if font_key in page.fontsizekey2stage_mapping.keys() ]: style.writing_process_id = page.fontsizekey2stage_mapping.get(font_key) transkription_positions = TranskriptionPosition.copy_list_of_cls(self.transkription_positions) for transkription_position in transkription_positions: transkription_position.style = style self.overwrites_word = Word(text=self.word_box.earlier_text, transkription_positions=transkription_positions,\ line_number=self.line_number) for word_part in self.word_parts: word_part.create_correction_history(page=page, box_style=box_style) if len(self.word_parts) > 0: earlier_version = self.create_earlier_version() extending_words = self._get_parts_with_property_key('extendsEarlierVersion') if len(extending_words) > 0: for word in extending_words: word.isExtensionOfWord = earlier_version if self.has_mixed_status('deleted', include_parts=True): self.edited_text = ''.join([ word.text for word in self.word_parts if not word.deleted ]) if len(self.corrections) > 0: self.earlier_version = earlier_version @staticmethod def CREATE_WORD(word_node=None, page=None, word_part_objs=[], id=0, height=0, endX=0, endSign=None, matrix=None, line_number=-1, debug_msg=None): """Creates a word from a (lxml.Element) node or word_part_objs. [:return:] Word """ if word_node is not None: # init word from xml node id = int(word_node.get('id')) line_number = int(word_node.get('line-number')) if bool(word_node.get('line-number')) else line_number text = word_node.get('text') deleted = bool(word_node.get('deleted')) and word_node.get('deleted') == 'true' transkription_positions = [ TranskriptionPosition(node=node) for node in word_node.findall('.//' + WordPosition.TRANSKRIPTION) ] faksimile_positions = [ WordPosition(node=node) for node in word_node.findall('.//' + WordPosition.FAKSIMILE) ] word_part_objs = [ item.attrib for item in word_node.findall('.//' + Word.DATA + '/part')]\ if len(word_node.findall('.//' + Word.DATA)) > 0\ else [ item.attrib for item in word_node.findall('.//part')] return Word(id=id, text=text, deleted=deleted, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions, word_part_objs=word_part_objs) elif len(word_part_objs) > 0: # init word from word_part_obj that has been extracted from svg file WIDTH = 5 TOPCORRECTION = 2.0 FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize height = height x = round(float(word_part_objs[0]['x']), 3) if(page is not None and bool(page.style_dict)): HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height style_set = set(' '.join(set( dict['class'] for dict in word_part_objs)).split(' ')) biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set) height = round(biggest_font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / biggest_font_size, 3) TOPCORRECTION = 1 + HEIGHT_FACTOR / biggest_font_size if endSign is not None and '%' in endSign: lastCharFontSizeList = [ float(page.style_dict[key]['font-size'].replace('px',''))\ for key in word_part_objs[len(word_part_objs)-1]['class'].split(' ')\ if 
bool(page.style_dict[key].get('font-size'))] lastCharFontSize = lastCharFontSizeList[0] if len(lastCharFontSizeList) > 0 else 1 endX = float(endX) + lastCharFontSize * FONTWIDTHFACTOR elif endSign is not None and '%' in endSign: endX = float(endX) + WIDTH bottom = round(float(word_part_objs[0]['y']), 3) y = round(bottom - height + TOPCORRECTION, 3) width = round(float(endX) - x, 3) transkription_positions = [ WordPosition(height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) ] text = ''.join([ dict['text'] for dict in word_part_objs]) line_number = page.get_line_number( (y + bottom)/2) if page is not None else line_number word = Word(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions, word_part_objs=word_part_objs) word.debug_msg = debug_msg return word else: error_msg = 'word_node has not been defined' if (word_node is None) else 'word_part_objs is empty' raise Exception('Error: {}'.format(error_msg)) @classmethod def get_semantic_dictionary(cls): """ Creates and returns a semantic dictionary as specified by SemanticClass. """ dictionary = super(Word,cls).get_semantic_dictionary() dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('styles', Style,\ cardinality=1, cardinality_restriction='minCardinality',\ name='wordHasStyle', label='word has style', comment='Word has an appearance that is characterized by this style.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('corrections', Word,\ name='wordHasCorrection', label='word has corrections', comment='Word has a correction made by the author.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('deletion_paths', WordDeletionPath,\ name='wordIsDeletedByPath', label='word has been deleted with a deletion path',\ comment='Word has been deleted by the author using a deletion path.')) - dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('editor_comment', EditorComment,\ + dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('editor_comments', EditorComment,\ name='wordHasEditorComment', label='word has a comment by the editors', comment='Word has been commented by the editors.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_version', Word,\ name='wordHasEarlierVersion', label='word has an earlier version', comment='There is an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('edited_text', str,\ name='hasEditedText', label='word has an edited text', comment='Word has a text that is edited automatically by removing deleted parts or hyphens.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('clean_edited_text', str,\ name='hasCleanEditedText', label='word has an edited text without punctuation',\ comment='Word has a text without punctuation that is edited automatically by removing deleted parts or hyphens.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isClarificationOfWord', Word,\ name='isClarificationOfWord', label='word is a clarification of word',\ comment='The author has used this part of the word in order to clarify the appearance of that word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isDeletionOfWord', Word,\ name='isDeletionOfWord', label='word is a deletion of word',\ comment='The author has used this part of a word in order 
to delete the corresponding part of an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isExtensionOfWord', Word,\ name='isExtensionOfWord', label='word is an extension of word',\ comment='The author has used this part of a word in order to extend an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('isTransformationOfWord', Word,\ name='isTransformationOfWord', label='word is a transformation of word',\ comment='The author has used this part of a word in order to transform the corresponding part of an earlier version of this word.')) dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('overwrites_word', Word,\ name='overwritesWord', label='word overwrites word',\ comment='The author has used this word in order to overwrite that word.')) # This makes wordHasWordParts a subproperty of cls.HAS_HOMOTYPIC_PARTS_URL_STRING, # cls.return_dictionary_after_updating_super_classes will subclass Word under the corresponding super class. dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('word_parts', list,\ name='wordHasWordParts', label='word has word parts', comment='Word consists of a list of words.',\ subPropertyOf=cls.HAS_HOMOTYPIC_PARTS_URL_STRING)) super_property_dictionary = cls.create_semantic_property_dictionary(cls.SUPER_PROPERTY, Word,\ name='isCorrectionOfWord', label='word is a correction of word',\ comment='The author has used this word in order to correct that word.') for key in cls.XML_CORRECTION_DICT.keys(): correction_dict = dictionary[cls.PROPERTIES_KEY].get(key) correction_dict.update(super_property_dictionary) dictionary[cls.PROPERTIES_KEY].update({key: correction_dict}) return cls.return_dictionary_after_updating_super_classes(dictionary) def has_mixed_status(self, property_key, include_parts=False, concerns_word=True): """Returns true if transkription_positions have mixed status concerning the property_key in their __dict__. """ if False in set(property_key in tp.__dict__.keys() for tp in self.transkription_positions): return False if len(self.word_parts) > 0 and include_parts: if concerns_word: if False in set(property_key in word.__dict__.keys() for word in self.word_parts): return False return len(set(word.__dict__[property_key] for word in self.word_parts)) > 1 else: return len(set(word.transkription_positions[0].__dict__[property_key] for word in self.word_parts\ if len(word.transkription_positions) > 0 and property_key in word.transkription_positions[0].__dict__.keys())) > 1 return len(set(tp.__dict__[property_key] for tp in self.transkription_positions )) > 1 def init_word(self, page): """Initialize word with objects from page. 
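        Usage sketch (assumes a fully instantiated datatypes.page.Page):
            word.init_word(page) # resolves lines, writing processes and deletion paths via page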
""" super(Word,self).init_word(page) if self.writing_process_id > -1: self.writing_processes += [ wp for wp in page.writing_processes if wp.id == self.writing_process_id ] writing_processes = self.writing_processes for word_part in self.word_parts: word_part.init_word(page) self.lines += word_part.lines self.writing_processes += word_part.writing_processes self.lines = [ line for line in set(self.lines) ] self.writing_processes = [ wp for wp in set(self.writing_processes)] if self.overwrites_word is not None: self.overwrites_word.init_word(page) if self.earlier_version is not None: if self.earlier_version.writing_process_id == -1: self.earlier_version.writing_process_id = self.writing_process_id-1 if self.earlier_version.line_number == -1: self.earlier_version.line_number = self.line_number self.earlier_version.init_word(page) self.deletion_paths = [ page.get_word_deletion_path(path) for path in self.deletion_paths if path.path is not None ] def join(self, other_word, append_at_end_of_new_word=True, add_white_space_between_words=False): """Joins other_word with this word by changing the text of current word and adding other_word.transkription_positions. """ if append_at_end_of_new_word: self.text = self.text + other_word.text\ if not add_white_space_between_words\ else self.text + ' ' + other_word.text for position in other_word.transkription_positions: position.id = str(len(self.transkription_positions)) self.transkription_positions.append(position) for position in other_word.faksimile_positions: position.id = str(len(self.faksimile_positions)) self.faksimile_positions.append(position) else: self.text = other_word.text + self.text index = 0 for position in other_word.transkription_positions: self.transkription_positions.insert(index, position) index += 1 while index < len(self.transkription_positions): self.transkription_positions[index].id = str(index) index += 1 index = 0 for position in other_word.faksimile_positions: self.faksimile_positions.insert(indexposition) index += 1 while index < len(self.faksimile_positions): self.faksimile_positions[index].id = str(index) index += 1 self.simplify_transkription_positions() def partition_according_to_deletion(self): """Partition a word according to its transkription_positions' deletion status ->split word and add partial words as its parts. 
""" if self.has_mixed_status('deleted'): transkription_positions = [] last_status = None for transkription_position in self.transkription_positions: if transkription_position.deleted != last_status\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id) for tp in transkription_positions: newWord.deletion_paths += tp._deletion_paths self.word_parts.append(newWord) transkription_positions = [] transkription_positions.append(transkription_position) last_status = transkription_position.deleted if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=last_status, writing_process_id=self.writing_process_id) for tp in transkription_positions: newWord.deletion_paths += tp._deletion_paths self.word_parts.append(newWord) self.transkription_positions = [] self.line_number = -1 self.deleted = False elif len(self.word_parts) > 0: self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_deletion') elif not self.deleted\ and len(self.transkription_positions) > 0\ and self.transkription_positions[0].deleted: self.deleted = True for tp in self.transkription_positions: self.deletion_paths += tp._deletion_paths def partition_according_to_writing_process_id(self): """Partition a word according to its transkription_positions' writing_process_ids ->split word and add partial words as its parts. """ if self.belongs_to_multiple_writing_processes(): last_writing_process_id = -1 transkription_positions = [] for transkription_position in self.transkription_positions: if transkription_position.writing_process_id != last_writing_process_id\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, writing_process_id=last_writing_process_id) self.word_parts.append(newWord) transkription_positions = [] transkription_positions.append(transkription_position) last_writing_process_id = transkription_position.writing_process_id if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, writing_process_id=last_writing_process_id) self.word_parts.append(newWord) self.transkription_positions = [] elif len(self.word_parts) > 0: self.word_parts, none = execute_function_on_parts(self.word_parts, 'partition_according_to_writing_process_id') if self.belongs_to_multiple_writing_processes(include_parts=True): self.writing_process_id = sorted(set([ word.writing_process_id for word in self.word_parts ]), reverse=True)[0] elif len(self.transkription_positions) > 0: self.writing_process_id = self.transkription_positions[0].writing_process_id def process_boxes(self, box_paths, tr_xmin=0.0, tr_ymin=0.0, previous_word_has_box=False): """Determines whether word is over a word box. 
""" word_over_box = None if len(self.word_parts) > 0: for word in self.word_parts: current_word = word.process_boxes(box_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin, previous_word_has_box=(word_over_box is not None)) if current_word is not None and current_word.word_box is not None: word_over_box = current_word else: new_tp_dict = {} for index, transkription_position in enumerate(self.transkription_positions): if previous_word_has_box and index == 0: if len(transkription_position.positional_word_parts) > 0: transkription_position.positional_word_parts[0].left += transkription_position.positional_word_parts[0].width/2 #print(f'{self.text}: {transkription_position.positional_word_parts[0].left}') else: transkription_position.left += 1 word_path = Path.create_path_from_transkription_position(transkription_position,\ tr_xmin=tr_xmin, tr_ymin=tr_ymin) containing_boxes = [ box_path for box_path in box_paths\ if word_path.is_partially_contained_by(box_path)\ or box_path.do_paths_intersect(word_path) ] if len(containing_boxes) > 0: if previous_word_has_box: print(f'{self.text}: {word_path.path.bbox()} {containing_boxes[0].path.bbox()}') self._set_box_to_transkription_position(containing_boxes[0], word_path,\ transkription_position, new_tp_dict, tr_xmin) box_paths.remove(containing_boxes[0]) for replace_tp in new_tp_dict.keys(): for tp in new_tp_dict.get(replace_tp): self.transkription_positions.insert(self.transkription_positions.index(replace_tp), tp) self.transkription_positions.remove(replace_tp) word_over_box = self._get_partial_word_over_box() update_transkription_position_ids(self) return word_over_box def set_word_insertion_mark(self, word_insertion_mark): """Sets word_insertion_mark """ self.word_insertion_mark = word_insertion_mark def set_writing_process_id_to_transkription_positions(self, page): """Determines the writing process id of the transkription_positions. """ for transkription_position in self.transkription_positions: if len(transkription_position.positional_word_parts) > 0: for font_key in transkription_position.positional_word_parts[0].style_class.split(' '): if font_key in page.fontsizekey2stage_mapping.keys(): transkription_position.writing_process_id = page.fontsizekey2stage_mapping.get(font_key) def simplify_transkription_positions(self): """Merge transkription_positions if possible. """ index = len(self.transkription_positions)-1 while index > 0\ and False not in [ 'positional_word_parts' in tp.__dict__.keys() for tp in self.transkription_positions ]: current_tp = self.transkription_positions[index] index -= 1 previous_tp = self.transkription_positions[index] if previous_tp.is_mergebale_with(current_tp): positional_word_parts = previous_tp.positional_word_parts positional_word_parts += current_tp.positional_word_parts transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\ positional_word_parts, debug_msg_string='simplifying transkription positions', transkription_position_id=previous_tp.id) if len(transkription_positions) == 1: transkription_positions[0].writing_process_id = previous_tp.writing_process_id\ if previous_tp.writing_process_id != -1\ else current_tp.writing_process_id self.transkription_positions.pop(index+1) self.transkription_positions[index] = transkription_positions[0] #print(self.text, len(self.transkription_positions)) def split(self, split_string, start_id=0): """Splits the word and returns an 3-tuple of new words. 
""" previousString, currentString, nextString = self.text.partition(split_string) currentWord = None previousWord = None nextWord = None previousIndex = 0 current_id = start_id all_positional_word_parts = [] for position in self.transkription_positions: all_positional_word_parts += position.positional_word_parts if len(all_positional_word_parts) == 0: warnings.warn('ATTENTION: Word: {} {} with Strings "{}, {}, {}": there are no parts!'.format(self.id, self.text, previousString, currentString, nextString)) if len(previousString) > 0: previous_pwps = [] while previousIndex < len(all_positional_word_parts) and previousString != ''.join([ pwp.text for pwp in previous_pwps ]): previous_pwps.append(all_positional_word_parts[previousIndex]) previousIndex += 1 if previousString != ''.join([ pwp.text for pwp in previous_pwps ]): warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(previousString)) else: previous_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(previous_pwps, debug_msg_string='word.split') previous_text = ''.join([ pwp.text for pwp in previous_pwps ]) previousWord = Word(text=previous_text, id=current_id, line_number=self.line_number, transkription_positions=previous_transkription_positions) previousWord.faksimile_positions = self.faksimile_positions current_id += 1 all_positional_word_parts = all_positional_word_parts[previousIndex:] if len(nextString) > 0: tmp_pwps = [] index = 0 while index < len(all_positional_word_parts) and currentString != ''.join([ pwp.text for pwp in tmp_pwps ]): tmp_pwps.append(all_positional_word_parts[index]) index += 1 if currentString != ''.join([ pwp.text for pwp in tmp_pwps ]): warnings.warn('ATTENTION: "{}" does not match a word_part_obj!'.format(currentString)) else: next_pwps = all_positional_word_parts[index:] next_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(next_pwps, debug_msg_string='word.split') next_text = ''.join([ pwp.text for pwp in next_pwps ]) nextWord = Word(text=next_text, id=current_id+1, line_number=self.line_number, transkription_positions=next_transkription_positions) nextWord.faksimile_positions = self.faksimile_positions all_positional_word_parts = all_positional_word_parts[:index] current_transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(all_positional_word_parts, debug_msg_string='word.split') current_text = ''.join([ pwp.text for pwp in all_positional_word_parts ]) currentWord = Word(text=current_text, id=current_id, line_number=self.line_number, transkription_positions=current_transkription_positions) currentWord.faksimile_positions = self.faksimile_positions return previousWord, currentWord, nextWord def split_according_to_status(self, status, splits_are_parts=False): """Split a word according to its transkription_positions' text. 
:return: a list of new word.Word """ new_words = [] if self.has_mixed_status(status): last_status = None transkription_positions = [] for transkription_position in self.transkription_positions: if transkription_position.__dict__[status] != last_status\ and len(transkription_positions) > 0: new_words.append(\ self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words))) transkription_positions = [] transkription_positions.append(transkription_position) last_status = transkription_position.__dict__[status] if len(transkription_positions) > 0: new_words.append(\ self._create_new_word(transkription_positions, status, new_id=self.id+len(new_words))) if splits_are_parts: self.word_parts += new_words if len(self.word_parts) > 0: self.transkription_positions = [] return new_words def undo_partitioning(self): """Undo partitioning. """ if len(self.word_parts) > 0: for word_part in self.word_parts: word_part.undo_partitioning() if self.text != ''.join([ tp.get_text() for tp in self.transkription_positions ]): self.transkription_positions += word_part.transkription_positions self.earlier_version = None self.edited_text = None self.word_box = None self.word_parts = [] self.corrections = [] self.earlier_versions = [] self.box_paths = [] def _create_new_word(self, transkription_positions, status, new_id=0): """Create a new word from self and transkription_positions. """ newWord = Word(id=new_id, transkription_positions=transkription_positions) for key in self.COPY_PROPERTY_KEY: if key != status and key in self.__dict__.keys(): newWord.__dict__[key] = self.__dict__[key] if status in self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS.keys(): newWord.__dict__[self.APPEND_PROPERTY2LIST_SOURCE_TARGET_KEYS[status]].append(transkription_positions[0].__dict__[status]) else: newWord.__dict__[status] = transkription_positions[0].__dict__[status] return newWord def _get_parts_with_property_key(self, property_key): """Return a list of word_parts with property == property_key. """ word_parts = [] for word_part in self.word_parts: if property_key in word_part.__dict__.keys(): word_parts.append(word_part) else: word_parts += word_part._get_parts_with_property_key(property_key) return word_parts def _get_partial_word_over_box(self): """Partition a word according to its transkription_positions' has_box ->split word and add partial words as its parts. 
:return: word over box or self """ word_over_box = None if self.has_mixed_status('has_box'): transkription_positions = [] last_word_box = None for transkription_position in self.transkription_positions: if transkription_position.has_box != last_word_box\ and len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id) self.word_parts.append(newWord) if last_word_box is not None: word_over_box = newWord word_over_box.word_box = last_word_box transkription_positions = [] transkription_positions.append(transkription_position) last_word_box = transkription_position.has_box if len(transkription_positions) > 0: newWord = Word(id=len(self.word_parts), line_number=self.line_number,\ transkription_positions=transkription_positions, deleted=self.deleted, writing_process_id=self.writing_process_id) self.word_parts.append(newWord) if last_word_box is not None: word_over_box = newWord word_over_box.word_box = last_word_box self.transkription_positions = [] elif len(self.word_parts) > 0: #self.word_parts, word_over_box = execute_function_on_parts(self.word_parts, inspect.currentframe().f_code.co_name) #'get_partial_word_over_box') for word_part in self.word_parts: if word_over_box is None: word_over_box = word_part._get_partial_word_over_box() else: break elif len([ tp for tp in self.transkription_positions if tp.has_box is not None]) == 1: word_over_box = self word_over_box.word_box = [ tp for tp in self.transkription_positions if tp.has_box is not None][0].has_box return word_over_box def _set_box_to_transkription_position(self, box_path, word_path, transkription_position, new_transkription_positions_dictionary, tr_xmin): """Set box_path to transkription_position that is contained by box_path. Create new transkription_positions by splitting old ones if necessary and add them to new_transkription_positions_dictionary. """ if box_path.contains_path(word_path): transkription_position.has_box = box_path elif box_path.contains_start_of_path(word_path): split_position = box_path.path.bbox()[1] - tr_xmin new_tps = transkription_position.split(split_position) if len(new_tps) == 2: new_tps[0].has_box = box_path new_transkription_positions_dictionary.update({ transkription_position: new_tps }) else: transkription_position.has_box = box_path elif box_path.contains_end_of_path(word_path): split_position = box_path.path.bbox()[0] - tr_xmin new_tps = transkription_position.split(split_position) if len(new_tps) == 2: new_tps[1].has_box = box_path new_transkription_positions_dictionary.update({ transkription_position: new_tps }) else: transkription_position.has_box = box_path else: # box_path in the middle of word_path split_position1 = box_path.path.bbox()[0] - tr_xmin split_position2 = box_path.path.bbox()[1] - tr_xmin new_tps = transkription_position.split(split_position1, split_position2) if len(new_tps) >= 2: new_tps[1].has_box = box_path new_transkription_positions_dictionary.update({ transkription_position: new_tps }) else: transkription_position.has_box = box_path def do_paths_intersect_saveMode(mypath1, mypath2): """Returns true if paths intersect, false if not or if there was an exception. 
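        Usage sketch (both arguments are datatypes.path.Path-like objects wrapping svgpathtools paths):
            intersects = do_paths_intersect_saveMode(word_path, deletion_path)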
""" try: return mypath1.path.intersect(mypath2.path, justonemode=True)\ or mypath1.is_partially_contained_by(mypath2) except AssertionError: return False Index: svgscripts/datatypes/page.py =================================================================== --- svgscripts/datatypes/page.py (revision 110) +++ svgscripts/datatypes/page.py (revision 111) @@ -1,430 +1,432 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a page. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET from os.path import isfile, basename from progress.bar import Bar from svgpathtools import svg2paths2, svg_to_paths from svgpathtools.parser import parse_path import re import sys import warnings from .box import Box from .color import Color from .image import Image, SVGImage from .faksimile_image import FaksimileImage from .faksimile_position import FaksimilePosition from .imprint import Imprint from .lineNumber import LineNumber from .line import Line from .mark_foreign_hands import MarkForeignHands from .matrix import Matrix from .path import Path from .positional_word_part import PositionalWordPart from .super_page import SuperPage from .style import Style from .text_connection_mark import TextConnectionMark from .text_field import TextField from .transkriptionField import TranskriptionField from .writing_process import WritingProcess from .word import Word from .word_deletion_path import WordDeletionPath from .word_insertion_mark import WordInsertionMark sys.path.append('py2ttl') from class_spec import SemanticClass sys.path.append('shared_util') from main_util import extract_paths_on_tf, get_paths_near_position FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK class Page(SemanticClass,SuperPage): """ This class represents a page. Args: xml_source_file (str): name of the xml file to be instantiated. faksimile_image: FaksimileImage. faksimile_svgFile: svg file containing information about word positions. 
""" UNITTESTING = False def __init__(self, xml_source_file=None, faksimile_image=None, faksimile_svgFile=None, add_paths_near_words=False, warn=False, number=None): if xml_source_file is not None: super(Page,self).__init__(xml_source_file) self.update_property_dictionary('faksimile_image', faksimile_image) self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile) self.init_all_properties() self.add_style(style_node=self.page_tree.getroot().find('.//style')) self.faksimile_text_field = None self.svg_text_field = None self.init_node_objects() self.warn = warn self.add_deletion_paths_to_words(add_paths_near_words) else: self.page_tree = None self.number = number def add_deletion_paths_to_words(self, add_paths_near_words=False): """Add deletion paths to words. """ words = [ word for word in self.words if (len(word.word_parts) == 0 and word.deleted and len(word.deletion_paths) == 0)\ or 'add_paths_near_words' in word.process_flags ] words += [ word for word in self.words\ if len(word.word_parts) > 0 and True in\ [ (wp.deleted and len(wp.deletion_paths) == 0) for wp in word.word_parts ]] if len(words) > 0 and ((self.svg_file is not None and isfile(self.svg_file))\ or (self.source is not None and isfile(self.source))): svg_file = self.svg_file if self.svg_file is not None else self.source transkription_field = TranskriptionField(svg_file) tr_xmin = transkription_field.xmin if (self.svg_image is None or self.svg_image.text_field is None) else 0 tr_ymin = transkription_field.ymin if (self.svg_image is None or self.svg_image.text_field is None) else 0 word_deletion_paths = self.word_deletion_paths index = 0 dp_updated = False while index < len(words): word = words[index] word.add_deletion_paths(word_deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) if len(word.deletion_paths) > 0 or True in [ len(w.deletion_paths) > 0 for w in word.word_parts ]: deletion_paths = word.deletion_paths for wp in word.word_parts: deletion_paths += wp.deletion_paths for deletion_path in deletion_paths: if deletion_path not in self.word_deletion_paths: self.word_deletion_paths.append(deletion_path) elif not dp_updated: word_deletion_paths = extract_paths_on_tf(self) dp_updated = True index -= 1 if add_paths_near_words\ and ('add_paths_near_words' in word.process_flags\ or ((word.deleted and len(word.deletion_paths) == 0)\ or True in [ (w.deleted and len(w.deletion_paths) == 0) for w in word.word_parts ])): if not dp_updated\ and 'add_paths_near_words' in word.process_flags: word_deletion_paths = extract_paths_on_tf(self) dp_updated = True transform = None tp = None target_word = word paths_near_word = [] if word.deleted and len(word.transkription_positions) > 0: transform = word.transkription_positions[0].transform for tp in word.transkription_positions: word.deletion_paths_near_word += get_paths_near_position(tp, word_deletion_paths) elif len(word.word_parts) > 0: for wp in word.word_parts: if wp.deleted and len(wp.transkription_positions) > 0: target_word = wp for tp in wp.transkription_positions: wp.deletion_paths_near_word = get_paths_near_position(tp, word_deletion_paths) if self.warn and (word.deleted and len(word.deletion_paths) == 0): warnings.warn(\ f'WARNING: {self.title} {self.number}: {word.id} on {word.line_number}, {word.text} has no deletion paths! {target_word.deletion_paths_near_word}, {transform}') index += 1 @classmethod def create_cls(cls, xml_source_file=None, create_dummy_page=False, page_node=None): """Create a Page. 
""" if not create_dummy_page: return cls(xml_source_file) else: m = re.match(r'(.*)(page[0]*)(.*)(\.xml)', xml_source_file) if m is not None and len(m.groups()) > 3: number = m.group(3) else: number = basename(xml_source_file).replace('.xml','') return cls(number=number) @classmethod def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None): """Returns a list of Page instantiating a xml_file of type FILE_TYPE_SVG_WORD_POSITION or xml_files contained in xml_file of type FILE_TYPE_XML_MANUSCRIPT. [optional: instantiation depends on the fulfilment of a status_contains and/or on the selection of some words by a word_selection_function]. """ source_tree = ET.parse(xml_file) if source_tree.getroot().find('metadata/type').text == cls.FILE_TYPE_SVG_WORD_POSITION: page = cls(xml_file) if word_selection_function is None or len(word_selection_function(page.words)) > 0: return [ page ] else: return [] elif source_tree.getroot().find('metadata/type').text == FILE_TYPE_XML_MANUSCRIPT: pages = [] xpath = '//page/@output' if status_contains != '' and status_not_contain != '': xpath = '//page[contains(@status, "{0}") and not(contains(@status, "{1}"))]/@output'.format(status_contains, status_not_contain) elif status_contains != '': xpath = '//page[contains(@status, "{0}")]/@output'.format(status_contains) elif status_not_contain != '': xpath = '//page[not(contains(@status, "{0}"))]/@output'.format(status_not_contain) for xml_source_file in source_tree.xpath(xpath): if isfile(xml_source_file): pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function) return pages else: return [] @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. """ dictionary = {} class_dict = cls.get_class_dictionary() properties = { 'number': { 'class': str, 'cardinality': 1}} properties.update(cls.create_semantic_property_dictionary('faksimile_image', FaksimileImage, subPropertyOf=cls.HAS_IMAGE)) properties.update(cls.create_semantic_property_dictionary('faksimile_text_field', TextField,\ name='pageIsOnFaksimileTextField', label='page is on faksimile text field',\ comment='Relates a page to the text field on a svg image.', subPropertyOf=cls.PAGE_IS_ON_TEXTFIELD)) properties.update(cls.create_semantic_property_dictionary('orientation', str)) properties.update(cls.create_semantic_property_dictionary('svg_image', SVGImage, subPropertyOf=cls.HAS_IMAGE)) properties.update(cls.create_semantic_property_dictionary('svg_text_field', TextField,\ name='pageIsOnSVGTextField', label='page is on svg text field',\ comment='Relates a page to the text field on a faksimile image.', subPropertyOf=cls.PAGE_IS_ON_TEXTFIELD)) - for key in [ 'imprints', 'lines', 'mark_foreign_hands', 'words', 'word_deletion_paths', 'word_insertion_marks']: + for key in [ 'lines','imprints', 'mark_foreign_hands', 'words', 'word_deletion_paths', 'word_insertion_marks']: properties.update(cls.create_semantic_property_dictionary(key, list)) dictionary.update({cls.CLASS_KEY: class_dict}) dictionary.update({cls.PROPERTIES_KEY: properties}) return cls.return_dictionary_after_updating_super_classes(dictionary) def get_word_deletion_path(self, path=None, d_attribute=None) ->WordDeletionPath: """Return a word deletion path that belongs to page. 
""" if path is None and d_attribute is None: raise Exception('ERROR: get_word_deletion_path needs a path or a d_attribute!') if d_attribute is None: d_attribute = path.d_attribute page_paths = [ dpath for dpath in self.word_deletion_paths if dpath.d_attribute == d_attribute ] if len(page_paths) > 0: return page_paths[0] else: dpath = WordDeletionPath.create_cls(self, path=path, d_attribute=d_attribute) if dpath is not None: dpath.id = len(self.word_deletion_paths) self.word_deletion_paths.append(dpath) dpath.attach_object_to_tree(self.page_tree) return dpath def init_node_objects(self): """Initialize all node objects. """ self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG) ] self.words = [ Word.create_cls(word_node) for word_node in self.page_tree.getroot().xpath('./word') ] self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in self.page_tree.getroot().xpath('//' + MarkForeignHands.XML_TAG) ] - self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ] + #self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ] + self.words += [ TextConnectionMark.instantiate_as_word(node, id=index+len(self.words))\ + for index, node in enumerate(self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG)) ] self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ] self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ] self.imprints = [ Imprint.create_cls_from_node(imprint_node, self.lines) for imprint_node in self.page_tree.getroot().xpath('//' + Imprint.XML_TAG) ] self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ] self.word_deletion_paths = [ WordDeletionPath.create_cls(self, node=node) for node in self.page_tree.xpath('./' + WordDeletionPath.XML_TAG) ] if self.faksimile_image is not None and self.faksimile_image.text_field is not None: self.faksimile_text_field = self.faksimile_image.text_field if self.svg_image is not None and self.svg_image.text_field is not None: self.svg_text_field = self.svg_image.text_field for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks: simple_word.init_word(self) for wim in self.word_insertion_marks: if wim.line_number > -1: wim.line = [ line for line in self.lines if line.id == wim.line_number ][0] def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]): """Update word ids and attach them to page.page_tree. 
""" if not self.is_locked(): update_function_on_word = [ update_function_on_word ]\ if type(update_function_on_word) != list\ else update_function_on_word for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG): node.getparent().remove(node) for index, word in enumerate(self.words): word.id = index for func in update_function_on_word: if callable(func): func(word) word.attach_word_to_tree(self.page_tree) for index, mark_foreign_hands in enumerate(self.mark_foreign_hands): mark_foreign_hands.id = index if MarkForeignHands in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(mark_foreign_hands) mark_foreign_hands.attach_word_to_tree(self.page_tree) for index, text_connection_mark in enumerate(self.text_connection_marks): text_connection_mark.id = index if TextConnectionMark in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(text_connection_mark) text_connection_mark.attach_word_to_tree(self.page_tree) else: print('locked') def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None): """Update the data source of page. """ if faksimile_svgFile is not None: self.faksimile_svgFile = faksimile_svgFile data_node = self.page_tree.xpath('.//data-source')[0]\ if len(self.page_tree.xpath('.//data-source')) > 0\ else ET.SubElement(self.page_tree.getroot(), 'data-source') data_node.set('file', self.faksimile_svgFile) if xml_correction_file is not None: data_node.set('xml-corrected-words', xml_correction_file) def update_line_number_area(self, transkription_field, svg_tree=None, set_to_text_field_zero=True): """Determines the width of the area where the line numbers are written in the page.source file. 
""" THRESHOLD = 0.4 if svg_tree is None: svg_tree = ET.parse(self.source) if len(self.line_numbers) > 1: line_number = self.line_numbers[9]\ if transkription_field.is_page_verso() and len(self.line_numbers) > 8\ else self.line_numbers[1] ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)\ if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)\ and LineNumber.IS_A_LINE_NUMBER(item)\ and LineNumber(raw_text_node=item).id == line_number.id ] if len(ln_nodes) > 0: matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform')) if transkription_field.is_page_verso(): transkription_field.add_line_number_area_width(matrix.getX()) elif self.svg_file is not None and isfile(self.svg_file): svg_path_tree = ET.parse(self.svg_file) namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() } svg_x = matrix.getX() svg_y = self.line_numbers[1].bottom + transkription_field.ymin\ if set_to_text_field_zero\ else self.line_numbers[1].bottom use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\ .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces) if len(use_nodes) > 0: symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '') d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces) if len(d_strings) > 0 and d_strings[0] != '': path = parse_path(d_strings[0]) xmin, xmax, ymin, ymax = path.bbox() width = xmax - xmin transkription_field.add_line_number_area_width(matrix.getX() + width) def update_page_type(self, transkription_field=None): """Adds a source to page and attaches it to page_tree. """ if self.number.endswith('r')\ or self.number.endswith('v'): self.page_type = Page.PAGE_VERSO\ if self.number.endswith('v')\ else Page.PAGE_RECTO else: if transkription_field is None: if self.source is None or not isfile(self.source): raise FileNotFoundError('Page does not have a source!') transkription_field = TranskriptionField(self.source, multipage_index=self.multipage_index) self.page_type = Page.PAGE_VERSO\ if transkription_field.is_page_verso()\ else Page.PAGE_RECTO self.page_tree.getroot().set('pageType', self.page_type) def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False): """Update styles of words and add them to their transkription_positions. Args: add_to_parents: Add styles also to word (and if not None to manuscript). partition_according_to_styles: Partition word if its transkription_positions have different styles. 
""" style_dictionary = {} if words is None: words = self.words for word in words: if len(word.word_parts) > 0: self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\ add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles) for transkription_position in word.transkription_positions: if len(transkription_position.positional_word_parts) > 0: style_class = transkription_position.positional_word_parts[0].style_class writing_process_id = -1 for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]: writing_process_id = self.fontsizekey2stage_mapping.get(font_key) style_class_key = (Style.remove_irrelevant_style_keys(style_class, self, extended_styles=create_css), writing_process_id) if create_css: if style_dictionary.get((style_class_key, word.deleted)) is None: color = None if len(word.deletion_paths) > 0: if word.deletion_paths[0].style_class is not None\ and word.deletion_paths[0].style_class != ''\ and self.style_dict.get(word.deletion_paths[0].style_class) is not None: color = Color.create_cls_from_style_object(self.style_dict.get(word.deletion_paths[0].style_class)) else: color = Color() style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\ create_css=create_css, deletion_color=color, writing_process_id=style_class_key[1] ) transkription_position.style = style_dictionary[(style_class_key, word.deleted)] #print(style_dictionary[(style_class_key, word.deleted)]) else: if style_dictionary.get(style_class_key) is None: style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css) style_dictionary[style_class_key].writing_process_id = style_class_key[1] transkription_position.style = style_dictionary[style_class_key] if add_to_parents and transkription_position.style not in word.styles: word.styles.append(transkription_position.style) if partition_according_to_styles: word.split_according_to_status('style', splits_are_parts=True) if manuscript is not None\ and add_to_parents: manuscript.update_styles(*style_dictionary.values()) def __eq__(self, other): """Returns true if self is qualitatively identical to other. """ if other is None: return False if self.page_tree is None and other.page_tree is None: return self.number == other.number if self.page_tree is None or other.page_tree is None: return False return self.page_tree.docinfo.URL == other.page_tree.docinfo.URL def __hash__(self): """Return a hash value for self. """ try: if self.page_tree is None: return hash(self.number) except AttributeError: print(self) return hash(self.number) return hash(self.page_tree.docinfo.URL) Index: svgscripts/datatypes/faksimile.py =================================================================== --- svgscripts/datatypes/faksimile.py (revision 110) +++ svgscripts/datatypes/faksimile.py (revision 111) @@ -1,204 +1,205 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a faksimile page. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import re from lxml import etree as ET from os import path from os.path import isdir, isfile, sep, basename from svgpathtools.parser import parse_path from .faksimile_image import FaksimileImage from .matrix import Matrix from .text_field import TextField from .word_position import WordPosition class FaksimilePage: """ This class represents a faksimile page. Args: xml_target_file (str): name of the xml file to which page info will be written. xml_source_file (str): name of the xml file that will be instantiated. """ XML_TAG = 'faksimile-page' def __init__(self, xml_source_file=None, xml_target_file=None, title=None, page_number=None, svg_source_file=None, faksimile_image=None, text_field=None): xml_file = xml_source_file if xml_source_file is not None else xml_target_file self.title = title self.page_number = page_number self.xml_file = xml_file if xml_file is not None and isfile(xml_file): parser = ET.XMLParser(remove_blank_text=True) self.page_tree = ET.parse(xml_file, parser) self.title = self.page_tree.getroot().get('title') self.page_number = self.page_tree.getroot().get('page-number') self.width = float(self.page_tree.getroot().get('width')) if bool(self.page_tree.getroot().get('width')) else 0.0 self.height = float(self.page_tree.getroot().get('height')) if bool(self.page_tree.getroot().get('height')) else 0.0 else: self.page_tree = ET.ElementTree(ET.Element(self.XML_TAG)) if title is not None: self.page_tree.getroot().set('title', title) if page_number is not None: self.page_tree.getroot().set('page-number', str(page_number)) if xml_target_file is not None: self.remove_tags_from_page_tree([WordPosition.FAKSIMILE]) if svg_source_file is not None: self.page_tree.getroot().set('svg-source-file', svg_source_file) if faksimile_image is not None: faksimile_image.attach_object_to_tree(self.page_tree) if text_field is not None: text_field.attach_object_to_tree(self.page_tree) self.svg_source_file = self.page_tree.getroot().get('svg-source-file') self.faksimile_image = FaksimileImage(node=self.page_tree.getroot().find('.//' + FaksimileImage.XML_TAG))\ if len(self.page_tree.getroot().findall('.//' + FaksimileImage.XML_TAG)) > 0 else None self.text_field = TextField(node=self.page_tree.getroot().find('.//' + TextField.XML_TAG))\ if len(self.page_tree.getroot().findall('.//' + TextField.XML_TAG)) > 0 else None self.word_positions = [ WordPosition(node=node) for node in self.page_tree.getroot().findall('.//' + WordPosition.FAKSIMILE) ]\ if len(self.page_tree.getroot().findall('.//' + WordPosition.FAKSIMILE)) > 0 else [] def append_word_position(self, word_position): """Appends word_position to word_positions and attaches it to page_tree. """ self.word_positions.append(word_position) word_position.attach_object_to_tree(self.page_tree) @classmethod def get_faksimile_pages(cls, svg_file, page_number='') -> list: """Creates and returns text fields contained in a svg_file as a list. 
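        Usage sketch (hypothetical faksimile svg file exported from the layout program):
            faksimile_pages = FaksimilePage.get_faksimile_pages('W-II-1.svg', page_number='49')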
""" svg_tree = ET.parse(svg_file) namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() } return cls.GET_FAKSIMILEPAGES(svg_tree, namespaces=namespaces, page_number=page_number) @staticmethod def GET_FAKSIMILEPAGES(svg_tree, namespaces=None, page_number='') -> list: """Creates and returns text fields contained in a svg_tree as a list. """ THRESHOLD_X = 10 if namespaces is None: namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() } source_file_name = svg_tree.docinfo.URL image = FaksimileImage.CREATE_IMAGE(svg_tree.getroot().find('.//image', svg_tree.getroot().nsmap), source_file_name) xml_dir = '.{}xml'.format(sep) faksimile_pages = list() title_string = re.sub(r'[,_][0-9]+.*\.svg', '', basename(source_file_name)) if re.match(r'.*-\d+[a-z]$', title_string): title_string = re.sub(r'-\d+[a-z]$', '', title_string) title = title_string.replace('-', ' ') rect_list = [ rect for rect in svg_tree.getroot().findall('.//rect', svg_tree.getroot().nsmap)\ if rect.get('id', svg_tree.getroot().nsmap).startswith(title_string)\ and rect.get('id', svg_tree.getroot().nsmap).endswith(str(page_number)) ] for text_field_rect in rect_list: tf_x = float(text_field_rect.get('x', svg_tree.getroot().nsmap)) - image.x tf_y = float(text_field_rect.get('y', svg_tree.getroot().nsmap)) - image.y tf_width = float(text_field_rect.get('width', svg_tree.getroot().nsmap)) tf_height = float(text_field_rect.get('height', svg_tree.getroot().nsmap)) tf_matrix = Matrix(transform_matrix_string=text_field_rect.get('transform'))\ if bool(text_field_rect.get('transform'))\ else None id = text_field_rect.get('id', svg_tree.getroot().nsmap) target_file_name = xml_dir + sep + id + '.xml' if isdir(xml_dir) else id + '.xml' page_number = re.sub(r'.*[,_]', '', id) if page_number.startswith('0'): page_number = page_number.lstrip('0') text_field = TextField(id=id, width=tf_width, height=tf_height, x=tf_x, y=tf_y, matrix=tf_matrix) faksimile_page = FaksimilePage(xml_target_file=target_file_name, svg_source_file=source_file_name,\ title=title, page_number=page_number, faksimile_image=image, text_field=text_field) x_min = text_field.xmin + image.x y_min = text_field.ymin + image.y #rect_titles = svg_tree.getroot().xpath('//ns:rect[@x>"{0}" and @x<"{1}" and @y>"{2}" and @y<"{3}" and @id!="{4}"]/ns:title'.format(\ # x_min, text_field.xmax + image.x - THRESHOLD_X, y_min, text_field.ymax + image.y, text_field.id), namespaces=namespaces) rect_titles = get_paths_inside_rect(svg_tree, '//ns:rect/ns:title', x_min, text_field.xmax + image.x - THRESHOLD_X,\ y_min, text_field.ymax + image.y, text_field.id, namespaces=namespaces) rect_titles += get_paths_inside_rect(svg_tree, '//ns:path/ns:title', x_min, text_field.xmax + image.x - THRESHOLD_X,\ y_min, text_field.ymax + image.y, text_field.id, namespaces=namespaces) for rect_title in rect_titles: rect = rect_title.getparent() x, y, height, width = 0.0, 0.0, 0.0, 0.0 if rect.tag.endswith('path'): path = parse_path(rect.get('d')) x, xmax, y, ymax = path.bbox() width = xmax - x height = ymax - y else: x = float(rect.get('x', svg_tree.getroot().nsmap)) y = float(rect.get('y', svg_tree.getroot().nsmap)) height = float(rect.get('height', svg_tree.getroot().nsmap)) width = width=float(rect.get('width', svg_tree.getroot().nsmap)) matrix = None if bool(rect.get('transform')): matrix = Matrix(transform_matrix_string=rect.get('transform')) + text = re.sub(r'(\s(?=[-;:.,?!’–])|(?<=[-;:.,?!’–])\s)', '', rect_title.text) 
faksimile_page.append_word_position(\ - WordPosition(id=rect.get('id', svg_tree.getroot().nsmap), text=rect_title.text, height=height,\ + WordPosition(id=rect.get('id', svg_tree.getroot().nsmap), text=text, height=height,\ width=width, x=x, y=y, matrix=matrix, tag=WordPosition.FAKSIMILE)) faksimile_pages.append(faksimile_page) return faksimile_pages def remove_tags_from_page_tree(self, list_of_tags_to_remove): """Removes the tags specified in the list from the target tree. """ for xpath2remove in list_of_tags_to_remove: for node in self.page_tree.xpath('//' + xpath2remove): node.getparent().remove(node) def get_paths_inside_rect(svg_tree, xpath, x_min, x_max, y_min, y_max, not_id, namespaces={}): """Returns a list of all paths selected by xpath that are inside x_min, x_max, y_min, y_max and do not have id == not_id. """ paths = [] if len(namespaces) == 0: namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() } for path_node in svg_tree.xpath(xpath, namespaces=namespaces): append_node = path_node if not path_node.tag.endswith('path') and not path_node.tag.endswith('rect'): path_node = path_node.getparent() x, xmax, y, ymax = -1, -1, -1, -1 init_xy = False if path_node.tag.endswith('rect'): x = float(path_node.get('x')) if bool(path_node.get('x')) else -1 y = float(path_node.get('y')) if bool(path_node.get('y')) else -1 xmax = x + float(path_node.get('width')) if bool(path_node.get('width')) else -1 ymax = y + float(path_node.get('height')) if bool(path_node.get('height')) else -1 init_xy = True elif path_node.tag.endswith('path') and bool(path_node.get('d')) and path_node.get('d') != 0: path = parse_path(path_node.get('d')) x, xmax, y, ymax = path.bbox() init_xy = True if init_xy: if bool(path_node.get('transform')): matrix = Matrix(transform_matrix_string=path_node.get('transform')) x, xmax = matrix.get_new_x(x=x, y=y), matrix.get_new_x(x=xmax, y=ymax) y, ymax = matrix.get_new_y(x=x, y=y), matrix.get_new_y(x=xmax, y=ymax) width = xmax - x height = ymax - y if x > x_min and x < x_max\ and y > y_min and y < y_max\ and path_node.get('id') != not_id: paths.append(append_node) return paths Index: svgscripts/datatypes/editor_comment.py =================================================================== --- svgscripts/datatypes/editor_comment.py (revision 110) +++ svgscripts/datatypes/editor_comment.py (revision 111) @@ -1,81 +1,84 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a comment by the editors. """ # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET import re import sys from .attachable_object import AttachableObject +from .text import Text sys.path.append('py2ttl') from class_spec import SemanticClass from xml_conform_dictionary import XMLConformDictionary class EditorComment(AttachableObject,SemanticClass): """ This class represents a comment by the editors. """ XML_TAG = 'editor-comment' - def __init__(self, id=0, comment=None, is_uncertain=False): + def __init__(self, id=0, comment=None, is_uncertain=False, text=None): self.id = id self.comment = comment self.is_uncertain = is_uncertain + self.text = text def attach_object_to_tree(self, target_tree): """Attach object to tree. """ obj_node = self.get_or_create_node_with_id(target_tree) obj_node.set('type', self.__class__.__name__) xml_conform_dictionary = XMLConformDictionary.create_cls_from_data_object(self) xml_conform_dictionary.attach_data_to_tree(obj_node) @classmethod def create_cls_from_node(cls, node): """Initialize a cls from node. [:return:] cls """ target_cls = cls cls_type = node.get('type') target_classes = [ target for target in cls.__subclasses__() if target.__name__ == cls_type ] if len(target_classes) > 0: target_cls = target_classes[0] return XMLConformDictionary.CREATE_INSTANCEOF_CLASS_FROM_NODE(target_cls, node) @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. """ properties = {} properties.update(cls.create_semantic_property_dictionary('is_uncertain', bool,\ name='isUncertain', label='whether something is uncertain')) properties.update(cls.create_semantic_property_dictionary('comment', str)) + properties.update(cls.create_semantic_property_dictionary('text', Text, name='commentHasText')) dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties } return cls.return_dictionary_after_updating_super_classes(dictionary) Index: svgscripts/datatypes/text_connection_mark.py =================================================================== --- svgscripts/datatypes/text_connection_mark.py (revision 110) +++ svgscripts/datatypes/text_connection_mark.py (revision 111) @@ -1,101 +1,114 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a text connection mark ("Anschlusszeichen"). """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET import sys +from .editor_comment import EditorComment from .footnotes import extract_footnotes from .reference import Reference from .special_word import SpecialWord from .transkriptionField import TranskriptionField +from .word import Word class TextConnectionMark(SpecialWord): """ This class represents a text connection mark. """ XML_TAG = 'text-connection-mark' XML_SUB_TAG = Reference.XML_TAG SPECIAL_CHAR_LIST = [ '*', 'W' ] FOOTNOTE_CONTAINS = [ 'Anschlußzeichen', 'Hinzufügungszeichen' ] def __init__(self, id=0, line_number=-1, text='*', transkription_positions=[], faksimile_positions=[], text_source=None): super(TextConnectionMark, self).__init__(id=id, text=text, line_number=line_number, transkription_positions=transkription_positions,\ faksimile_positions=faksimile_positions) self.text_source = text_source def add_content(self, node): """Adds content to TextConnectionMark. """ self.text_source = Reference.create_cls(node=node) def attach_word_to_tree(self, target_tree): """Attaches TextConnectionMark to tree target_tree. """ node = super(TextConnectionMark,self).attach_word_to_tree(target_tree) if self.text_source is not None: self.text_source.attach_object_to_tree(node) @classmethod + def instantiate_as_word(cls, node, id=0) ->Word: + """Instantiate a TextConnectionMark as a Word. + """ + tcm = cls.create_cls(node) + word = Word(id=id, text=tcm.text, line_number=tcm.line_number, transkription_positions=tcm.transkription_positions, faksimile_positions=tcm.faksimile_positions) + if tcm.text_source is not None: + comment = 'Hinzufügungszeichen zu ' + tcm.text_source.toString() + word.editor_comments.append(EditorComment(comment=comment, is_uncertain=tcm.text_source.is_uncertain)) + return word + + @classmethod def find_content_in_footnotes(cls, page, transkription_field=None, svg_tree=None, title='', page_number='', footnotes=None, skip_after=-1.0): """Find content for the TextConnectionMark. """ if footnotes is None: if svg_tree is None: svg_tree = ET.parse(page.source) if transkription_field is None: transkription_field = TranskriptionField(page.source) footnotes = extract_footnotes(page, transkription_field=transkription_field, svg_tree=svg_tree, contains_strings=cls.FOOTNOTE_CONTAINS, skip_after=skip_after) else: footnotes = [ footnote for footnote in footnotes if True in [ contains_string in footnote.content for contains_string in cls.FOOTNOTE_CONTAINS ] ] for text_connection_mark in page.text_connection_marks: relevant_footnotes = [ footnote.content for footnote in footnotes if footnote.content.strip().startswith(str(text_connection_mark.line_number)+ ':') ] if len(relevant_footnotes) > 0: footnote_string = relevant_footnotes[0].strip() line_number = int(footnote_string.split(':')[0]) is_uncertain = footnote_string.endswith('?') reference_string = footnote_string.replace('?', '').split('zu ')[1].strip() text_connection_mark.text_source = Reference.create_cls(is_uncertain=is_uncertain,\ reference_string=reference_string, title=page.title, page_number=page.number) @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. 
""" dictionary = super(TextConnectionMark,cls).get_semantic_dictionary() dictionary['properties'].update(cls.create_semantic_property_dictionary('text_source', Reference,\ cardinality=1, name='textConnectionMarkHasTextSource', label='text connection mark has a text source')) return cls.return_dictionary_after_updating_super_classes(dictionary) @classmethod def get_special_char_list(cls): """Returns a list of the chars that define this special word. """ return cls.SPECIAL_CHAR_LIST Index: svgscripts/datatypes/transkription_position.py =================================================================== --- svgscripts/datatypes/transkription_position.py (revision 110) +++ svgscripts/datatypes/transkription_position.py (revision 111) @@ -1,197 +1,203 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a transkription word position. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET from os.path import isfile import sys from .debug_message import DebugMessage from .image import SVGImage from .positional_word_part import PositionalWordPart from .word_position import WordPosition from .matrix import Matrix sys.path.append('py2ttl') from class_spec import SemanticClass class TranskriptionPosition(WordPosition): """ This class represents the position of a word on the transkription as it is displayed by a svg image. @label position of a word on the topological transkription Args: id (int): word id matrix (datatypes.Matrix): matrix containing information about transformation. 
height (float): height of word width (float): width of word x (float): x position of word y (float): y position of word positional_word_parts a list of (datatypes.positional_word_part) PositionalWordPart debug_message a (datatypes.debug_message) DebugMessage """ ADD2X = 0.15 ADD2TOP = 1.0 ADD2BOTTOM = 0.2 HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height XML_TAG = WordPosition.TRANSKRIPTION def __init__(self, id=0, node=None, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None, positional_word_parts=None, debug_message=None): super(TranskriptionPosition, self).__init__(id=id, node=node, height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION) self.positional_word_parts = positional_word_parts if positional_word_parts is not None else [] self.debug_message = debug_message self.deleted = False self._deletion_paths = [] self.has_box = None self.style = None self.svg_image = None if node is not None: self.debug_message = DebugMessage(node=node.xpath('.//' + DebugMessage.XML_TAG)[0])\ if len(node.xpath('.//' + DebugMessage.XML_TAG)) > 0 else None self.positional_word_parts = [ PositionalWordPart(node=pwp_node) for pwp_node in node.xpath('.//' + PositionalWordPart.XML_TAG) ] self.attachable_objects += self.positional_word_parts if self.debug_message is not None: self.attachable_objects.append(self.debug_message) def get_text(self): """Returns the concatenated text of all positional_word_parts. """ return ''.join([pwp.text for pwp in self.positional_word_parts]) def is_mergebale_with(self, other) -> bool: """Return whether self and other have same writing_process_id or style. """ if self.writing_process_id == other.writing_process_id: return True if (self.writing_process_id == -1 or other.writing_process_id == -1)\ and (len(self.positional_word_parts) > 0 and len(other.positional_word_parts) > 0): return self.positional_word_parts[0].style_class == other.positional_word_parts[0].style_class return False def split(self, split_position, second_split=-1) ->list: """Split a transkription_position in two at split_position. :return: a list of the new transkription_positions """ transkription_positions = [] left_pwp = [ pwp for pwp in self.positional_word_parts if pwp.left + pwp.width < split_position ] transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(left_pwp, transkription_position_id=self.id) if second_split == -1: right_pwp = [ pwp for pwp in self.positional_word_parts if pwp not in left_pwp ] next_id = int(self.id) + 1 transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(right_pwp, transkription_position_id=str(next_id)) else: middle_pwp = [ pwp for pwp in self.positional_word_parts if pwp not in left_pwp and pwp.left + pwp.width < second_split ] next_id = int(self.id) + 1 transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(middle_pwp, transkription_position_id=str(next_id)) right_pwp = [ pwp for pwp in self.positional_word_parts if pwp not in left_pwp and pwp not in middle_pwp ] next_id = int(self.id) + 1 transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(right_pwp, transkription_position_id=str(next_id)) return transkription_positions def update_positional_word_parts(self, positional_word_parts): """Update positional_word_parts. 
""" if len(self.positional_word_parts) > 0 and self.positional_word_parts in self.attachable_objects: self.attachable_objects.remove(self.positional_word_parts) self.positional_word_parts = positional_word_parts self.attachable_objects += self.positional_word_parts @staticmethod def CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(positional_word_parts, debug_message=None, debug_msg_string=None, transkription_position_id=0): """Creates a list of TranskriptionPosition from a list of (datatypes.positional_word_part) PositionalWordPart. [:return:] a list of (datatypes.transkription_position) TranskriptionPosition """ TOPCORRECTION = 1 debug_message = DebugMessage(message=debug_msg_string)\ if debug_msg_string is not None else debug_message transkription_positions = [] if len(positional_word_parts) < 1: return [] matrix = positional_word_parts[0].transform index = 0 matrices_differ = False style_class = positional_word_parts[0].style_class styles_differ = False while index < len(positional_word_parts) and not matrices_differ and not styles_differ: if Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix, positional_word_parts[index].transform): matrices_differ = True elif style_class != positional_word_parts[index].style_class: styles_differ = True else: index += 1 if (matrices_differ or styles_differ) and index < len(positional_word_parts): debug_msg_string = 'matrices differ' if matrices_differ else 'styles differ' transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\ positional_word_parts[index:], debug_msg_string=debug_msg_string, transkription_position_id=int(transkription_position_id)+1) positional_word_parts = positional_word_parts[:index] heighest_pwp = sorted(positional_word_parts, key=lambda pwp: pwp.height, reverse=True)[0] toppest_pwp = sorted(positional_word_parts, key=lambda pwp: pwp.top)[0] height = heighest_pwp.height + 2*TOPCORRECTION if heighest_pwp != toppest_pwp: height += abs(heighest_pwp.top-toppest_pwp.top) x = positional_word_parts[0].left - TranskriptionPosition.ADD2X - y = toppest_pwp.top - TOPCORRECTION width = positional_word_parts[-1].left - x\ + positional_word_parts[-1].width + TranskriptionPosition.ADD2X + if matrix is not None and matrix.isRotationMatrix(): + x = positional_word_parts[0].left - matrix.matrix[Matrix.XINDEX]\ + if positional_word_parts[0].left - matrix.matrix[Matrix.XINDEX] > 0\ + else 0.0 + y = toppest_pwp.top - TOPCORRECTION\ + if matrix is None or not matrix.isRotationMatrix()\ + else height*-1 for pwp_index, pwp in enumerate(positional_word_parts): pwp.id = pwp_index transkription_positions.insert(0, TranskriptionPosition(id=transkription_position_id, height=height, width=width, x=x, y=y, matrix=matrix,\ positional_word_parts=positional_word_parts, debug_message=debug_message)) return transkription_positions @staticmethod def CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, matrix=None, debug_msg_string=None, transkription_field=None, svg_path_tree=None, namespaces=None): """Creates a list of TranskriptionPosition from word_part_objs (i.e. a list of dictionaries with the keys: text, x, y, matrix, class). 
[:return:] a list of (datatypes.transkription_position) TranskriptionPosition """ positional_word_parts = [] debug_message = DebugMessage(message=debug_msg_string)\ if debug_msg_string is not None else None if page.svg_file is not None and isfile(page.svg_file): svg_path_tree = ET.parse(page.svg_file) if svg_path_tree is None else svg_path_tree namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }\ if namespaces is None else namespaces xmin = 0.0 ymin = 0.0 if transkription_field is not None: xmin = transkription_field.xmin ymin = transkription_field.ymin for part_obj in word_part_objs: positional_word_parts += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(\ part_obj, svg_path_tree, namespaces, page, start_id=len(positional_word_parts),\ xmin=xmin, ymin=ymin) else: positional_word_parts = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs) if len(positional_word_parts) > 0: return TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(positional_word_parts, debug_message=debug_message) else: return [ TranskriptionPosition(matrix=matrix, debug_message=debug_message) ] Index: svgscripts/datatypes/reference.py =================================================================== --- svgscripts/datatypes/reference.py (revision 110) +++ svgscripts/datatypes/reference.py (revision 111) @@ -1,173 +1,187 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a text reference. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET import re import sys from .attachable_object import AttachableObject sys.path.append('py2ttl') from class_spec import SemanticClass NON_INT = re.compile('\D+') class Reference(AttachableObject,SemanticClass): """ This class represents a text reference. 
Args: id (int): object id first_line (int) first line of reference last_line (int) last line of reference is_uncertain (bool) whether reference is uncertain title (str) title of reference page_number (str) page_number of reference tag (str) xml tag """ XML_TAG = 'reference' intKeys = [ 'first_line', 'last_line'] boolKeys = [ 'is_uncertain' ] stringKeys = [ 'title', 'page_number', 'word_reference' ] def __init__(self, node=None, id=0, first_line=-1, last_line=-1, is_uncertain=False, title=None, page_number=None, word_reference=None, tag=XML_TAG): self.intKeys = [] self.intKeys += Reference.intKeys self.intKeys.append('id') self.stringKeys = [] self.stringKeys += Reference.stringKeys self.boolKeys = [] self.boolKeys += Reference.boolKeys self.id = id - self.first_line = first_line - self.last_line = last_line + self.first_line = int(first_line) + self.last_line = int(last_line) self.is_uncertain = is_uncertain self.title = title self.page_number = page_number self.word_reference = word_reference self.tag = tag def attach_object_to_tree(self, target_tree): """Attach object to tree. """ if target_tree.__class__.__name__ == '_ElementTree': target_tree = target_tree.getroot() obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \ if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \ else ET.SubElement(target_tree, self.tag) for key in self.boolKeys: if self.__dict__[key] is not None: obj_node.set(key.replace('_','-'), str(self.__dict__[key]).lower()) for key in self.intKeys: if self.__dict__[key] is not None and self.__dict__[key] > -1: obj_node.set(key.replace('_','-'), str(self.__dict__[key])) for key in self.stringKeys: if self.__dict__[key] is not None and self.__dict__[key] != '': obj_node.set(key.replace('_','-'), str(self.__dict__[key])) @classmethod def create_cls_from_node(cls, node): """Creates a Reference from a (lxml.etree.Element) node. :return: (datatypes.reference) Reference """ instance = cls() for key in instance.boolKeys: xml_key = key.replace('_', '-') if bool(node.get(xml_key)): instance.__dict__[key] = node.get(xml_key) == 'true' for key in instance.intKeys: xml_key = key.replace('_', '-') if bool(node.get(xml_key)): instance.__dict__[key] = int(node.get(xml_key)) for key in instance.stringKeys: xml_key = key.replace('_', '-') if bool(node.get(xml_key)): instance.__dict__[key] = node.get(xml_key) return instance @classmethod def create_cls(cls, node=None, id=0, is_uncertain=False, reference_string='', title='', page_number=''): """Creates a Reference from a (lxml.etree.Element) node or a reference_string. 
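Example (the values correspond to the assertions in tests_svgscripts/test_reference.py below):

    reference = Reference.create_cls(reference_string='ASDF 5,5-8')
    # reference.title == 'ASDF', reference.page_number == '5',
    # reference.first_line == 5, reference.last_line == 8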
:return: (datatypes.reference) Reference """ if node is not None: return cls.create_cls_from_node(node) else: first_line = -1 last_line = -1 word_reference = None if re.match(r'[0-9]+([a-z]+)*,[0-9]+(-[0-9]+)*', reference_string): page_number = reference_string.split(',')[0] line_numbers = reference_string.split(',')[1].split('-') first_line = _save_get_int(line_numbers[0]) last_line = _save_get_int(line_numbers[1]) if len(line_numbers) > 1 else -1 else: if ',' not in reference_string: if re.match(r'\D+.*', reference_string): word_reference = reference_string.strip() else: line_numbers = reference_string.split('-') first_line = _save_get_int(line_numbers[0]) last_line = _save_get_int(line_numbers[1]) if len(line_numbers) > 1 else -1 else: if ' ' not in reference_string: raise Exception('String "{}" is not a valid reference_string'.format(reference_string)) title = reference_string.split(' ')[0] return cls.create_cls(id=id, is_uncertain=is_uncertain, reference_string=reference_string[len(title)+1:],\ title=title, page_number=page_number) return cls(id=id, is_uncertain=is_uncertain, first_line=first_line, last_line=last_line,\ title=title, page_number=page_number, word_reference=word_reference) @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. """ dictionary = {} class_dict = cls.get_class_dictionary() properties = {} properties.update({'first_line': { 'class': int, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'firstLineOfReference',\ 'label': 'first line of reference'}}) properties.update({'last_line': { 'class': int, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'lastLineOfReference',\ 'label': 'last line of reference'}}) properties.update({'word_reference': { 'class': str, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality', 'name': 'wordReference',\ 'label': 'refers to word on same line'}}) properties.update({'is_uncertain': { 'class': bool, 'cardinality': 0, 'name': 'IsUncertain', 'label': 'whether something is uncertain'}}) properties.update(cls.create_semantic_property_dictionary('title', str, cardinality=0)) properties.update(cls.create_semantic_property_dictionary('page_number', str, cardinality=0)) dictionary.update({cls.CLASS_KEY: class_dict}) dictionary.update({cls.PROPERTIES_KEY: properties}) return cls.return_dictionary_after_updating_super_classes(dictionary) + + def toString(self) ->str: + """Return a string representation of reference + """ + reference = self.title + ' '\ + if self.title is not None\ + else '' + reference = reference + self.page_number + ','\ + if self.page_number is not None\ + else '' + reference = reference + str(self.first_line) + '-' + str(self.last_line)\ + if self.last_line > -1\ + else reference + str(self.first_line) + return reference def _save_get_int(line_reference) -> int: """Return line as int and remove none int str at end of str. """ return int(NON_INT.sub('', line_reference)) Index: svgscripts/process_words_post_merging.py =================================================================== --- svgscripts/process_words_post_merging.py (revision 110) +++ svgscripts/process_words_post_merging.py (revision 111) @@ -1,495 +1,495 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to process words after they have been merged with faksimile data. 
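A typical invocation (a sketch; the manuscript file name is hypothetical):

    python3 svgscripts/process_words_post_merging.py xml/manuscript.xml

See main() below for the available options.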
""" # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 1}}} from colorama import Fore, Style from deprecated import deprecated from functools import cmp_to_key import getopt import inspect import lxml.etree as ET from operator import attrgetter import os from os import listdir, sep, path, setpgrp, devnull from os.path import exists, isfile, isdir, dirname, basename from pathlib import Path as PathlibPath from progress.bar import Bar import re import shutil import string from svgpathtools import svg2paths2, svg_to_paths from svgpathtools.path import Path as SVGPath from svgpathtools.path import Line import sys import tempfile import warnings if dirname(__file__) not in sys.path: sys.path.append(dirname(__file__)) from datatypes.box import Box from datatypes.archival_manuscript import ArchivalManuscriptUnity from datatypes.mark_foreign_hands import MarkForeignHands from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK from datatypes.path import Path from datatypes.text_connection_mark import TextConnectionMark from datatypes.transkriptionField import TranskriptionField from datatypes.word import Word, do_paths_intersect_saveMode, update_transkription_position_ids from extract_line_continuation import extract_line_continuations from util import back_up, process_warnings4status from process_files import update_svgposfile_status from process_footnotes import categorize_footnotes sys.path.append('shared_util') from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT from main_util import extract_paths_on_tf __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" UNITTESTING = False DEBUG_WORD = None MERGED_DIR = 'merged' WARNING_FOOTNOTES_ERROR = 'footnotes not processed' WARNING_LINE_CONTINUATION = 'line continuation fail' def categorize_paths(page, transkription_field=None): """Categorize all paths that are part of the transkription field. :return: a dictionary containig a list for each category of path. 
""" if page.source is not None and isfile(page.source): MAX_HEIGHT_LINES = 1 max_line = sorted(\ [line_number.bottom-line_number.top for line_number in page.line_numbers if line_number.id % 2 == 0],\ reverse=True)[0] + 2 if len(page.line_numbers) > 0 else 17 tr_xmin = 0.0 tr_ymin = 0.0 if (page.svg_image is None or page.svg_image.text_field is None)\ and transkription_field is not None: tr_xmin = transkription_field.xmin tr_ymin = transkription_field.ymin paths, attributes = svg_to_paths.svg2paths(page.source) allpaths_outside_tf = [] attributes_outside_tf = [] if transkription_field is None: transkription_field = TranskriptionField(page.source, multipage_index=page.multipage_index) allpaths_on_tf = extract_paths_on_tf(page, outsiders=allpaths_outside_tf, outsider_attributes=attributes_outside_tf, transkription_field=transkription_field) path_dict = { 'text_area_deletion_paths': [],\ 'deletion_or_underline_paths': [],\ 'box_paths': [],\ 'dots_paths': [],\ 'word_connector_paths': [],\ 'uncategorized_paths': [] } for mypath in allpaths_on_tf: xmin, xmax, ymin, ymax = mypath.path.bbox() start_line_number = page.get_line_number(mypath.path.start.imag-tr_ymin) if abs(xmax-xmin) < 1 and abs(ymax-ymin) < 1: path_dict.get('dots_paths').append(mypath) elif abs(ymax-ymin) > MAX_HEIGHT_LINES and abs(ymax-ymin) < max_line and mypath.path.iscontinuous() and mypath.path.isclosed(): path_dict.get('box_paths').append(mypath) elif abs(ymax-ymin) > MAX_HEIGHT_LINES and abs(ymax-ymin) > max_line and mypath.path.iscontinuous() and not mypath.path.isclosed(): path_dict.get('word_connector_paths').append(mypath) elif abs(ymax-ymin) < MAX_HEIGHT_LINES: mypath.start_line_number = start_line_number path_dict.get('deletion_or_underline_paths').append(mypath) elif start_line_number != -1 and start_line_number != page.get_line_number(mypath.path.end.imag-tr_ymin): # Check for "ladder", i.e. a path with 3 segments (seg0 is horizontal on line x, seg1 moves to line x+1, seg2 is horizontal on line x+1) if start_line_number + 1 == page.get_line_number(mypath.path.end.imag-tr_ymin)\ and len(mypath.path._segments) == 3\ and abs(mypath.path._segments[0].bbox()[3]-mypath.path._segments[0].bbox()[2]) < MAX_HEIGHT_LINES\ and abs(mypath.path._segments[2].bbox()[3]-mypath.path._segments[2].bbox()[2]) < MAX_HEIGHT_LINES: for index in 0, 2: new_path = Path(parent_path=mypath, path=SVGPath(mypath.path._segments[index])) new_path.start_line_number = page.get_line_number(new_path.path.start.imag-tr_ymin) path_dict.get('deletion_or_underline_paths').append(new_path) else: path_dict.get('text_area_deletion_paths').append(mypath) else: path_dict.get('uncategorized_paths').append(mypath) underline_path = mark_words_intersecting_with_paths_as_deleted(page, path_dict.get('deletion_or_underline_paths'), tr_xmin, tr_ymin) path_dict.update({'underline_path': underline_path}) path_dict['uncategorized_paths'] += process_word_boxes(page, path_dict.get('box_paths'), transkription_field,\ paths=allpaths_outside_tf, attributes=attributes_outside_tf, max_line=max_line) return path_dict elif not UNITTESTING: error_msg = 'Svg source file {} does not exist!'.format(page.source)\ if page.source is not None else 'Page does not contain a source file!' raise FileNotFoundError(error_msg) return {} def copy_page_to_merged_directory(page, manuscript_file=None): """Copy page to directory that contains the first version of all svg_pos_files that have been merged with the faksimile position data. MERGED_DIR is a subfolder of svg_pos_files-directory. 
""" svg_pos_file = PathlibPath(page.page_tree.docinfo.URL) target_dir = svg_pos_file.parent / MERGED_DIR if not target_dir.is_dir(): target_dir.mkdir() target_pos_file = target_dir / svg_pos_file.name save_page(page, str(svg_pos_file), target_svg_pos_file=str(target_pos_file), status=STATUS_MERGED_OK, manuscript_file=manuscript_file) def find_special_words(page, transkription_field=None): """Find special words, remove them from words, process their content. """ if page.source is None or not isfile(page.source): raise FileNotFoundError('Page does not have a source!') if transkription_field is None: transkription_field = TranskriptionField(page.source, multipage_index=page.multipage_index) set_to_text_field_zero = (page.svg_image is None or page.svg_image.text_field is None) special_char_list = MarkForeignHands.get_special_char_list() special_char_list += TextConnectionMark.get_special_char_list() single_char_words = [ word for word in page.words if len(word.text) == 1 and word.text in special_char_list ] if not UNITTESTING: bar = Bar('find special words', max=len(single_char_words)) for word in single_char_words: not bool(UNITTESTING) and bar.next() if word.text == MarkForeignHands.CLASS_MARK: id = len(page.mark_foreign_hands) page.mark_foreign_hands.append(MarkForeignHands.create_cls_from_word(word, id=id)) page.words.remove(word) elif word.text in TextConnectionMark.SPECIAL_CHAR_LIST[0]\ or (word.text in TextConnectionMark.SPECIAL_CHAR_LIST\ and any(style in page.sonderzeichen_list for style\ in word.transkription_positions[0].positional_word_parts[0].style_class.split(' '))): id = len(page.text_connection_marks) page.text_connection_marks.append(TextConnectionMark.create_cls_from_word(word, id=id)) page.words.remove(word) not bool(UNITTESTING) and bar.finish() svg_tree = ET.parse(page.source) page.update_page_type(transkription_field=transkription_field) page.update_line_number_area(transkription_field, svg_tree=svg_tree, set_to_text_field_zero=set_to_text_field_zero) if page.marginals_source is not None: svg_tree = ET.parse(page.marginals_source) italic_classes = [ key for key in page.style_dict\ if bool(page.style_dict[key].get('font-family')) and page.style_dict[key]['font-family'].endswith('Italic') ] if len(page.mark_foreign_hands) > 0: MarkForeignHands.find_content(page.mark_foreign_hands, transkription_field, svg_tree, italic_classes=italic_classes,\ SonderzeichenList=page.sonderzeichen_list, set_to_text_field_zero=set_to_text_field_zero) if len(page.text_connection_marks) > 0: TextConnectionMark.find_content_in_footnotes(page, transkription_field, svg_tree) def mark_words_intersecting_with_paths_as_deleted(page, deletion_paths, tr_xmin=0.0, tr_ymin=0.0): """Marks all words that intersect with deletion paths as deleted and adds these paths to word_deletion_paths. 
[:return:] list of .path.Path that might be word_underline_paths """ if not UNITTESTING: bar = Bar('mark words that intersect with deletion paths', max=len(page.words)) for word in page.words: not bool(UNITTESTING) and bar.next() word = mark_word_if_it_intersects_with_paths_as_deleted(word, page, deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) for part_word in word.word_parts: part_word = mark_word_if_it_intersects_with_paths_as_deleted(part_word, page, deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) word.partition_according_to_deletion() not bool(UNITTESTING) and bar.finish() # return those paths in deletion_paths that are not in page.word_deletion_paths return [ word_underline_path for word_underline_path in set(deletion_paths) - set(page.word_deletion_paths) ] def mark_word_if_it_intersects_with_paths_as_deleted(word, page, deletion_paths, tr_xmin=0.0, tr_ymin=0.0): """Marks word if it intersects with deletion paths as deleted and adds these paths to word_deletion_paths. [:return:] word """ word.deleted = False for transkription_position in word.transkription_positions: word_path = Path.create_path_from_transkription_position(transkription_position,\ tr_xmin=tr_xmin, tr_ymin=tr_ymin) intersecting_paths = [ deletion_path for deletion_path in deletion_paths\ if do_paths_intersect_saveMode(deletion_path, word_path) ] if DEBUG_WORD is not None and word.text == DEBUG_WORD.text and word.line_number == DEBUG_WORD.line_number: relevant_paths = [ path for path in deletion_paths if path.start_line_number == DEBUG_WORD.line_number ] #print(word.line_number, word_path.path.bbox(), [ path.path.bbox() for path in relevant_paths]) if len(intersecting_paths) > 0: #print(f'{word.line_number}: {word.id}, {word.text}: {intersecting_paths}') transkription_position.deleted = True transkription_position._deletion_paths += intersecting_paths for deletion_path in intersecting_paths: if deletion_path.parent_path is not None: deletion_path = deletion_path.parent_path if deletion_path not in page.word_deletion_paths: deletion_path.tag = Path.WORD_DELETION_PATH_TAG deletion_path.attach_object_to_tree(page.page_tree) page.word_deletion_paths.append(deletion_path) return word def post_merging_processing_and_saving(svg_pos_file=None, new_words=None, page=None, manuscript_file=None, target_svg_pos_file=None): """Process words after merging with faksimile word positions. 
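Example (a sketch; the file names are hypothetical):

    post_merging_processing_and_saving(svg_pos_file='xml/N_VII_1_page008.xml',
                                       manuscript_file='xml/N_VII_1.xml')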
""" if page is None and svg_pos_file is None: raise Exception('ERROR: post_merging_processing_and_saving needs either a Page or a svg_pos_file!') if page is None: page = Page(svg_pos_file) if page.source is None or not isfile(page.source): raise FileNotFoundError('Page instantiated from {} does not contain an existing source!'.format(svg_pos_file)) if svg_pos_file is None: svg_pos_file = page.page_tree.docinfo.URL if new_words is not None: page.words = sorted(new_words, key=attrgetter('id')) for word_node in page.page_tree.xpath('.//word'): word_node.getparent().remove(word_node) manuscript = ArchivalManuscriptUnity.create_cls(manuscript_file)\ if manuscript_file is not None\ else None copy_page_to_merged_directory(page, manuscript_file=manuscript_file) transkription_field = TranskriptionField(page.source, multipage_index=page.multipage_index) update_faksimile_line_positions(page) status = STATUS_MERGED_OK page.update_styles(manuscript=manuscript, partition_according_to_styles=True) save_page(page, svg_pos_file, target_svg_pos_file=target_svg_pos_file, status=status, manuscript_file=manuscript_file) categorize_paths(page, transkription_field=transkription_field) with warnings.catch_warnings(record=True) as w: warnings.simplefilter('default') try: find_special_words(page, transkription_field=transkription_field) save_page(page, svg_pos_file, target_svg_pos_file=target_svg_pos_file, status=status, manuscript_file=manuscript_file) categorize_footnotes(page) save_page(page, svg_pos_file, target_svg_pos_file=target_svg_pos_file, status=status, manuscript_file=manuscript_file) extract_line_continuations(page, warning_message=WARNING_LINE_CONTINUATION) except Exception: warnings.warn(WARNING_FOOTNOTES_ERROR) status = process_warnings4status(w, [ WARNING_FOOTNOTES_ERROR, WARNING_LINE_CONTINUATION ], status, STATUS_POSTMERGED_OK) save_page(page, svg_pos_file, target_svg_pos_file=target_svg_pos_file, status=status, manuscript_file=manuscript_file) def process_word_boxes(page, box_paths, transkription_field, paths=None, attributes=None, max_line=17) -> list: """Process word boxes: partition words according to word boxes. 
[:return:] a list of paths that are not boxes """ MAX_HEIGHT_LINES = 1 not_boxes = [] try: if not UNITTESTING: bar = Bar('process word boxes', max=len(page.words)) svg_tree = ET.parse(page.source) namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() } allpaths_on_margin_field = [] tr_xmin = 0 if page.svg_image is not None and page.svg_image.text_field is not None\ else transkription_field.xmin tr_ymin = 0 if page.svg_image is not None and page.svg_image.text_field is not None\ else transkription_field.ymin if paths is None or attributes is None: paths = [] raw_paths, attributes = svg_to_paths.svg2paths(page.source) for index, raw_path in enumerate(raw_paths): paths.append(Path.create_cls(id=index, path=raw_path, style_class=attributes[index].get('class'), page=page)) for index, mypath in enumerate(paths): path = mypath.path xmin, xmax, ymin, ymax = path.bbox() attribute = attributes[index] if len(path) > 0\ and path != transkription_field.path\ and ((path.bbox()[1] < transkription_field.xmin and transkription_field.is_page_verso())\ or (path.bbox()[0] > transkription_field.xmax and not transkription_field.is_page_verso()))\ and abs(ymax-ymin) < max_line: allpaths_on_margin_field.append(mypath)#Path.create_cls(id=index, path=path, style_class=attribute.get('class'), page=page)) box_line_number_dict = {} for box_path in sorted(box_paths, key=lambda path: path.get_median_y()): line_number = page.get_line_number(box_path.get_median_y(tr_ymin=tr_ymin)) if line_number > 0: if line_number not in box_line_number_dict.keys(): box_line_number_dict.update({ line_number: [ box_path ]}) else: box_line_number_dict.get(line_number).append(box_path) boxes = [] for line_number in box_line_number_dict.keys(): box_paths_on_line = sorted(box_line_number_dict[line_number], key=lambda path: path.get_x()) margin_boxes_on_line = sorted([ margin_box for margin_box in allpaths_on_margin_field\ if page.get_line_number(margin_box.get_median_y(tr_ymin=tr_ymin)) == line_number ],\ key=lambda path: path.get_x()) threshold = 3 if line_number % 2 == 0 else 1.5 if len(margin_boxes_on_line) > 0: for box_path in box_paths_on_line: #print(line_number, box_path.path.d(), len(margin_boxes_on_line)) box = Box.create_box(box_path, margin_boxes_on_line, svg_tree=svg_tree,\ namespaces=namespaces, threshold=threshold) if box is not None: boxes.append(box) else: not_boxes += box_paths_on_line if len(boxes) > 0 and len(page.words) > 0: print(len(boxes)) startIndex = 0 steps = round(len(page.words)/4) if not bool(UNITTESTING) else len(page.words) while startIndex+steps <= len(page.words): for word in page.words[startIndex:startIndex+steps]: word.process_boxes(boxes, tr_xmin=tr_xmin, tr_ymin=tr_ymin) word.create_correction_history(page) if not bool(UNITTESTING): bar.next() elif word.earlier_version is not None: #print(f'{word.text} -> {word.earlier_version.text}') if word.earlier_version.earlier_version is not None: print(f'{word.earlier_version.earlier_version.text}') save_page(page, page.page_tree.docinfo.URL) page = Page.create_cls(page.page_tree.docinfo.URL) startIndex += steps not bool(UNITTESTING) and bar.finish() except Exception as e: print(e) return not_boxes def reset_page(page): """Reset all words that have word_parts in order to run the script a second time. 
""" svg_pos_file = PathlibPath(page.page_tree.docinfo.URL) first_merge_version = svg_pos_file.parent / MERGED_DIR / svg_pos_file.name if first_merge_version.exists(): page = Page(str(first_merge_version)) else: word_with_wordparts = [ word for word in page.words if len(word.word_parts) > 0 ] word_with_wordparts += [ word for word in page.words if word.earlier_version is not None ] page_changed = False if len(word_with_wordparts) > 0: for word in word_with_wordparts: word.undo_partitioning() update_transkription_position_ids(word) page_changed = True no_line_numbers = [ word for word in page.words if word.line_number == -1 ] if len(no_line_numbers) > 0: for word in no_line_numbers: if len(word.transkription_positions) > 0: word.line_number = page.get_line_number((word.transkription_positions[0].top+word.transkription_positions[0].bottom)/2) else: msg = f'Word {word.id} {word.text} has no transkription_position!' warnings.warn(msg) page_changed = True if page_changed: page.update_and_attach_words2tree() def save_page(page, svg_pos_file, target_svg_pos_file=None, status=None, manuscript_file=None): """Save page to target_file and update status of file. """ page.update_and_attach_words2tree() if not UNITTESTING: if target_svg_pos_file is None: target_svg_pos_file = svg_pos_file if status is not None: update_svgposfile_status(svg_pos_file, manuscript_file=manuscript_file, status=status) write_pretty(xml_element_tree=page.page_tree, file_name=target_svg_pos_file, script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION) def update_faksimile_line_positions(page): """Update faksimile_positions of the lines """ num_lines = len(page.line_numbers) ymin = page.text_field.ymin\ if page.text_field is not None\ else 0.0 for line_number in page.line_numbers: if len([ word.faksimile_positions[0] for word in page.words\ - if len(word.faksimile_positions) > 0 and word.line_number == line_number.id ]) > 0: + if len(word.word_parts) < 2 and len(word.faksimile_positions) > 0 and word.line_number == line_number.id ]) > 0: line_number.faksimile_inner_top = min([ word.faksimile_positions[0].top for word in page.words\ if len(word.faksimile_positions) > 0 and word.line_number == line_number.id ]) line_number.faksimile_inner_bottom = max([ word.faksimile_positions[0].bottom for word in page.words\ if len(word.faksimile_positions) > 0 and word.line_number == line_number.id ]) if line_number.id % 2 == 0: line_number.faksimile_outer_top = line_number.faksimile_inner_top - ymin line_number.faksimile_outer_bottom = line_number.faksimile_inner_bottom - ymin for index, line_number in enumerate(page.line_numbers): if line_number.faksimile_inner_bottom == 0.0\ or line_number.faksimile_inner_bottom < line_number.faksimile_inner_top: if index == 0 and num_lines > 1: line_number.faksimile_inner_bottom = page.line_numbers[index+1].top elif index == num_lines-1 and page.text_field is not None: line_number.faksimile_inner_bottom = round(page.text_field.height + page.text_field.ymin, 3) elif index > 0 and index < num_lines-1: line_number.faksimile_inner_bottom = page.line_numbers[index+1].faksimile_inner_top\ if page.line_numbers[index+1].faksimile_inner_top > page.line_numbers[index-1].faksimile_inner_bottom\ else page.line_numbers[index-1].faksimile_inner_bottom line_number.attach_object_to_tree(page.page_tree) def update_writing_process_ids(page): """Update the writing_process_ids of the words and split accordingly. 
""" for word in page.words: word.set_writing_process_id_to_transkription_positions(page) word.partition_according_to_writing_process_id() def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to process words after they have been merged with faksimile data. svgscripts/process_words_post_merging.py [OPTIONS] a xml file about a manuscript, containing information about its pages. a xml file about a page, containing information about svg word positions. OPTIONS: -h|--help show help -i|--include-missing-line-number run script on files that contain words without line numbers -r|--rerun rerun script on a svg_pos_file that has already been processed :return: exit code (int) """ status_not_contain = STATUS_POSTMERGED_OK include_missing_line_number = False try: opts, args = getopt.getopt(argv, "hir", ["help", "include-missing-line-number", "rerun" ]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 elif opt in ('-i', '--include-missing-line-number'): include_missing_line_number = True elif opt in ('-r', '--rerun'): status_not_contain = '' if len(args) < 1: usage() return 2 exit_status = 0 file_a = args[0] if isfile(file_a): manuscript_file = file_a\ if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\ else None counter = 0 for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK, status_not_contain=status_not_contain): reset_page(page) no_line_numbers = [ word for word in page.words if word.line_number == -1 ] if not include_missing_line_number and len(no_line_numbers) > 0: not UNITTESTING and print(Fore.RED + f'Page {page.title}, {page.number} has words with no line number!') for word in no_line_numbers: not UNITTESTING and print(f'Word {word.id}: {word.text}') else: back_up(page, page.xml_file) not UNITTESTING and print(Fore.CYAN + f'Processing {page.title}, {page.number} ...' + Style.RESET_ALL) post_merging_processing_and_saving(page=page, manuscript_file=manuscript_file) counter += 1 not UNITTESTING and print(Style.RESET_ALL + f'[{counter} pages processed]') else: raise FileNotFoundError('File {} does not exist!'.format(file_a)) return exit_status if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: svgscripts/process_footnotes.py =================================================================== --- svgscripts/process_footnotes.py (revision 110) +++ svgscripts/process_footnotes.py (revision 111) @@ -1,294 +1,294 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to process words after they have been merged with faksimile data. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see 1}}} from colorama import Fore, Style import getopt import lxml.etree as ET import os from os import listdir, sep, path, setpgrp, devnull from os.path import exists, isfile, isdir, dirname, basename from pathlib import Path as PathlibPath from progress.bar import Bar import inspect import re import shutil import sys import warnings if dirname(__file__) not in sys.path: sys.path.append(dirname(__file__)) from datatypes.archival_manuscript import ArchivalManuscriptUnity from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK from datatypes.atypical_writing import AtypicalWriting from datatypes.clarification import Clarification from datatypes.editor_comment import EditorComment from datatypes.editor_correction import EditorCorrection from datatypes.footnotes import extract_footnotes from datatypes.imprint import extract_imprints from datatypes.line_continuation import LineContinuation from datatypes.standoff_tag import StandoffTag from datatypes.text import Text from datatypes.text_connection_mark import TextConnectionMark from datatypes.uncertain_decipherment import UncertainDecipherment from util import back_up from process_files import update_svgposfile_status sys.path.append('shared_util') from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" UNITTESTING = False ATYPICAL_GROUP = re.compile(r'(.*:.*]\s*)(¿)(.*)') CLARIFICATION_GROUP = re.compile(r'(.*:.*]\s*)(Vk)(.*)') CONTINUATION_GROUP = re.compile(r'(.*:\s*)(Fortsetzung\s*)') COMMENT_GROUP = re.compile(r'(.*:.*])') EDITOR_CORRECTION_GROUP = re.compile(r'(.*:.*]\s*)(>[?]*)(.*)') LINE_REFERENCE_GROUP = re.compile(r'(\d+-|\d/(\d+/)*)*([0-9]+)(:.*)') LINE_REFERENCE_GROUP_START_INDEX = 1 LINE_REFERENCE_GROUP_MID_INDEX = 2 LINE_REFERENCE_GROUP_END_INDEX = 3 LINE_COMMENT_GROUP = re.compile(r'(.*\d+:)') UNCERTAINTY_WORD_GROUP = re.compile(r'(.*:.*]\s*)([>]*\?)(.*)') UNCERTAINTY_EDITOR_GROUP = re.compile(r'(.*)(\?)') WORD_REFERENCE_GROUP = re.compile(r'(.*[0-9]+:\s*)(.*)(].*)') DEBUG = False def categorize_footnotes(page, footnotes=None, debug=False, skip_after=-1.0, find_content=False): """Categorize footnotes. """ global DEBUG DEBUG = debug if footnotes is None: footnotes = extract_footnotes(page, skip_after=skip_after) for footnote in footnotes: line_match = re.match(LINE_REFERENCE_GROUP, footnote.content) if line_match is not None: _process_line_match(page, footnote, line_match) else: warnings.warn(f'Unknown editor comment without a line reference: <{footnote}>') if find_content and len(page.text_connection_marks) > 0: TextConnectionMark.find_content_in_footnotes(page, footnotes=footnotes) page.update_and_attach_words2tree() for line in page.lines: line.attach_object_to_tree(page.page_tree) DEBUG = False if not UNITTESTING: write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\ script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION) def save_imprints(page): """Extract imprints and attach them to the page tree. 
""" for imprint in extract_imprints(page): imprint.attach_object_to_tree(page.page_tree) if not UNITTESTING: write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,\ script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}', file_type=FILE_TYPE_SVG_WORD_POSITION) def _is_uncertain(footnote) -> bool: """Return whether footnote contains sign for uncertainty. """ uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content) return (uncertain_match is not None\ and len([ markup for markup in footnote.standoff_markups\ if markup.css_string.endswith('italic;')\ and uncertain_match.end() >= markup.startIndex\ and uncertain_match.end() <= markup.endIndex ]) > 0) def _process_line_match(page, footnote, line_match): """Process footnote if reference to a line matches. """ word_match = re.match(WORD_REFERENCE_GROUP, footnote.content) end_line_number = int(line_match.group(LINE_REFERENCE_GROUP_END_INDEX)) lines = [] if line_match.group(LINE_REFERENCE_GROUP_START_INDEX) is not None: if line_match.group(LINE_REFERENCE_GROUP_MID_INDEX) is not None: line_ids = [ int(line_id) for line_id in\ line_match.group(LINE_REFERENCE_GROUP_START_INDEX).split('/')\ if line_id != '' ] + [ end_line_number ] lines = [ line for line in page.lines if line.id in line_ids ] else: start_line_number = int(line_match.group(1)[0:-1]) lines = [ line for line in page.lines if line.id >= start_line_number and line.id <= end_line_number ] else: lines = [ line for line in page.lines if line.id == end_line_number ] if word_match is not None: _process_word_match(page.words, footnote, line_match, word_match.group(2), end_line_number) elif len(lines) > 0: uncertain_match = re.match(UNCERTAINTY_EDITOR_GROUP, footnote.content) for line in lines: _process_line_reference(page, footnote, line, _is_uncertain(footnote)) else: warnings.warn(f'Footnote refers to missing line {line_number}: {footnote}') def _process_line_reference(page, footnote, line, is_uncertain): """Process footnote if there is a line reference. """ continuation_match = re.match(CONTINUATION_GROUP, footnote.content) if continuation_match is not None: reference_string = footnote.content[continuation_match.end():] if is_uncertain: reference_string = reference_string[:-1] line.editor_comments.append(LineContinuation.create_cls(reference_string=reference_string, is_uncertain=is_uncertain)) else: comment_match = re.match(LINE_COMMENT_GROUP, footnote.content) if comment_match is not None: is_uncertain = _is_uncertain(footnote) comment = footnote.content[comment_match.end():-1].strip()\ if is_uncertain\ else footnote.content[comment_match.end():].strip() line.editor_comments.append(EditorComment(comment=comment, is_uncertain=is_uncertain)) else: warnings.warn(f'Unknown editor comment for line "{line.id}": <{footnote}>') def _process_word_match(words, footnote, line_match, word_text, line_number, parent_word_composition=None): """Process footnote if there is a word reference. 
""" referred_words = [ word for word in words\ if word.line_number == line_number\ and (word.text == word_text\ or re.match(rf'\W*{word_text}\W', word.text)\ or word.edited_text == word_text) ] referred_word_parts = [ word.word_parts for word in words\ if word.line_number == line_number\ and len(word.word_parts) > 0\ and word_text in [ wp.text for wp in word.word_parts ] ] overwritten_word_matches = [ word for word in words\ if word.line_number == line_number\ and len(word.word_parts) > 0\ and len([word_part for word_part in word.word_parts\ if word_part.overwrites_word is not None\ and word_part.overwrites_word.text == word_text]) > 0] if len(referred_words) > 0\ or len(overwritten_word_matches) > 0\ or len(referred_word_parts) > 0: word = None if len(referred_words) == 1: word = referred_words[0] elif len(overwritten_word_matches) > 0: word = [ word_part.overwrites_word for word_part in overwritten_word_matches[0].word_parts\ if word_part.overwrites_word is not None and word_part.overwrites_word.text == word_text][0] elif len(referred_word_parts) > 0: word = [ word_part for word_part in referred_word_parts[0] if word_part.text == word_text ][0] else: word = [ better_word for better_word in referred_words if better_word.text == word_text][0] atypical_match = re.match(ATYPICAL_GROUP, footnote.content) correction_match = re.match(EDITOR_CORRECTION_GROUP, footnote.content) clarification_match = re.match(CLARIFICATION_GROUP, footnote.content) is_uncertain = re.match(UNCERTAINTY_WORD_GROUP, footnote.content) is not None if correction_match is not None: correction = correction_match.group(3).strip() - word.editor_comment = EditorCorrection(correction_text=correction, is_uncertain=is_uncertain) + word.editor_comments.append(EditorCorrection(correction_text=correction, is_uncertain=is_uncertain)) if not is_uncertain: word.edited_text = correction elif clarification_match is not None: - word.editor_comment = Clarification(text=footnote.extract_part(word_text, css_filter='bold;')) + word.editor_comments.append(Clarification(text=footnote.extract_part(word_text, css_filter='bold;'))) elif atypical_match is not None: text = footnote.extract_part(word_text, css_filter='bold;')\ if footnote.markup_contains_css_filter('bold;')\ else None - word.editor_comment = AtypicalWriting(text=text) + word.editor_comments.append(AtypicalWriting(text=text)) elif is_uncertain: - word.editor_comment = UncertainDecipherment() + word.editor_comments.append(UncertainDecipherment()) else: comment_match = re.match(COMMENT_GROUP, footnote.content) if comment_match is not None: is_uncertain = _is_uncertain(footnote) comment = footnote.content[comment_match.end():-1].strip()\ if is_uncertain\ else footnote.content[comment_match.end():].strip() - word.editor_comment = EditorComment(comment=comment, is_uncertain=is_uncertain) + word.editor_comments.append(EditorComment(comment=comment, is_uncertain=is_uncertain)) else: warnings.warn(f'Unknown editor comment for word "{word.text}": <{footnote}>') elif re.match(r'.*\s.*', word_text): for word_part in word_text.split(' '): _process_word_match(words, footnote, line_match, word_part, line_number, parent_word_composition=word_text) elif len([word for word in words if word.line_number == -1 and len(word.word_parts) > 0 ]) > 0: new_words = [] for word in [word for word in words if word.line_number == -1 and len(word.word_parts) > 0 ]: new_words += word.word_parts _process_word_match(new_words, footnote, line_match, word_text, line_number) else: warnings.warn(f'No word found with text 
"{word_text}" on line {line_number}: <{footnote}>') def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to process the footnotes of a page. svgscripts/process_footnotes.py [OPTIONS] a xml file about a manuscript, containing information about its pages. a xml file about a page, containing information about svg word positions. OPTIONS: -h|--help show help -s|--skip-until=left skip all nodes.get('X') < left :return: exit code (int) """ skip_after=-1.0 try: opts, args = getopt.getopt(argv, "hs:", ["help", "skip-until=" ]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 elif opt in ('-s', '--skip-until'): skip_after = float(arg) if len(args) < 1: usage() return 2 exit_status = 0 file_a = args[0] if isfile(file_a): manuscript_file = file_a\ if xml_has_type(FILE_TYPE_XML_MANUSCRIPT, xml_source_file=file_a)\ else None counter = 0 for page in Page.get_pages_from_xml_file(file_a, status_contains=STATUS_MERGED_OK): if not UNITTESTING: print(Fore.CYAN + f'Processing {page.title}, {page.number} ...' + Style.RESET_ALL) back_up(page, page.xml_file) categorize_footnotes(page, skip_after=skip_after, find_content=True) save_imprints(page) counter += 1 not UNITTESTING and print(Style.RESET_ALL + f'[{counter} pages processed]') else: raise FileNotFoundError('File {} does not exist!'.format(file_a)) return exit_status if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: tests_svgscripts/test_text_connection_mark.py =================================================================== --- tests_svgscripts/test_text_connection_mark.py (revision 110) +++ tests_svgscripts/test_text_connection_mark.py (revision 111) @@ -1,76 +1,84 @@ import unittest from os import sep, path from os.path import dirname, isdir import lxml.etree as ET import sys sys.path.append('svgscripts') from datatypes.matrix import Matrix from datatypes.page import Page from datatypes.reference import Reference from datatypes.transkriptionField import TranskriptionField from datatypes.transkription_position import TranskriptionPosition from datatypes.text_connection_mark import TextConnectionMark from datatypes.word import Word class TestTextConnectionMark(unittest.TestCase): def setUp(self): DATADIR = dirname(__file__) + sep + 'test_data' self.xml_file = DATADIR + sep + 'N_VII_1_page008.xml' mylist = {'text': '*', 'id': '0', 'line-number': '2' } self.node = ET.Element(TextConnectionMark.XML_TAG, attrib=mylist) word_position = TranskriptionPosition(x=0, y=1, height=10, width=10, matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)')) self.transkription_positions = [ word_position ] word_position.attach_object_to_tree(self.node) self.test_tcm_xml = DATADIR + sep + 'N_VII_1_page013.xml' def test_create_cls(self): text_connection_mark = TextConnectionMark.create_cls(self.node) self.assertEqual(text_connection_mark.id, 0) self.assertEqual(text_connection_mark.transkription_positions[0].bottom, 11) self.assertEqual(text_connection_mark.transkription_positions[0].height, 10) self.assertEqual(text_connection_mark.transkription_positions[0].top, 1) self.assertEqual(text_connection_mark.transkription_positions[0].left, 0) self.assertEqual(text_connection_mark.transkription_positions[0].width, 10) self.assertEqual(text_connection_mark.text, '*') self.assertEqual(text_connection_mark.line_number, 2) self.assertEqual(text_connection_mark.transkription_positions[0].transform.isRotationMatrix(), True) + def 
Index: tests_svgscripts/test_text_connection_mark.py
===================================================================
--- tests_svgscripts/test_text_connection_mark.py (revision 110)
+++ tests_svgscripts/test_text_connection_mark.py (revision 111)
@@ -1,76 +1,84 @@
import unittest
from os import sep, path
from os.path import dirname, isdir
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.reference import Reference
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.word import Word

class TestTextConnectionMark(unittest.TestCase):
    def setUp(self):
        DATADIR = dirname(__file__) + sep + 'test_data'
        self.xml_file = DATADIR + sep + 'N_VII_1_page008.xml'
        mylist = {'text': '*', 'id': '0', 'line-number': '2' }
        self.node = ET.Element(TextConnectionMark.XML_TAG, attrib=mylist)
        word_position = TranskriptionPosition(x=0, y=1, height=10, width=10, matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)'))
        self.transkription_positions = [ word_position ]
        word_position.attach_object_to_tree(self.node)
        self.test_tcm_xml = DATADIR + sep + 'N_VII_1_page013.xml'

    def test_create_cls(self):
        text_connection_mark = TextConnectionMark.create_cls(self.node)
        self.assertEqual(text_connection_mark.id, 0)
        self.assertEqual(text_connection_mark.transkription_positions[0].bottom, 11)
        self.assertEqual(text_connection_mark.transkription_positions[0].height, 10)
        self.assertEqual(text_connection_mark.transkription_positions[0].top, 1)
        self.assertEqual(text_connection_mark.transkription_positions[0].left, 0)
        self.assertEqual(text_connection_mark.transkription_positions[0].width, 10)
        self.assertEqual(text_connection_mark.text, '*')
        self.assertEqual(text_connection_mark.line_number, 2)
        self.assertEqual(text_connection_mark.transkription_positions[0].transform.isRotationMatrix(), True)

+    def test_instantiate_as_word(self):
+        text_source = Reference(first_line=1, title='ASDF', page_number='5c')
+        text_source.attach_object_to_tree(self.node)
+        text_connection_mark = TextConnectionMark.instantiate_as_word(self.node)
+        self.assertEqual(text_connection_mark.id, 0)
+        self.assertEqual(text_connection_mark.text, '*')
+        self.assertEqual(text_connection_mark.line_number, 2)
+        self.assertEqual(len(text_connection_mark.editor_comments), 1)

    def test_attach_word_to_tree(self):
        text_connection_mark = TextConnectionMark.create_cls(self.node)
        text_connection_mark.text_source = Reference(first_line=1, title='ASDF', page_number='5c')
        empty_tree = ET.ElementTree(ET.Element('page'))
        text_connection_mark.attach_word_to_tree(empty_tree)
        #print(ET.dump(empty_tree.getroot()))
        for node in empty_tree.xpath('//' + TextConnectionMark.XML_TAG):
            mark = TextConnectionMark.create_cls(node)
            self.assertEqual(mark.id, 0)
            self.assertEqual(mark.transkription_positions[0].bottom, 11)
            self.assertEqual(mark.transkription_positions[0].height, 10)
            self.assertEqual(mark.transkription_positions[0].top, 1)
            self.assertEqual(mark.transkription_positions[0].left, 0)
            self.assertEqual(mark.transkription_positions[0].width, 10)
            self.assertEqual(mark.text, '*')
            self.assertEqual(mark.line_number, 2)
            self.assertEqual(mark.transkription_positions[0].transform.isRotationMatrix(), True)
            self.assertEqual(mark.text_source.first_line, text_connection_mark.text_source.first_line)
            self.assertEqual(mark.text_source.page_number, text_connection_mark.text_source.page_number)

    def test_get_semanticAndDataDict(self):
        dictionary = TextConnectionMark.get_semantic_dictionary()
        #print(dictionary)

    def test_find_content(self):
        page = Page(self.test_tcm_xml)
        transkription_field = TranskriptionField(page.source)
        svg_tree = ET.parse(page.source)
        page.text_connection_marks = [ TextConnectionMark.create_cls_from_word(word) for word in page.words if word.text == TextConnectionMark.SPECIAL_CHAR_LIST[1]]
        TextConnectionMark.find_content_in_footnotes(page, transkription_field, svg_tree)
        self.assertEqual(len(page.text_connection_marks), 4)
        for tcm in page.text_connection_marks:
            self.assertEqual(tcm.text_source is not None, True)
            self.assertEqual(tcm.text_source.first_line > -1, True)
            self.assertEqual(tcm.text_source.page_number, '14')

if __name__ == "__main__":
    unittest.main()

Index: tests_svgscripts/test_reference.py
===================================================================
--- tests_svgscripts/test_reference.py (revision 110)
+++ tests_svgscripts/test_reference.py (revision 111)
@@ -1,50 +1,54 @@
import unittest
from os import sep, path
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.reference import Reference

class TestReference(unittest.TestCase):
    def test_init(self):
        reference_string = '5'
        reference = Reference.create_cls(reference_string=reference_string, title='ASDF')
        self.assertEqual(reference.first_line, 5)
        reference_string = '5,5'
        reference = Reference.create_cls(reference_string=reference_string, title='ASDF')
        self.assertEqual(reference.first_line, 5)
        self.assertEqual(reference.page_number, str(5))
        reference_string = 'ASDF 5,5-8'
        reference = Reference.create_cls(reference_string=reference_string)
        self.assertEqual(reference.title, 'ASDF')
        self.assertEqual(reference.first_line, 5)
        self.assertEqual(reference.last_line, 8)
        self.assertEqual(reference.page_number, str(5))
        reference_string = 'ASDF 5,5 a .'
        reference = Reference.create_cls(reference_string=reference_string)
        self.assertEqual(reference.page_number, str(5))
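
    # Reading aid (not part of the diff): the strings parsed above follow the
    # pattern "[title ]page,first_line[-last_line]"; a bare number such as '5'
    # is read as the first line only. A hypothetical further example:
    #     Reference.create_cls(reference_string='XYZ 3,10-12')
    #     # -> title='XYZ', page_number='3', first_line=10, last_line=12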
    def test_attach_create(self):
        reference_string = 'ASDF 5,5-8'
        reference = Reference.create_cls(is_uncertain=True, reference_string=reference_string)
        empty_tree = ET.ElementTree(ET.Element('page'))
        reference.attach_object_to_tree(empty_tree)
        #print(ET.dump(empty_tree.getroot()))
        nodes = empty_tree.xpath('//' + Reference.XML_TAG)
        self.assertEqual(len(nodes), 1)
        reference_copy = Reference.create_cls(node=nodes[0])
        self.assertEqual(reference.id, reference_copy.id)
        self.assertEqual(reference.is_uncertain, reference_copy.is_uncertain)
        self.assertEqual(reference.title, reference_copy.title)
        self.assertEqual(reference.page_number, reference_copy.page_number)
        self.assertEqual(reference.first_line, reference_copy.first_line)
        self.assertEqual(reference.last_line, reference_copy.last_line)

    def test_get_semantic_dictionary(self):
        dictionary = Reference.get_semantic_dictionary()
        #print(dictionary)
-
+
+    def test_toString(self):
+        reference_string = 'ASDF 5,5-8'
+        reference = Reference.create_cls(is_uncertain=True, reference_string=reference_string)
+        self.assertEqual(reference.toString(), reference_string)

if __name__ == "__main__":
    unittest.main()

Index: tests_svgscripts/test_process_footnotes.py
===================================================================
--- tests_svgscripts/test_process_footnotes.py (revision 110)
+++ tests_svgscripts/test_process_footnotes.py (revision 111)
@@ -1,54 +1,54 @@
import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
from datatypes.footnotes import extract_footnotes
from datatypes.imprint import Imprint
from datatypes.page import Page
import process_footnotes
from process_footnotes import categorize_footnotes, main, save_imprints

class TestExtractFootnotes(unittest.TestCase):
    def setUp(self):
        process_footnotes.UNITTESTING = True
        DATADIR = path.dirname(__file__) + sep + 'test_data'
        self.test_footnote = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.test_footnote_verso = DATADIR + sep + 'N_VII_1_xp5_4_page5.svg'
        self.test_footnote_recto = DATADIR + sep + 'N_VII_1_xp5_4_page6.svg'
        self.test_footnote_multi = DATADIR + sep + 'N_VII_1_xp5_4_page13.svg'
        self.test_footnote_multi_xml = DATADIR + sep + 'N_VII_1_page013.xml'
        self.test_categorize_footnote = DATADIR + sep + 'N_VII_1_page006.xml'

    def test_categorize_footnotes(self):
        page = Page(self.test_categorize_footnote)
        footnotes = extract_footnotes(page, svg_file=self.test_footnote_recto)
        categorize_footnotes(page, footnotes)
-        words_with_comments = [ word for word in page.words if word.editor_comment is not None ]
+        words_with_comments = [ word for word in page.words if len(word.editor_comments) > 0 ]
        self.assertEqual(len(words_with_comments), 4)
        lines_with_comments = [ line for line in page.lines if len(line.editor_comments) > 0 ]
        self.assertEqual(len(lines_with_comments), 1)
        page = Page('xml/W_II_1_page141.xml')
        footnotes = extract_footnotes(page)
        categorize_footnotes(page, footnotes, debug=True)
-        words_with_comments = [ word for word in page.words if word.editor_comment is not None ]
+        words_with_comments = [ word for word in page.words if len(word.editor_comments) > 0 ]

    def test_save_imprints(self):
        page = Page(self.test_categorize_footnote)
        save_imprints(page)
        self.assertEqual(len(page.page_tree.xpath('//' + Imprint.XML_TAG)), 2)
        #print(ET.dump(page.page_tree.getroot()))
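
    # Migration note (not part of the diff): with revision 111 a word carries
    # a list of editor comments instead of a single one, so checks change from
    #     word.editor_comment is not None
    # to
    #     len(word.editor_comments) > 0
    # as exercised in test_categorize_footnotes above.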
    def test_main(self):
        self.assertEqual(main(['xml/N_VII_1_page005.xml']), 0)

if __name__ == "__main__":
    unittest.main()

Index: tests_svgscripts/test_extractWordPosition.py
===================================================================
--- tests_svgscripts/test_extractWordPosition.py (revision 110)
+++ tests_svgscripts/test_extractWordPosition.py (revision 111)
@@ -1,202 +1,236 @@
import unittest
import os
from os import sep, path
from os.path import isfile, isdir, dirname
import re
import shutil
import tempfile
import lxml.etree as ET
from lxml.etree import XMLSyntaxError
import sys
sys.path.append('svgscripts')
import extractWordPosition
from myxmlwriter import write_pretty
from datatypes.transkriptionField import TranskriptionField
from datatypes.matrix import Matrix
from datatypes.page_creator import PageCreator, FILE_TYPE_SVG_WORD_POSITION
from datatypes.page import Page
+from datatypes.positional_word_part import PositionalWordPart
from datatypes.pdf import PDFText
from datatypes.word import Word
from datatypes.lineNumber import LineNumber
from datatypes.word_insertion_mark import WordInsertionMark

def test_write(xml_element_tree=None, file_name=None):
    write_pretty(xml_element_tree=xml_element_tree, file_name=None, script_name='test', file_type=FILE_TYPE_SVG_WORD_POSITION)

class TestExtractor(unittest.TestCase):
    def setUp(self):
        extractWordPosition.Extractor.UNITTESTING = True
        DATADIR = dirname(__file__) + sep + 'test_data'
        self.test_file_find_word = DATADIR + sep + 'test_find_word.xml'
        self.test_dir = tempfile.mkdtemp()
        self.title = 'ABC 111'
        self.matrix_string = 'matrix(1 0 0 1 183.6558 197.9131)'
        self.test_file = DATADIR + sep + 'Mp_XIV_1_mytest_421.svg'
        self.test_empty_file = DATADIR + sep + 'my_empty_test.svg'
        self.test_source = DATADIR + sep + 'Mp_XIV_1_mytest_421.xml'
        self.xml420 = DATADIR + sep + 'Mp_XIV_1_page420.xml'
        self.pdf420 = DATADIR + sep + 'Mp_XIV_1_online_420.pdf'
        self.pdf_file = DATADIR + sep + 'W_I_8_page125.pdf'
        self.faulty_xml = DATADIR + sep + 'W_I_8_faulty_page125.xml'
        self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
        self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.testA = DATADIR + sep + 'testA.xml'
        self.multipage = DATADIR + sep + 'multipage_small_above.svg'

    def test_extract_information(self):
        extractor = extractWordPosition.Extractor()
        page = extractor.extract_information(self.multipage, multipage_index=0)
        self.assertEqual(len(page.words), 59)
        self.assertEqual(page.multipage_index, 0)
        page = extractor.extract_information(self.multipage, multipage_index=1)
        self.assertEqual(page.multipage_index, 1)
        self.assertTrue(len(page.words) > 59)
        extractor = extractWordPosition.Extractor()
        source_page = Page('xml/Mp_XV_page78v.xml')
        extractor = extractWordPosition.Extractor()
        transkription_field = TranskriptionField(source_page.source)
        svg_tree = ET.parse(source_page.source)
        text_items = extractor.get_text_items(svg_tree.getroot(), transkription_field=transkription_field)
        self.assertTrue('matrix(1 0 0 1 115.6299 719.3535)' in [ item.get('transform') for item in text_items ])
        page = extractor.extract_information(source_page.source, svg_file=source_page.svg_file)
        self.assertTrue(page.svg_image.text_field is not None)
+    ##:map :w:!python3 -m unittest tests_svgscripts.test_extractWordPosition.TestExtractor.test_improved_extract_word_position
+    @unittest.skip('test with local file')
+    def test_improved_extract_word_position(self):
+        extractor = extractWordPosition.Extractor()
+        source_page = Page('xml/Mp_XV_page85v.xml')
+        print(len(source_page.words))
+        source_page.words = []
+        extractor = extractWordPosition.Extractor()
+        transkription_field = TranskriptionField(source_page.source)
+        svg_tree = ET.parse(source_page.source)
+        extractor.improved_extract_word_position(svg_tree, source_page, transkription_field=transkription_field)
+        print(len(source_page.words))
+        #for word in source_page.words: print(word.id, word.text)
+
    def test_update_title(self):
        extractor = extractWordPosition.Extractor(xml_dir=self.test_dir)
        extractor.update_title_and_manuscript('test')
        self.assertEqual(extractor.title, 'test')
        self.assertEqual(extractor.manuscript_file, '{}/test.xml'.format(self.test_dir))
        self.assertEqual(isfile('{}/test.xml'.format(self.test_dir)), True)

    def test_get_page_number(self):
        extractor = extractWordPosition.Extractor()
        self.assertEqual(extractor.get_page_number(self.test_file, page_number='1'), '001')
        self.assertEqual(extractor.get_page_number(self.test_file), '421')

    def test_get_file_name(self):
        extractor = extractWordPosition.Extractor()
        self.assertEqual(extractor.get_file_name(self.test_file), 'xml/Mp_XIV_1_mytest_421.xml')
        extractor = extractWordPosition.Extractor(title=self.title)
        self.assertEqual(extractor.get_file_name(self.test_file), 'xml/{}_page421.xml'.format(self.title.replace(' ', '_')))
        extractorA = extractWordPosition.Extractor(title=self.title)
        extractorB = extractWordPosition.Extractor(manuscript_file=extractorA.manuscript_file)
        self.assertEqual(extractorB.get_file_name(self.test_file), 'xml/{}_page421.xml'.format(self.title.replace(' ', '_')))

    def test_get_style(self):
        extractor = extractWordPosition.Extractor()
        svg_tree = ET.parse(self.test_file)
        sonderzeichen_list, letterspacing_list, style_dict = extractor.get_style(svg_tree.getroot())
        self.assertEqual(sonderzeichen_list, [ 'st21', 'st23'])
        self.assertEqual(style_dict.get('st11').get('font-family'), 'Frutiger-Europeen')
        self.assertEqual(style_dict.get('st5').get('stroke'), '#CED5CE')

    def test_get_word_from_part_obj(self):
        extractor = extractWordPosition.Extractor()
        mylist = [{'text': 'a', 'class': 'asdf' }, {'text': 'b', 'endX': 0 }, {'text': 'c'}]
        self.assertEqual(extractor.get_word_from_part_obj(mylist), 'abc')

    def test_get_break_points(self):
        extractor = extractWordPosition.Extractor()
        page = Page(self.pdf_xml)
        page.source = self.pdf_xml_source
        matrix = Matrix('matrix(1 0 0 1 543.8164 173.9126)')
        matrixB = Matrix('matrix(1 0 0 1 573.6758 173.9126)')
        matrixC = Matrix('matrix(1 0 0 1 575.9873 173.9126)')
        mylist = [{'text': 'es', 'class': 'st5 st6', 'x': matrix.add2X(23.968), 'y': matrix.getY() },\
                  {'text': 'A', 'class': 'st9 st10', 'x': matrixB.getX(), 'y': matrixB.getY() },\
                  {'text': 'sich', 'class': "st5 st6", 'x': matrixC.getX(), 'y': matrixC.getY()}]
        break_points = extractor._get_break_points(page, mylist)
        self.assertTrue(len(break_points) > 0)
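
    # Reading aid (not part of the diff): the new test below mirrors
    # test_get_break_points, but first turns the word_part_objs dicts into
    # PositionalWordPart objects via CREATE_POSITIONAL_WORD_PART_LIST, since
    # _get_pwps_break_points operates on those objects rather than on dicts.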
+    def test_get_pwps_break_points(self):
+        extractor = extractWordPosition.Extractor()
+        page = Page(self.pdf_xml)
+        page.svg_file = "./svg/W_I_8_page125_web.svg"
+        page.source = self.pdf_xml_source
+        svg_path_tree = ET.parse(page.svg_file)
+        namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
+        matrix = Matrix('matrix(1 0 0 1 543.8164 173.9126)')
+        matrixB = Matrix('matrix(1 0 0 1 573.6758 173.9126)')
+        matrixC = Matrix('matrix(1 0 0 1 575.9873 173.9126)')
+        mylist = [{'text': 'es', 'class': 'st5 st6', 'x': matrix.add2X(23.968), 'y': matrix.getY() },\
+                  {'text': 'A', 'class': 'st9 st10', 'x': matrixB.getX(), 'y': matrixB.getY() },\
+                  {'text': 'sich', 'class': "st5 st6", 'x': matrixC.getX(), 'y': matrixC.getY()}]
+        pwps = []
+        for word_part_obj in mylist:
+            pwps += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(word_part_obj, svg_path_tree, namespaces, page=page)
+        self.assertTrue(len(page.sonderzeichen_list) > 0)
+        break_points = extractor._get_pwps_break_points(page, pwps)
+        self.assertTrue(len(break_points) > 0)

    def test_get_text_items(self):
        svg_tree = ET.parse(self.test_file)
        extractor = extractWordPosition.Extractor()
        mytest_items = [ x for x in extractor.get_text_items(svg_tree.getroot()) ]
        self.assertEqual(len(mytest_items), 300)
        self.assertEqual(mytest_items[0].get('transform'), 'matrix(1 0 0 1 386.8218 57.1914)')
        tf = TranskriptionField(self.test_file)
        mytest_itemsTF = [ x for x in extractor.get_text_items(svg_tree.getroot(), transkription_field=tf) ]
        self.assertEqual(mytest_itemsTF[0].get('transform'), 'matrix(1 0 0 1 204.8618 91.7134)')

    def test_init_tree_and_target_file(self):
        target_file = self.testA
        page = PageCreator(target_file, title=self.title)
        tree = page.page_tree
        self.assertEqual(tree.getroot().get('title'), self.title)
        self.assertEqual(tree.getroot().findall('./style'), [])
        test_write(xml_element_tree=tree, file_name=target_file)
        page = PageCreator(target_file)
        tree = page.page_tree
        self.assertEqual(tree.getroot().get('title'), self.title)
        self.assertEqual(tree.getroot().findall('./style'), [])
        isfile(target_file) and os.remove(target_file)

    def test_add_style(self):
        extractor = extractWordPosition.Extractor()
        svg_tree = ET.parse(self.test_file)
        sonderzeichen_list, letterspacing_list, style_dict = extractor.get_style(svg_tree.getroot())
        target_file = self.testA
        page = PageCreator(target_file,title=self.title)
        page.add_style(sonderzeichen_list=sonderzeichen_list, style_dict=style_dict)
        test_write(xml_element_tree=page.page_tree, file_name=target_file)
        fromTarget_xml_tree = ET.parse(target_file)
        self.assertEqual(fromTarget_xml_tree.getroot().get('title'), self.title)
        self.assertEqual(fromTarget_xml_tree.getroot().find("style").get('Sonderzeichen'), "st21 st23")
        self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st5']").get('stroke'), '#CED5CE')
        self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st11']").get('font-family'), 'Frutiger-Europeen')
        page = PageCreator(target_file)
        page.add_style(sonderzeichen_list=sonderzeichen_list, style_dict=style_dict)
        test_write(xml_element_tree=page.page_tree, file_name=target_file)
        fromTarget_xml_tree = ET.parse(target_file)
        self.assertEqual(fromTarget_xml_tree.getroot().get('title'), self.title)
        self.assertEqual(fromTarget_xml_tree.getroot().find("style").get('Sonderzeichen'), "st21 st23")
        self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st5']").get('stroke'), '#CED5CE')
        self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st11']").get('font-family'), 'Frutiger-Europeen')
        isfile(target_file) and os.remove(target_file)
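
    # Reading aid (not part of the diff): the word_part_objs passed to
    # add_word below are plain dicts with the keys used throughout
    # extractWordPosition.py -- 'text', 'class', 'x' and 'y' (plus an
    # optional 'endX'), e.g.:
    #     { 'text': 'a', 'class': 'st22', 'x': 183.6558, 'y': 197.9131 }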
    def test_add_word(self):
        extractor = extractWordPosition.Extractor()
        svg_tree = ET.parse(self.test_file)
        mylist = [{'text': 'a' }, {'text': 'b' }, {'text': 'c' }]
        matrix = Matrix(self.matrix_string)
        for dict in mylist:
            dict['class'] = 'st22'
            dict['x'] = matrix.add2X(0)
            dict['y'] = matrix.getY()
        target_file = self.test_dir + sep + 'asdfasdf.xml'
        page = PageCreator(target_file)
        sonderzeichen_list, letterspacing_list, style_dict = extractor.get_style(svg_tree.getroot())
        page.add_style(sonderzeichen_list=sonderzeichen_list, letterspacing_list=letterspacing_list, style_dict=style_dict)
        self.assertEqual(extractor.add_word(page, 0, mylist, '%', 0), 1)
        mylist[1]['text'] = 'A'
        mylist[1]['class'] = 'st21'
        mylist[1]['x'] = matrix.add2X(1)
        self.assertEqual(extractor.add_word(page, 0, mylist, '%', 0), 2)
        page.update_and_attach_words2tree()
        self.assertEqual(page.page_tree.getroot().xpath('//word[@id="1"]')[0].get('text'), 'a')
        self.assertEqual(page.page_tree.getroot().xpath('//word[@id="2"]')[0].get('text'), 'c')
        self.assertEqual(page.page_tree.getroot().xpath('//word[@id="2"]/transkription-position')[0].get('left'), '183.506')
        self.assertEqual(page.page_tree.getroot().xpath('//word[@id="2"]/transkription-position')[0].get('height'), '8.25')

    def test_extractor(self):
        extractor = extractWordPosition.Extractor()
        self.assertEqual(extractor.title, None)
        self.assertEqual(extractor.manuscript_file, None)
        self.assertEqual(extractor.xml_dir, 'xml/')
        self.assertEqual(extractor.manuscript_tree, None)

    def test_write_title_to_manuscript_file(self):
        extractor = extractWordPosition.Extractor(xml_dir=self.test_dir, title=self.title)
        self.assertEqual(isfile(extractor.manuscript_file), True)
        extractor = extractWordPosition.Extractor(manuscript_file=extractor.manuscript_file)
        self.assertEqual(extractor.title, self.title)

    def tearDown(self):
        isdir(self.test_dir) and shutil.rmtree(self.test_dir)
        isfile('{}/{}.xml'.format('xml', self.title.replace(' ', '_'))) and os.remove('{}/{}.xml'.format('xml', self.title.replace(' ', '_')))

if __name__ == "__main__":
    unittest.main()
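
# Note (not part of the diff): individual test modules can be run the same
# way as the editor mapping in test_extractWordPosition.py suggests, e.g.:
#
#     python3 -m unittest tests_svgscripts.test_extractWordPosition
#     python3 -m unittest tests_svgscripts.test_reference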