Index: py2ttl/convert.py =================================================================== --- py2ttl/convert.py (revision 107) +++ py2ttl/convert.py (revision 108) @@ -1,115 +1,118 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to convert py objects to ontology and data in turtle format. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> 1}}} from colorama import Fore, Style import getopt import lxml.etree as ET from os import sep, path, listdir from os.path import isfile, isdir, dirname, basename from progress.bar import Bar import re import sys sys.path.append('svgscripts') from datatypes.archival_manuscript import ArchivalManuscriptUnity if dirname(__file__) not in sys.path: sys.path.append(dirname(__file__)) from class_spec import SemanticClass from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL from py2ttl_data import Py2TTLDataConverter from py2ttl_ontology import Py2TTLOntologyConverter sys.path.append('shared_util') from myxmlwriter import xml2dict -from main_util import get_manuscript_files +from main_util import get_manuscript_files_and_include_status __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" FILE_TYPE_XML_PROJECT = "xmlProjectFile" def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to convert py objects to an owl:Ontology and rdf data in turtle format. py2ttl/convert.py [OPTIONS] [ ...] xml file of type shared_util.myxmlwriter.FILE_TYPE_XML_MANUSCRIPT. OPTIONS: -h|--help: show help -i|--include-status=STATUS include pages with status = STATUS. STATUS is a ':' separated string of statuses, e.g. 'OK:faksimile merged'. :return: exit code (int) """
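For illustration, a typical invocation might look like the following (the project and manuscript file names are hypothetical):

    python3 py2ttl/convert.py xml/project.xml
    python3 py2ttl/convert.py --include-status='OK:faksimile merged' xml/N_VII_1.xml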
check_config_files_exist() datatypes_dir = get_datatypes_dir() source_ontology_file = PROJECT_ONTOLOGY_FILE target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME) manuscript_file = None page_status_list = [ 'OK', 'faksimile merged' ] try: opts, args = getopt.getopt(argv, "hi:", ["help", "include-status="]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 elif opt in ('-i', '--include-status'): page_status_list = arg.split(':') if len(args) < 1 : usage() return 2 ontology_created = False ontology_converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file) output = 2 - for manuscript_file in get_manuscript_files(args): + for manuscript_file, include_status in get_manuscript_files_and_include_status(args): if not isfile(manuscript_file): usage() return 2 if not ontology_created: print(Fore.CYAN + 'Create ontology from "{}" ...'.format(manuscript_file)) if ontology_converter.create_ontology(datatypes_dir, target_ontology_file) == 0: print(Fore.GREEN + '[Ontology file {0} created]'.format(target_ontology_file)) ontology_created = True else: return 2 - print(Fore.CYAN + 'Create data from "{}" ...'.format(manuscript_file)) + current_page_status_list = page_status_list\ + if include_status is None\ + else include_status.split(':') + print(Fore.CYAN + f'Create data from "{manuscript_file}" with status "{current_page_status_list}" ...') data_converter = Py2TTLDataConverter(manuscript_file, mapping_dictionary=ontology_converter.uri_mapping4cls_and_properties) - output = data_converter.convert(page_status_list=page_status_list) + output = data_converter.convert(page_status_list=current_page_status_list) return output if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: shared_util/main_util.py =================================================================== --- shared_util/main_util.py (revision 107) +++ shared_util/main_util.py (revision 108) @@ -1,93 +1,103 @@ import lxml.etree as ET from os.path import isfile, isdir, dirname, basename from svgpathtools import svg2paths2, svg_to_paths import sys sys.path.append('svgscripts') from datatypes.path import Path from datatypes.transkriptionField import TranskriptionField from datatypes.transkription_position import TranskriptionPosition FILE_TYPE_XML_PROJECT = "xmlProjectFile" def create_function_dictionary(list_of_keys, target_function, function_dictionary=None) -> dict: """Create a function_dictionary """ if function_dictionary is None: function_dictionary = {} for key in list_of_keys: function_dictionary.update({key: target_function}) return function_dictionary def get_manuscript_files(args: list) ->list: """Return a list of manuscript files. If first element is of type FILE_TYPE_XML_PROJECT read from xml file and return as list of filenames. """ if len(args) == 1\ and args[0].endswith('.xml')\ and ET.parse(args[0]).getroot().find('metadata/type').text == FILE_TYPE_XML_PROJECT: return ET.parse(args[0]).xpath('//manuscript[contains(@status, "OK")]/@file') return args +def get_manuscript_files_and_include_status(args: list) ->list: + """Return a list of tuples of manuscript files and an optional include status. If the first element is of type FILE_TYPE_XML_PROJECT, read from the + xml file and return a list of tuples of filename (@file) and include status for manuscript pages (@include). + """ + if len(args) == 1\ + and args[0].endswith('.xml')\ + and ET.parse(args[0]).getroot().find('metadata/type').text == FILE_TYPE_XML_PROJECT: + return [ (node.get('file'), node.get('include')) for node in ET.parse(args[0]).xpath('//manuscript[contains(@status, "OK")]')] + return [ (arg, None) for arg in args ] +
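The project file that feeds get_manuscript_files_and_include_status is not shown in this changeset; a minimal sketch, assuming a root whose metadata/type is xmlProjectFile and manuscript nodes carrying @status, @file and an optional @include:

    import lxml.etree as ET

    # Hypothetical project file; @include overrides the default page-status
    # filter for a single manuscript (structure assumed from the xpath above).
    project_xml = b'''<project>
      <metadata><type>xmlProjectFile</type></metadata>
      <manuscript status="OK" file="xml/N_VII_1.xml"/>
      <manuscript status="OK" file="xml/Mp_XIV.xml" include="OK:faksimile merged"/>
    </project>'''

    root = ET.fromstring(project_xml)
    pairs = [ (node.get('file'), node.get('include'))
              for node in root.xpath('//manuscript[contains(@status, "OK")]') ]
    print(pairs)
    # [('xml/N_VII_1.xml', None), ('xml/Mp_XIV.xml', 'OK:faksimile merged')]

In convert.py above, a None include status falls back to the default page_status_list, while a string is split on ':' per manuscript.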
+ """ + if len(args) == 1\ + and args[0].endswith('.xml')\ + and ET.parse(args[0]).getroot().find('metadata/type').text == FILE_TYPE_XML_PROJECT: + return [ (node.get('file'),node.get('include')) for node in ET.parse(args[0]).xpath('//manuscript[contains(@status, "OK")]')] + return args + def extract_paths_on_tf(page, transkription_field=None, new_style_prefix='tln', outsiders=None, outsider_attributes=None) ->list: """Extract all paths on transkription_field. :return: a list of datatypes.path.Path """ if page.source is not None and isfile(page.source): if transkription_field is None: transkription_field = TranskriptionField(page.source, multipage_index=page.multipage_index) paths, attributes = svg_to_paths.svg2paths(page.source) allpaths_on_tf = [] for index, path in enumerate(paths): attribute = attributes[index] if len(path) > 0\ and path != transkription_field.path\ and path.bbox()[0] >= transkription_field.xmin\ and path.bbox()[1] <= transkription_field.xmax\ and path.bbox()[2] >= transkription_field.ymin\ and path.bbox()[3] <= transkription_field.ymax: style_class = attribute.get('class') if style_class is None and attribute.get('style') is not None: style_class = create_new_style(page, attribute.get('style'), new_style_prefix=new_style_prefix) allpaths_on_tf.append(Path.create_cls(id=index, path=path, style_class=style_class, page=page)) elif outsiders is not None\ and len(path) > 0\ and path != transkription_field.path: style_class = attribute.get('class') if style_class is None and attribute.get('style') is not None: style_class = create_new_style(page, attribute.get('style'), new_style_prefix=new_style_prefix) outsiders.append(Path.create_cls(id=index, path=path, style_class=style_class, page=page)) outsider_attributes.append(attribute) return allpaths_on_tf else: return [] def create_new_style(page, style_attribute_string, new_style_prefix='tln') ->str: """Create new style, update page and return new style_class. """ style_dict = {} style_class = None for key_content in style_attribute_string.split(';'): if ':' in key_content: key, content = tuple(key_content.split(':')) style_dict.update({ key: content}) if style_dict in page.style_dict.values(): style_class = list(page.style_dict.keys())[list(page.style_dict.values()).index(style_dict)] else: new_style_index = len([ k for k in page.style_dict.keys() if k.startswith(new_style_prefix) ]) style_class = f'{new_style_prefix}{new_style_index}' page.style_dict.update({style_class: style_dict }) page.add_style(sonderzeichen_list=page.sonderzeichen_list, letterspacing_list=page.letterspacing_list,\ style_dict=page.style_dict) return style_class def get_paths_near_position(tp: TranskriptionPosition, paths: list, xmin=0, ymin=0, do_not_include_d_attributes=None) ->list: """Given a transkription position and a list of svgscripts.datatypes.path.Path, return a list of paths near this position. """ tp_x = tp.left + (tp.width/2) + xmin tp_y = tp.top + (tp.height/2) + ymin do_not_include_d_attributes = do_not_include_d_attributes if do_not_include_d_attributes is not None else [] return [ path.d_attribute for path in Path.get_nearest_paths(paths, tp_x, tp_y) if path.d_attribute not in do_not_include_d_attributes ] Index: Friedrich-Nietzsche-late-work-ontology.ttl =================================================================== --- Friedrich-Nietzsche-late-work-ontology.ttl (revision 107) +++ Friedrich-Nietzsche-late-work-ontology.ttl (revision 108) @@ -1,63 +1,143 @@ @prefix dct: . @prefix document: . @prefix homotypic: . @prefix stoff: . 
@prefix text: . @prefix owl: <http://www.w3.org/2002/07/owl#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix tln: . a owl:Ontology; dct:license ; dct:title "An ontology about the collected late works of Friedrich Nietzsche"@en; dct:description """Formal description of specific concepts in the scientific study of Friedrich Nietzsche's late work."""@en; dct:creator "Dominique Steinbach, tool coordinator/software developer, NIE-INE/digital edition of der späte Nietzsche, Basel University, Switzerland"@en; dct:contributor "Christian Steiner, software developer, digital edition of der späte Nietzsche, University of Basel, Switzerland"@en; dct:publisher "Basel University, Switzerland"@en. +tln:TextGenesis a owl:Class ; + rdfs:label "identifies a genetic order of text versions"@en ; + rdfs:comment "Identifies a genetic order of text versions, i.e. groups text units as earlier and later versions of each other."@en ; + rdfs:isDefinedBy . + +tln:IdentifiedTextVersion a owl:Class ; + rdfs:label "identifies a list of text unities as a text version"@en ; + rdfs:comment "Identification of a list of text unities (e.g. pages or parts of pages) as a text version for which there is an earlier or later version."@en ; + rdfs:isDefinedBy . + +tln:PartOfPageTextUnit a owl:Class ; + rdfs:label "identifies a part of a page as a text unity"@en ; + rdfs:comment "Identification of a part of a page as a text unity."@en ; + rdfs:isDefinedBy ; + rdfs:subClassOf [ a owl:Restriction ; + owl:cardinality "1"^^xsd:nonNegativeInteger ; + owl:onProperty tln:belongsToPage ], + [ a owl:Restriction ; + owl:cardinality "1"^^xsd:nonNegativeInteger ; + owl:onProperty tln:startLine ], + [ a owl:Restriction ; + owl:cardinality "1"^^xsd:nonNegativeInteger ; + owl:onProperty tln:endLine ] . + +tln:ExternalTextUnit a owl:Class ; + rdfs:label "a text unit that has been published outside the digital edition"@en ; + rdfs:comment "A text unit that has been published outside the digital edition."@en ; + rdfs:isDefinedBy ; + rdfs:subClassOf tln:IdentifiedTextVersion . tln:Page a owl:Class ; rdfs:subClassOf document:Page . +tln:belongsToPage a owl:ObjectProperty ; + rdfs:label "relates a part of a page with the page it is a part of"@en ; + rdfs:comment "Relates a part of a page with the page it is a part of."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:PartOfPageTextUnit ; + rdfs:range tln:Page. + +tln:startLine a owl:ObjectProperty ; + rdfs:label "relates a part of a page with the line it starts with"@en ; + rdfs:comment "Relates a part of a page with the line it starts with."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:PartOfPageTextUnit ; + rdfs:range tln:Line. + +tln:endLine a owl:ObjectProperty ; + rdfs:label "relates a part of a page with the line it ends with"@en ; + rdfs:comment "Relates a part of a page with the line it ends with."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:PartOfPageTextUnit ; + rdfs:range tln:Line. + +tln:identifiesAsVersion a owl:ObjectProperty ; + rdfs:label "groups a list of text unities together as an identified text version"@en ; + rdfs:comment "Groups a list of text unities together as an identified text version for which there is an earlier or later version."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:IdentifiedTextVersion ; + rdfs:range rdf:List. + +tln:hasGeneticOrder a owl:ObjectProperty ; + rdfs:label "relates a list of text versions to an identified genetic order"@en ; + rdfs:comment "Relates a list of text versions to an identified genetic order.
The position in the list determines the version of a text unit."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:TextGenesis ; + rdfs:range rdf:List. + +tln:textUnitHasTitle a owl:ObjectProperty ; + rdfs:label "relates an externally published text unit with a title"@en ; + rdfs:comment "Relates an externally published text unit with a title by which it can be identified."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:ExternalTextUnit ; + rdfs:range xsd:string . + +tln:textUnitHasUrl a owl:ObjectProperty ; + rdfs:label "relates an externally published text unit with a URL"@en ; + rdfs:comment "Relates an externally published text unit with a URL by which it can be visited."@en ; + rdfs:isDefinedBy ; + rdfs:domain tln:ExternalTextUnit ; + rdfs:range xsd:anyURI . + tln:hasImage a owl:ObjectProperty ; rdfs:label "relates a page to an image"@en ; rdfs:comment "relates a page to an image that has a textfield that specifies the area where the writing that constitutes the page can be found."@en ; rdfs:isDefinedBy ; rdfs:domain tln:Page ; rdfs:range tln:Image . tln:hasUrl a owl:DatatypeProperty ; rdfs:label "has Url"@en ; rdfs:domain tln:Image ; rdfs:isDefinedBy ; rdfs:range xsd:anyURI . tln:inheritOverwritesWord a owl:ObjectProperty ; rdfs:subPropertyOf tln:overwritesWord; rdfs:label "word overwrites word (inherited from tln:wordHasCorrection)"@en ; rdfs:comment "The author has used this word in order to overwrite that word."@en ; rdfs:isDefinedBy ; owl:propertyChainAxiom ( tln:wordHasCorrection tln:overwritesWord ). tln:lineContinuesOn a owl:ObjectProperty ; rdfs:label "writing from subject line continues on object line"@en ; rdfs:comment "the writing that ends on subject line continues on object line"@en ; rdfs:isDefinedBy ; rdfs:domain tln:Line ; rdfs:range tln:Line . tln:pageIsOnTextField a owl:ObjectProperty ; rdfs:label "page is on text field"@en ; rdfs:comment "the writing that is referred to as subject can be found on object"@en ; rdfs:isDefinedBy ; rdfs:domain tln:Page ; rdfs:range tln:TextField . tln:writingContinuesWithWord a owl:ObjectProperty ; rdfs:label "writing continues with next word"@en ; rdfs:isDefinedBy ; rdfs:domain tln:Word ; rdfs:range tln:Word . +
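To see the new vocabulary in use, here is a minimal rdflib sketch; the tln namespace IRI and all data IRIs are assumptions, since the prefix IRIs were lost above:

    from rdflib import Graph, Namespace, RDF
    from rdflib.collection import Collection

    TLN = Namespace('http://www.nie.org/ontology/nietzsche#')   # assumed IRI
    DATA = Namespace('http://www.nie.org/data/')                # hypothetical

    g = Graph()
    unit = DATA['W_II_1_page131_unit']   # hypothetical part-of-page text unit
    g.add((unit, RDF.type, TLN.PartOfPageTextUnit))
    g.add((unit, TLN.belongsToPage, DATA['W_II_1_page131']))
    g.add((unit, TLN.startLine, DATA['W_II_1_page131_line1']))
    g.add((unit, TLN.endLine, DATA['W_II_1_page131_line10']))
    version = DATA['textversion1']
    g.add((version, RDF.type, TLN.IdentifiedTextVersion))
    # tln:identifiesAsVersion points at an rdf:List of text unities:
    Collection(g, DATA['textversion1_list'], [unit])
    g.add((version, TLN.identifiesAsVersion, DATA['textversion1_list']))
    print(g.serialize(format='turtle'))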
Index: svgscripts/fix_missing_glyphs.py =================================================================== --- svgscripts/fix_missing_glyphs.py (revision 107) +++ svgscripts/fix_missing_glyphs.py (revision 108) @@ -1,210 +1,213 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to fix missing glyphs. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> 1}}} from colorama import Fore, Style import getopt import re import sys from os import listdir, sep, path from os.path import isfile, isdir, dirname import lxml.etree as ET if dirname(__file__) not in sys.path: sys.path.append(dirname(__file__)) from datatypes.page import Page, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT from datatypes.positional_word_part import PositionalWordPart from datatypes.transkriptionField import TranskriptionField from datatypes.transkription_position import TranskriptionPosition from datatypes.word import Word from util import update_svgposfile_status sys.path.append('shared_util') from myxmlwriter import write_pretty __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" UNITTESTING = False REMOVE_SVG_WORD_POS_PAGE_ENDING = re.compile(r'_page[0-9]+\w*') def find_missing_glyph_for_pwp(pwp, svg_path_tree, namespaces, xmin=0.0, ymin=0.0): """Finds missing glyph for a PositionalWordPart. :return: list of PositionalWordPart """ THRESHOLD = 15.5 #pwp = PositionalWordPart(node=positional_word_part_node) word_part_obj = { "x": pwp.left, "y": pwp.top, "text": pwp.text, "matrix": pwp.transform, "class": pwp.style_class } start_id = int(pwp.id) threshold = -0.5 positional_word_parts = [] while threshold < THRESHOLD and len(positional_word_parts) < 1: try: positional_word_parts = PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(word_part_obj, svg_path_tree, namespaces,\ start_id=start_id, xmin=xmin, ymin=ymin, threshold=threshold, throw_error_if_not_found=True) except Exception: threshold += 0.1 return positional_word_parts def update_word(word, old_transkription_position, old_positional_word_part, positional_word_parts): """Updates word according to new positional_word_parts. :return: new transkription_position """ if len(positional_word_parts) > 0: debug_msg_string = 'update word from ' + __file__ old_transkription_position.positional_word_parts.remove(old_positional_word_part) positional_word_parts.reverse() for positional_word_part in positional_word_parts: old_transkription_position.positional_word_parts.insert(int(old_positional_word_part.id), positional_word_part) for index, positional_word_part in enumerate(old_transkription_position.positional_word_parts): positional_word_part.id = index transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(\ old_transkription_position.positional_word_parts, debug_msg_string=debug_msg_string, transkription_position_id=old_transkription_position.id) word.transkription_positions.remove(old_transkription_position) transkription_positions.reverse() for new_tp in transkription_positions: word.transkription_positions.insert(int(old_transkription_position.id), new_tp) text = '' for index, tp in enumerate(word.transkription_positions): tp.id = index tp.writing_process_id = old_transkription_position.writing_process_id for pwp in tp.positional_word_parts: text += pwp.text if word.text != text: word.text = text return transkription_positions[0] def fix_missing_glyphs(svg_word_pos_file, manuscript_file=None): """Finds missing glyphs for xml file of type FILE_TYPE_SVG_WORD_POSITION. """ if isfile(svg_word_pos_file): if not UNITTESTING: print(Fore.LIGHTBLUE_EX + 'Fixing missing glyphs for file {} ...
'.format(svg_word_pos_file), end='') print(Style.RESET_ALL) page = Page(svg_word_pos_file) xmin = 0 ymin = 0 if page.svg_image is None or page.svg_image.text_field is None: transkription_field = TranskriptionField(page.svg_file) xmin = transkription_field.xmin ymin = transkription_field.ymin svg_path_tree = ET.parse(page.svg_file) namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() } number_of_missing_glyphs = len(page.page_tree.xpath('//' + PositionalWordPart.XML_TAG + '[not(@symbol-id)]')) words_without_glyphs = [ word for word in page.words\ if len([ tp for tp in word.transkription_positions\ if len([ pwp for pwp in tp.positional_word_parts if pwp.symbol_id is None]) > 0]) > 0 ] for word in words_without_glyphs: for transkription_position in word.transkription_positions: positional_word_parts = transkription_position.positional_word_parts[:] for positional_word_part in positional_word_parts: if positional_word_part.symbol_id is None: pwps = find_missing_glyph_for_pwp(positional_word_part, svg_path_tree, namespaces, xmin=xmin, ymin=ymin) new_transkription_position = update_word(word, transkription_position, positional_word_part, pwps) if new_transkription_position is not None: transkription_position = new_transkription_position page.update_and_attach_words2tree() write_pretty(xml_element_tree=page.page_tree, file_name=svg_word_pos_file, script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION) page = Page(svg_word_pos_file) new_number_of_missing_glyphs = len(page.page_tree.xpath('//' + PositionalWordPart.XML_TAG + '[not(@symbol-id)]')) if not UNITTESTING: result_color = Fore.LIGHTBLUE_EX if new_number_of_missing_glyphs == 0 else Fore.MAGENTA print(result_color + ' {0}/{1}'.format(number_of_missing_glyphs-new_number_of_missing_glyphs, number_of_missing_glyphs), end='') print(Fore.LIGHTBLUE_EX + ' fixed.', end='') print(Style.RESET_ALL) if len(page.page_tree.xpath('//' + PositionalWordPart.XML_TAG + '[not(@symbol-id)]')) == 0: update_svgposfile_status(svg_word_pos_file, manuscript_file=manuscript_file, status='OK') def get_filelist_and_manuscript_file(file_a, file_b=None): """Returns a file list and a manuscript file (or None) """ file_list = [] manuscript_file = None source_tree = ET.parse(file_a) if source_tree.getroot().find('metadata/type').text == FILE_TYPE_SVG_WORD_POSITION\ and len([ word_part for word_part in source_tree.xpath('//' + PositionalWordPart.XML_TAG + '[not(@symbol-id)]')]) > 0: # if symbol_ids are missing ... file_list.append(file_a) if file_b is not None: manuscript_file = file_b else: manuscript_file = REMOVE_SVG_WORD_POS_PAGE_ENDING.sub('', file_a) if not isfile(manuscript_file): manuscript_file = None elif source_tree.getroot().find('metadata/type').text == FILE_TYPE_XML_MANUSCRIPT: manuscript_file = file_a if file_b is not None: file_list.append(file_b) else: file_list = source_tree.xpath('//page[contains(@status, "{}")]/@output'.format(PositionalWordPart.WARN_NO_USE_NODE_FOUND.lower())) + if len(file_list) == 0: + file_list = source_tree.xpath('//page[contains(@status, "{}")]/@output'.format(PositionalWordPart.WARN_NO_USE_NODE_FOUND)) return file_list, manuscript_file def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to fix missing glyphs. svgscripts/fix_missing_glyphs.py [OPTIONS] -File [-File] a xml file about a manuscript, containing information about its pages. a xml file about a page, containing information about svg word positions. 
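The new fallback in get_filelist_and_manuscript_file above retries the status query with the original casing when the lowercased variant matches nothing; the pattern in isolation (page data and status marker are made up):

    import lxml.etree as ET

    tree = ET.fromstring(b'<pages><page status="WARNING: no use node found" output="xml/p1.xml"/></pages>')
    status = 'WARNING: no use node found'   # hypothetical status marker
    xpath = '//page[contains(@status, "{}")]/@output'
    file_list = tree.xpath(xpath.format(status.lower()))
    if len(file_list) == 0:   # lowercased variant found nothing: retry as-is
        file_list = tree.xpath(xpath.format(status))
    print(file_list)   # ['xml/p1.xml']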
OPTIONS: -h|--help: show help :return: exit code (int) """ try: opts, args = getopt.getopt(argv, "h", ["help"]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 if len(args) < 1: usage() return 2 exit_status = 0 file_a = args[0] if isfile(file_a): file_b = None if len(args) > 1 and isfile(args[1]): file_b = args[1] file_list, manuscript_file = get_filelist_and_manuscript_file(file_a, file_b=file_b) for svg_word_pos_file in file_list: + print(svg_word_pos_file) fix_missing_glyphs(svg_word_pos_file, manuscript_file=manuscript_file) else: raise FileNotFoundError('File {} does not exist!'.format(file_a)) return exit_status if __name__ == "__main__": sys.exit(main(sys.argv[1:])) Index: svgscripts/datatypes/reconstructed_konvolut.py =================================================================== --- svgscripts/datatypes/reconstructed_konvolut.py (revision 107) +++ svgscripts/datatypes/reconstructed_konvolut.py (revision 108) @@ -1,153 +1,154 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a reconstruction of an original manuscript (e.g. a workbook or notebook). """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import abc from lxml import etree as ET from os.path import isfile import requests import sys from .description import Description from .faksimile_image import FaksimileImage from .manuscript import ManuscriptUnity from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION sys.path.append('shared_util') from myxmlwriter import parse_xml_of_type, write_pretty, xml_has_type class NonExistentPage(Page): """This class represents a page that does not exist as part of the KGW edition. @label non existent page """ NIETZSCHE_SOURCES_URL = 'http://www.nietzschesource.org/DFGAapi/api/fe/facsimile/' def __init__(self, number=None, faksimile_image=None, status=None): + self.page_tree = None self.number = number self.status = status self.faksimile_image = faksimile_image @classmethod def create_cls(cls, page_node, faksimile_image=None): """ Create an instance of NonExistentPage from a page_node :return: NonExistentPage """ number = page_node.get('title') + '_' + page_node.get('number')\ if bool(page_node.get('title'))\ else page_node.get('number') return cls(number=number, status=page_node.get('status'), faksimile_image=faksimile_image) def get_name_and_id(self): """Return an identification for object as 2-tuple. """ return type(self).__name__, self.number.replace(' ', '_') @classmethod def get_semantic_dictionary(cls): """ Creates and returns a semantic dictionary as specified by SemanticClass.
""" dictionary = super(NonExistentPage,cls).get_semantic_dictionary() dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('status', str)) return cls.return_dictionary_after_updating_super_classes(dictionary) class ReconstructedKonvolut(ManuscriptUnity): """ This class represents a reconstruction of an original manuscript (e.g. a workbook or notebook). @label reconstruction of an origianl manuscript Args: title title for identification of the reconstruction manuscript_type type of manuscript: 'Arbeitsheft' or 'Notizheft' manuscript_tree lxml.ElementTree """ XML_TAG = 'reconstructed-konvolut' TYPE_DICTIONARY = { 'R_n': 'Notizheft', 'R_w': 'Arbeitsheft' } UNITTESTING = False def __init__(self, title='', manuscript_type='', manuscript_tree=None): super(ReconstructedKonvolut,self).__init__(title=title, manuscript_type=manuscript_type,manuscript_tree=manuscript_tree) @classmethod def create_cls(cls, xml_manuscript_file, page_status_list=None, page_xpath=''): """Create an instance of ReconstructedKonvolut from a xml file of type FILE_TYPE_XML_MANUSCRIPT. :return: ReconstructedKonvolut """ manuscript = super(ReconstructedKonvolut,cls).create_cls(xml_manuscript_file) manuscript_tree = manuscript.manuscript_tree if page_xpath == '': page_status = '' if page_status_list is not None\ and type(page_status_list) is list\ and len(page_status_list) > 0: page_status = '[' + ' and '.join([ f'contains(@status, "{status}")' for status in page_status_list ]) + ']' page_xpath = f'//pages/page{page_status}/@output' included_page_list = [ page_source\ for page_source in manuscript_tree.xpath(page_xpath)\ if isfile(page_source) and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source) ] for page_node in manuscript_tree.xpath('//pages/page'): if bool(page_node.get('output'))\ and isfile(page_node.get('output'))\ and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_node.get('output')): manuscript.pages.append(Page.create_cls(\ page_node.get('output'), create_dummy_page=(page_node.get('output') not in included_page_list))) else: faksimile_image = get_or_update_faksimile(xml_manuscript_file, page_node) manuscript.pages.append(NonExistentPage.create_cls(page_node, faksimile_image)) manuscript.description = Description.create_cls_from_node(manuscript_tree.xpath(Description.XML_TAG)[0])\ if len(manuscript_tree.xpath(Description.XML_TAG)) > 0\ else None return manuscript def get_or_update_faksimile(xml_source_file, page_node) ->FaksimileImage: """Return the faksimile image of the non existent page. 
""" faksimile_image = None if len(page_node.xpath(f'./{FaksimileImage.XML_TAG}')) > 0: faksimile_image = FaksimileImage(node=page_node.xpath(f'./{FaksimileImage.XML_TAG}')[0]) elif bool(page_node.get('alias')): url = NonExistentPage.NIETZSCHE_SOURCES_URL + page_node.get('alias') faksimile_dict = None try: r = requests.get(url) faksimile_dict = r.json() except Exception: print(f'URL does not work: {url}') if faksimile_dict is not None and len(faksimile_dict) > 0: width = faksimile_dict['imageWidth'] height = faksimile_dict['imageHeight'] file_name = page_node.get('alias') + '.jpg' URL = FaksimileImage.NIETZSCHE_SOURCES_URL + page_node.get('alias') faksimile_image = FaksimileImage(file_name=file_name, URL=URL, height=height, width=width) faksimile_image.attach_object_to_tree(page_node) write_pretty(xml_element_tree=page_node.getroottree(), file_name=xml_source_file, script_name=__file__,\ file_type=FILE_TYPE_XML_MANUSCRIPT, backup=True) return faksimile_image Index: svgscripts/datatypes/page.py =================================================================== --- svgscripts/datatypes/page.py (revision 107) +++ svgscripts/datatypes/page.py (revision 108) @@ -1,406 +1,428 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a page. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
Index: svgscripts/datatypes/page.py =================================================================== --- svgscripts/datatypes/page.py (revision 107) +++ svgscripts/datatypes/page.py (revision 108) @@ -1,406 +1,428 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a page. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET from os.path import isfile, basename from progress.bar import Bar from svgpathtools import svg2paths2, svg_to_paths from svgpathtools.parser import parse_path import re import sys import warnings from .box import Box from .color import Color from .image import Image, SVGImage from .faksimile_image import FaksimileImage from .faksimile_position import FaksimilePosition from .lineNumber import LineNumber from .line import Line from .mark_foreign_hands import MarkForeignHands from .matrix import Matrix from .path import Path from .positional_word_part import PositionalWordPart from .super_page import SuperPage from .style import Style from .text_connection_mark import TextConnectionMark from .text_field import TextField from .transkriptionField import TranskriptionField from .writing_process import WritingProcess from .word import Word from .word_deletion_path import WordDeletionPath from .word_insertion_mark import WordInsertionMark sys.path.append('py2ttl') from class_spec import SemanticClass sys.path.append('shared_util') from main_util import extract_paths_on_tf, get_paths_near_position FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK class Page(SemanticClass,SuperPage): """ This class represents a page. Args: xml_source_file (str): name of the xml file to be instantiated. faksimile_image: FaksimileImage. faksimile_svgFile: svg file containing information about word positions. """ UNITTESTING = False def __init__(self, xml_source_file=None, faksimile_image=None, faksimile_svgFile=None, add_paths_near_words=False, warn=False, number=None): if xml_source_file is not None: super(Page,self).__init__(xml_source_file) self.update_property_dictionary('faksimile_image', faksimile_image) self.update_property_dictionary('faksimile_svgFile', faksimile_svgFile) self.init_all_properties() self.add_style(style_node=self.page_tree.getroot().find('.//style')) self.faksimile_text_field = None self.svg_text_field = None self.init_node_objects() self.warn = warn self.add_deletion_paths_to_words(add_paths_near_words) else: + self.page_tree = None self.number = number def add_deletion_paths_to_words(self, add_paths_near_words=False): """Add deletion paths to words.
""" words = [ word for word in self.words if (len(word.word_parts) == 0 and word.deleted and len(word.deletion_paths) == 0)\ or 'add_paths_near_words' in word.process_flags ] words += [ word for word in self.words\ if len(word.word_parts) > 0 and True in\ [ (wp.deleted and len(wp.deletion_paths) == 0) for wp in word.word_parts ]] if len(words) > 0 and ((self.svg_file is not None and isfile(self.svg_file))\ or (self.source is not None and isfile(self.source))): svg_file = self.svg_file if self.svg_file is not None else self.source transkription_field = TranskriptionField(svg_file) tr_xmin = transkription_field.xmin if (self.svg_image is None or self.svg_image.text_field is None) else 0 tr_ymin = transkription_field.ymin if (self.svg_image is None or self.svg_image.text_field is None) else 0 word_deletion_paths = self.word_deletion_paths index = 0 dp_updated = False while index < len(words): word = words[index] word.add_deletion_paths(word_deletion_paths, tr_xmin=tr_xmin, tr_ymin=tr_ymin) if len(word.deletion_paths) > 0 or True in [ len(w.deletion_paths) > 0 for w in word.word_parts ]: deletion_paths = word.deletion_paths for wp in word.word_parts: deletion_paths += wp.deletion_paths for deletion_path in deletion_paths: if deletion_path not in self.word_deletion_paths: self.word_deletion_paths.append(deletion_path) elif not dp_updated: word_deletion_paths = extract_paths_on_tf(self) dp_updated = True index -= 1 if add_paths_near_words\ and ('add_paths_near_words' in word.process_flags\ or ((word.deleted and len(word.deletion_paths) == 0)\ or True in [ (w.deleted and len(w.deletion_paths) == 0) for w in word.word_parts ])): if not dp_updated\ and 'add_paths_near_words' in word.process_flags: word_deletion_paths = extract_paths_on_tf(self) dp_updated = True transform = None tp = None target_word = word paths_near_word = [] if word.deleted and len(word.transkription_positions) > 0: transform = word.transkription_positions[0].transform for tp in word.transkription_positions: word.deletion_paths_near_word += get_paths_near_position(tp, word_deletion_paths) elif len(word.word_parts) > 0: for wp in word.word_parts: if wp.deleted and len(wp.transkription_positions) > 0: target_word = wp for tp in wp.transkription_positions: wp.deletion_paths_near_word = get_paths_near_position(tp, word_deletion_paths) if self.warn and (word.deleted and len(word.deletion_paths) == 0): warnings.warn(\ f'WARNING: {self.title} {self.number}: {word.id} on {word.line_number}, {word.text} has no deletion paths! {target_word.deletion_paths_near_word}, {transform}') index += 1 @classmethod def create_cls(cls, xml_source_file=None, create_dummy_page=False, page_node=None): """Create a Page. """ if not create_dummy_page: return cls(xml_source_file) else: m = re.match(r'(.*)(page[0]*)(.*)(\.xml)', xml_source_file) if m is not None and len(m.groups()) > 3: number = m.group(3) else: number = basename(xml_source_file).replace('.xml','') return cls(number=number) @classmethod def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None): """Returns a list of Page instantiating a xml_file of type FILE_TYPE_SVG_WORD_POSITION or xml_files contained in xml_file of type FILE_TYPE_XML_MANUSCRIPT. [optional: instantiation depends on the fulfilment of a status_contains and/or on the selection of some words by a word_selection_function]. 
""" source_tree = ET.parse(xml_file) if source_tree.getroot().find('metadata/type').text == cls.FILE_TYPE_SVG_WORD_POSITION: page = cls(xml_file) if word_selection_function is None or len(word_selection_function(page.words)) > 0: return [ page ] else: return [] elif source_tree.getroot().find('metadata/type').text == FILE_TYPE_XML_MANUSCRIPT: pages = [] xpath = '//page/@output' if status_contains != '' and status_not_contain != '': xpath = '//page[contains(@status, "{0}") and not(contains(@status, "{1}"))]/@output'.format(status_contains, status_not_contain) elif status_contains != '': xpath = '//page[contains(@status, "{0}")]/@output'.format(status_contains) elif status_not_contain != '': xpath = '//page[not(contains(@status, "{0}"))]/@output'.format(status_not_contain) for xml_source_file in source_tree.xpath(xpath): if isfile(xml_source_file): pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function) return pages else: return [] @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. """ dictionary = {} class_dict = cls.get_class_dictionary() properties = { 'number': { 'class': str, 'cardinality': 1}} properties.update(cls.create_semantic_property_dictionary('faksimile_image', FaksimileImage, subPropertyOf=cls.HAS_IMAGE)) properties.update(cls.create_semantic_property_dictionary('faksimile_text_field', TextField,\ name='pageIsOnFaksimileTextField', label='page is on faksimile text field',\ comment='Relates a page to the text field on a svg image.', subPropertyOf=cls.PAGE_IS_ON_TEXTFIELD)) properties.update(cls.create_semantic_property_dictionary('orientation', str)) properties.update(cls.create_semantic_property_dictionary('svg_image', SVGImage, subPropertyOf=cls.HAS_IMAGE)) properties.update(cls.create_semantic_property_dictionary('svg_text_field', TextField,\ name='pageIsOnSVGTextField', label='page is on svg text field',\ comment='Relates a page to the text field on a faksimile image.', subPropertyOf=cls.PAGE_IS_ON_TEXTFIELD)) for key in [ 'lines', 'mark_foreign_hands', 'words', 'word_deletion_paths', 'word_insertion_marks']: properties.update(cls.create_semantic_property_dictionary(key, list)) dictionary.update({cls.CLASS_KEY: class_dict}) dictionary.update({cls.PROPERTIES_KEY: properties}) return cls.return_dictionary_after_updating_super_classes(dictionary) def get_word_deletion_path(self, path=None, d_attribute=None) ->WordDeletionPath: """Return a word deletion path that belongs to page. """ if path is None and d_attribute is None: raise Exception('ERROR: get_word_deletion_path needs a path or a d_attribute!') if d_attribute is None: d_attribute = path.d_attribute page_paths = [ dpath for dpath in self.word_deletion_paths if dpath.d_attribute == d_attribute ] if len(page_paths) > 0: return page_paths[0] else: dpath = WordDeletionPath.create_cls(self, path=path, d_attribute=d_attribute) if dpath is not None: dpath.id = len(self.word_deletion_paths) self.word_deletion_paths.append(dpath) dpath.attach_object_to_tree(self.page_tree) return dpath def init_node_objects(self): """Initialize all node objects. 
""" self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG) ] self.words = [ Word.create_cls(word_node) for word_node in self.page_tree.getroot().xpath('./word') ] self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in self.page_tree.getroot().xpath('//' + MarkForeignHands.XML_TAG) ] self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in self.page_tree.getroot().xpath('//' + TextConnectionMark.XML_TAG) ] self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ] self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG) ] self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ] self.word_deletion_paths = [ WordDeletionPath.create_cls(self, node=node) for node in self.page_tree.xpath('./' + WordDeletionPath.XML_TAG) ] if self.faksimile_image is not None and self.faksimile_image.text_field is not None: self.faksimile_text_field = self.faksimile_image.text_field if self.svg_image is not None and self.svg_image.text_field is not None: self.svg_text_field = self.svg_image.text_field for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks: simple_word.init_word(self) for wim in self.word_insertion_marks: if wim.line_number > -1: wim.line = [ line for line in self.lines if line.id == wim.line_number ][0] def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=[]): """Update word ids and attach them to page.page_tree. """ if not self.is_locked(): update_function_on_word = [ update_function_on_word ]\ if type(update_function_on_word) != list\ else update_function_on_word for node in self.page_tree.xpath('.//word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG): node.getparent().remove(node) for index, word in enumerate(self.words): word.id = index for func in update_function_on_word: if callable(func): func(word) word.attach_word_to_tree(self.page_tree) for index, mark_foreign_hands in enumerate(self.mark_foreign_hands): mark_foreign_hands.id = index if MarkForeignHands in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(mark_foreign_hands) mark_foreign_hands.attach_word_to_tree(self.page_tree) for index, text_connection_mark in enumerate(self.text_connection_marks): text_connection_mark.id = index if TextConnectionMark in include_special_words_of_type: for func in update_function_on_word: if callable(update_function_on_word): func(text_connection_mark) text_connection_mark.attach_word_to_tree(self.page_tree) else: print('locked') def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None): """Update the data source of page. 
""" if faksimile_svgFile is not None: self.faksimile_svgFile = faksimile_svgFile data_node = self.page_tree.xpath('.//data-source')[0]\ if len(self.page_tree.xpath('.//data-source')) > 0\ else ET.SubElement(self.page_tree.getroot(), 'data-source') data_node.set('file', self.faksimile_svgFile) if xml_correction_file is not None: data_node.set('xml-corrected-words', xml_correction_file) def update_line_number_area(self, transkription_field, svg_tree=None, set_to_text_field_zero=True): """Determines the width of the area where the line numbers are written in the page.source file. """ THRESHOLD = 0.4 if svg_tree is None: svg_tree = ET.parse(self.source) if len(self.line_numbers) > 1: line_number = self.line_numbers[9]\ if transkription_field.is_page_verso() and len(self.line_numbers) > 8\ else self.line_numbers[1] ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)\ if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)\ and LineNumber.IS_A_LINE_NUMBER(item)\ and LineNumber(raw_text_node=item).id == line_number.id ] if len(ln_nodes) > 0: matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform')) if transkription_field.is_page_verso(): transkription_field.add_line_number_area_width(matrix.getX()) elif self.svg_file is not None and isfile(self.svg_file): svg_path_tree = ET.parse(self.svg_file) namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() } svg_x = matrix.getX() svg_y = self.line_numbers[1].bottom + transkription_field.ymin\ if set_to_text_field_zero\ else self.line_numbers[1].bottom use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\ .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces) if len(use_nodes) > 0: symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '') d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces) if len(d_strings) > 0 and d_strings[0] != '': path = parse_path(d_strings[0]) xmin, xmax, ymin, ymax = path.bbox() width = xmax - xmin transkription_field.add_line_number_area_width(matrix.getX() + width) def update_page_type(self, transkription_field=None): """Adds a source to page and attaches it to page_tree. """ if self.number.endswith('r')\ or self.number.endswith('v'): self.page_type = Page.PAGE_VERSO\ if self.number.endswith('v')\ else Page.PAGE_RECTO else: if transkription_field is None: if self.source is None or not isfile(self.source): raise FileNotFoundError('Page does not have a source!') transkription_field = TranskriptionField(self.source, multipage_index=self.multipage_index) self.page_type = Page.PAGE_VERSO\ if transkription_field.is_page_verso()\ else Page.PAGE_RECTO self.page_tree.getroot().set('pageType', self.page_type) def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False): """Update styles of words and add them to their transkription_positions. Args: add_to_parents: Add styles also to word (and if not None to manuscript). partition_according_to_styles: Partition word if its transkription_positions have different styles. 
""" style_dictionary = {} if words is None: words = self.words for word in words: if len(word.word_parts) > 0: self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\ add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles) for transkription_position in word.transkription_positions: if len(transkription_position.positional_word_parts) > 0: style_class = transkription_position.positional_word_parts[0].style_class writing_process_id = -1 for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]: writing_process_id = self.fontsizekey2stage_mapping.get(font_key) style_class_key = (Style.remove_irrelevant_style_keys(style_class, self, extended_styles=create_css), writing_process_id) if create_css: if style_dictionary.get((style_class_key, word.deleted)) is None: color = None if len(word.deletion_paths) > 0: if word.deletion_paths[0].style_class is not None\ and word.deletion_paths[0].style_class != ''\ and self.style_dict.get(word.deletion_paths[0].style_class) is not None: color = Color.create_cls_from_style_object(self.style_dict.get(word.deletion_paths[0].style_class)) else: color = Color() style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\ create_css=create_css, deletion_color=color, writing_process_id=style_class_key[1] ) transkription_position.style = style_dictionary[(style_class_key, word.deleted)] #print(style_dictionary[(style_class_key, word.deleted)]) else: if style_dictionary.get(style_class_key) is None: style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css) style_dictionary[style_class_key].writing_process_id = style_class_key[1] transkription_position.style = style_dictionary[style_class_key] if add_to_parents and transkription_position.style not in word.styles: word.styles.append(transkription_position.style) if partition_according_to_styles: word.split_according_to_status('style', splits_are_parts=True) if manuscript is not None\ and add_to_parents: manuscript.update_styles(*style_dictionary.values()) + def __eq__(self, other): + """Returns true if self is qualitatively identical to other. + """ + if other is None: + return False + if self.page_tree is None and other.page_tree is None: + return self.number == other.number + if self.page_tree is None or other.page_tree is None: + return False + return self.page_tree.docinfo.URL == other.page_tree.docinfo.URL + + def __hash__(self): + """Return a hash value for self. + """ + try: + if self.page_tree is None: + return hash(self.number) + except AttributeError: + print(self) + return hash(self.number) + return hash(self.page_tree.docinfo.URL) Index: svgscripts/datatypes/text.py =================================================================== --- svgscripts/datatypes/text.py (revision 107) +++ svgscripts/datatypes/text.py (revision 108) @@ -1,185 +1,219 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a text that may have standoff markup. """ # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
Index: svgscripts/datatypes/text.py =================================================================== --- svgscripts/datatypes/text.py (revision 107) +++ svgscripts/datatypes/text.py (revision 108) @@ -1,185 +1,219 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent a text that may have standoff markup. """ # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" import copy from lxml import etree as ET import re import sys from .attachable_object import AttachableObject from .standoff_tag import StandoffTag sys.path.append('py2ttl') from class_spec import SemanticClass + class Text(AttachableObject,SemanticClass): """ This class represents a text that may have standoff markup. """ TAG_PATTERN = re.compile(r'([^<]*)(<[^/]+>)') + #START_TAG_PATTERN = re.compile(r'.*<[a-z]+>') + START_TAG_PATTERN = re.compile(r'[^<]*(?!</[a-z]+>)[^<]*<[a-z]+>') XML_TAG = 'text-with-markup' XML_SUB_TAG = 'text' def __init__(self, content=None, standoff_markups=None, id=0, tag=XML_TAG): self.id = str(id) self.tag = tag self.content = content self.standoff_markups = standoff_markups\ if standoff_markups is not None\ else [] def append(self, content: str) -> int: """Extend text with content. [:return:] startIndex of appended content """ startIndex = len(self.content) self.content += content return startIndex def attach_object_to_tree(self, target_tree): """Attach object to tree. """ if target_tree.__class__.__name__ == '_ElementTree': target_tree = target_tree.getroot() obj_node = target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)[0] \ if(len(target_tree.xpath('.//' + self.tag + '[@id="%s"]' % self.id)) > 0) \ else ET.SubElement(target_tree, self.tag) obj_node.set('id', self.id) text_node = ET.SubElement(obj_node, self.XML_SUB_TAG) text_node.text = self.content for index, markup in enumerate(self.standoff_markups): markup.id = str(index) markup.attach_object_to_tree(obj_node) def extract_part(self, text_part, css_filter=';'): """Extract part of text for which text_part matches content. :return: datatypes.text.Text """ if not css_filter.endswith(';'): css_filter += ';' if text_part in self.content: part_start_index = self.content.find(text_part) part_end_index = part_start_index + len(text_part) standoff_markups = [ markup for markup in self.standoff_markups\ if markup.css_string.endswith(css_filter)\ if (markup.startIndex <= part_start_index\ and markup.endIndex > part_start_index)\ or (markup.startIndex >= part_start_index\ and markup.startIndex < part_end_index\ and markup.endIndex <= part_end_index)\ or (markup.startIndex < part_end_index\ and markup.endIndex >= part_end_index)] new_markups = [] for markup in standoff_markups: startIndex = markup.startIndex - part_start_index\ if markup.startIndex > part_start_index else 0 endIndex = markup.endIndex - part_start_index\ if markup.endIndex <= part_end_index\ else len(text_part) new_markups.append(StandoffTag(markup.markup, startIndex, endIndex)) return Text(content=text_part, standoff_markups=new_markups) else: msg = f'ERROR {text_part} is not a part of {self.content}!' raise Exception(msg) def join(self, other): """Join self and other.
""" correction = self.append(' ' + other.content) + 1 for standoff_markup in other.standoff_markups: standoff_markup.startIndex += correction standoff_markup.endIndex += correction self.standoff_markups += other.standoff_markups del other def markup_contains_css_filter(self, css_filter) ->bool: """Returns true if markup contains css_filter. """ if not css_filter.endswith(';'): css_filter += ';' return len([ markup for markup in self.standoff_markups\ if markup.css_string.endswith(css_filter) ]) > 0 @classmethod def create_cls_from_node(cls, node): """Initialize a cls from node. [:return:] cls """ standoff_markups = [ StandoffTag.create_cls_from_node(item) for item in\ node.xpath('./' + '|./'.join(StandoffTag.MARKUP_STYLES)) ] text = node.xpath('./' + cls.XML_SUB_TAG + '/text()')[0]\ if len(node.xpath('./' + cls.XML_SUB_TAG + '/text()')) > 0\ else '' return cls(text, standoff_markups=standoff_markups, id=node.get('id'), tag=node.tag) @classmethod def create_cls_from_html(cls, html): """Creates a Text from a html string. :return: a (datatypes.text) Text """ - standoff_markups = [] + html = html.replace('<', '<').replace('>', '>') + """ tag_matched = re.match(cls.TAG_PATTERN, html) while tag_matched is not None: tag = tag_matched.group(2) tags = [ t for t in tag.split('<') if t != ''] tags.reverse() endTag = ''.join([ '(str, list): + """Extract standoff data and return the html string without tags and a list of standoff data. + """ + standoff_markups = [] + tag_matched = re.match(Text.START_TAG_PATTERN, html) + while tag_matched: + tag = re.sub(r'>.*', '', re.sub(r'^[^<]+<', '', tag_matched.group(0))) + startIndex = html.index(f'<{tag}>') + html = re.sub(rf'<{tag}>', '', html, count=1) + contains_tag_pattern = rf'.*<[a-z]+>.*.*' + if re.match(contains_tag_pattern, html): + html, new_standoff_data = extract_standoff_data(html) + standoff_markups += new_standoff_data + end_tag_pattern = rf'.*.*' + endTag_matched = re.match(end_tag_pattern, html) + if endTag_matched is not None: + endIndex = html.index(f'') + html = html[0:endIndex] + html[endIndex+len(f''):] + if bool(StandoffTag.HTML_TAG_DICTIONARY.get(f'<{tag}>')): + standoff_markups.append(StandoffTag(StandoffTag.HTML_TAG_DICTIONARY[f'<{tag}>'], startIndex, endIndex)) + else: + msg = f'HTML string contains no ending tag for {tag}!' + raise Exception(msg) + tag_matched = re.match(Text.START_TAG_PATTERN, html) + return html, standoff_markups + + + Index: svgscripts/datatypes/standoff_tag.py =================================================================== --- svgscripts/datatypes/standoff_tag.py (revision 107) +++ svgscripts/datatypes/standoff_tag.py (revision 108) @@ -1,151 +1,152 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent the standoff markup of a text. """ # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
Index: svgscripts/datatypes/standoff_tag.py =================================================================== --- svgscripts/datatypes/standoff_tag.py (revision 107) +++ svgscripts/datatypes/standoff_tag.py (revision 108) @@ -1,151 +1,152 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This class can be used to represent the standoff markup of a text. """ # Copyright (C) University of Basel 2020 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> 1}}} __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" from lxml import etree as ET import re import sys from .attachable_object import AttachableObject sys.path.append('py2ttl') from class_spec import SemanticClass class StandoffTag(AttachableObject,SemanticClass): """ This class represents the standoff markup of a text. """ - MARKUP_STYLES = [ 'bold', 'italic', 'delete' ] + MARKUP_STYLES = [ 'bold', 'italic', 'delete', 'underline' ] RDFS_SUBCLASSOF_LIST = ['http://www.nie.org/ontology/standoff#StandoffMarkup'] RELEVANT_STYLE_KEY = 'font-family' RELEVANT_CONTENT_STARTSWITH = 'Frutiger-' RELEVANT_PATTERN = re.compile('.*(Italic|Bold)$') RELEVANT_SUB_PATTERN = re.compile('Frutiger-(Light)*') STOFF_HAS_CSS_URL_STRING = 'http://www.nie.org/ontology/standoff#hasCSS' STOFF_HAS_START_INDEX = 'http://www.nie.org/ontology/standoff#standoffMarkupHasStartIndex' STOFF_HAS_END_INDEX = 'http://www.nie.org/ontology/standoff#standoffMarkupHasEndIndex' - HTML_TAG_DICTIONARY = { '<i>': 'italic', '<b>': 'bold', '<del>': 'delete' } + HTML_TAG_DICTIONARY = { '<i>': 'italic', '<b>': 'bold', '<del>': 'delete', '<u>': 'underline' } CSS_DICTIONARY = { 'bold': 'font-weight:bold;', 'italic': 'font-style: italic;', + 'underline': 'text-decoration:underline;', 'delete': 'text-decoration:line-through;' } def __init__(self, markup: str, startIndex: int, endIndex: int, id=0): self.id = str(id) self.css_string = self.CSS_DICTIONARY.get(markup) self.markup = markup self.startIndex = startIndex self.endIndex = endIndex def attach_object_to_tree(self, target_tree): """Attach object to tree. """ if target_tree.__class__.__name__ == '_ElementTree': target_tree = target_tree.getroot() obj_node = target_tree.xpath('.//' + self.markup + '[@id="%s"]' % self.id)[0] \ if(len(target_tree.xpath('.//' + self.markup + '[@id="%s"]' % self.id)) > 0) \ else ET.SubElement(target_tree, self.markup) obj_node.set('id', self.id) obj_node.set('start', str(self.startIndex)) obj_node.set('end', str(self.endIndex)) @classmethod def create_cls(cls, start_index, end_index, style_string, page=None, style_dict=None): """Creates a StandoffTag from a style_string. :return: a list of (datatypes.standoff_tag) StandoffTag """ if page is not None: style_dict = cls.create_relevant_style_dictionary(page) relevant_keys = [ key for key in set(style_string.split(' '))\ if key in style_dict.keys() ] standoff_tags = [] if style_dict is None or len(style_dict) == 0: return standoff_tags for relevant_key in relevant_keys: font_family = style_dict[relevant_key][cls.RELEVANT_STYLE_KEY] if re.match(cls.RELEVANT_PATTERN, font_family): markup = re.sub(cls.RELEVANT_SUB_PATTERN, '', font_family).lower() standoff_tags.append(cls(markup, start_index, end_index)) return standoff_tags @classmethod def create_cls_from_node(cls, node): """Creates a StandoffTag from a node. :return: (datatypes.standoff_tag) StandoffTag """ return cls(node.tag, int(node.get('start')), int(node.get('end')), id=node.get('id')) @classmethod def create_relevant_style_dictionary(cls, page): """Return a style dictionary that contains only relevant keys and contents.
""" return { key: key_dict for key, key_dict in page.style_dict.items()\ if cls.RELEVANT_STYLE_KEY in key_dict.keys()\ and key_dict[cls.RELEVANT_STYLE_KEY].startswith(cls.RELEVANT_CONTENT_STARTSWITH) } @classmethod def get_semantic_dictionary(cls): """ Creates a semantic dictionary as specified by SemanticClass. """ properties = {} #properties.update(cls.create_semantic_property_dictionary('markup', str, cardinality=1,\ # name='standoffTagHasMarkup', label='standoff tag has a specific markup', comment='Connects a standoff tag with its markup, e.g. bold or italic')) properties.update(cls.create_semantic_property_dictionary('startIndex', int, cardinality=1, subPropertyOf=cls.STOFF_HAS_START_INDEX,\ name='standoffTagHasStartIndex', label='standoff tag has a start index', comment='Connects a standoff tag with its start index.')) properties.update(cls.create_semantic_property_dictionary('endIndex', int, cardinality=1, subPropertyOf=cls.STOFF_HAS_END_INDEX,\ name='standoffTagHasEndIndex', label='standoff tag has a end index', comment='Connects a standoff tag with its end index.')) properties.update(cls.create_semantic_property_dictionary('css_string', str,\ subPropertyOf=cls.STOFF_HAS_CSS_URL_STRING,\ name='standoffTagHasCSS', label='standoff tag has css', comment='Connects a standoff tag with CSS style.')) dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties } return cls.return_dictionary_after_updating_super_classes(dictionary) def is_joinable(self, other): """Return true if self and other have same markup and self.endIndex == other.startIndex. """ return self.markup == other.markup and self.endIndex == other.startIndex def join(self, other): """Join self with other. """ self.endIndex = other.endIndex def join_list(self, others): """Join all others that are joinable, return remaining others as a list. 
""" unjoinable_others = [] for other in others: if self.is_joinable(other): self.join(other) else: unjoinable_others.append(other) return unjoinable_others Index: tests_svgscripts/test_description.py =================================================================== --- tests_svgscripts/test_description.py (revision 107) +++ tests_svgscripts/test_description.py (revision 108) @@ -1,40 +1,40 @@ import unittest from os import sep, path from os.path import dirname, basename, isfile, isdir import lxml.etree as ET import sys sys.path.append('svgscripts') from datatypes.page import Page from datatypes.standoff_tag import StandoffTag from datatypes.text import Text from datatypes.description import Description class TestText(unittest.TestCase): def setUp(self): DATADIR = dirname(__file__) + sep + 'test_data' if not isdir(DATADIR): DATADIR = dirname(dirname(__file__)) + sep + 'test_data' self.test_file = DATADIR + sep + 'test.xml' self.test_svg_file = DATADIR + sep + 'test421.svg' self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml' self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml' self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml' self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg' self.test_page = DATADIR + sep + 'N_VII_1_page001.xml' self.test_manuscript = DATADIR + sep + 'N_VII_1.xml' def test_semantic(self): pass #print(Text.get_semantic_dictionary()) def test_create_cls_from_node(self): tree = ET.parse(self.test_manuscript) node = tree.xpath('description/earlierDescription[@id="1"]/manuscriptDescription')[0] description = Description.create_cls_from_node(node) - #print(description.content) self.assertTrue(len(description.standoff_markups) > 0) + #print(description.content, description.standoff_markups) if __name__ == "__main__": unittest.main() Index: tests_svgscripts/test_text.py =================================================================== --- tests_svgscripts/test_text.py (revision 107) +++ tests_svgscripts/test_text.py (revision 108) @@ -1,91 +1,94 @@ import unittest from os import sep, path from os.path import dirname, basename, isfile, isdir import lxml.etree as ET import sys sys.path.append('svgscripts') from datatypes.page import Page from datatypes.standoff_tag import StandoffTag from datatypes.text import Text class TestText(unittest.TestCase): def setUp(self): DATADIR = dirname(__file__) + sep + 'test_data' if not isdir(DATADIR): DATADIR = dirname(dirname(__file__)) + sep + 'test_data' self.test_file = DATADIR + sep + 'test.xml' self.test_svg_file = DATADIR + sep + 'test421.svg' self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml' self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml' self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml' self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg' self.test_page = DATADIR + sep + 'N_VII_1_page001.xml' self.test_manuscript = DATADIR + sep + 'N_VII_1.xml' def test_semantic(self): pass #print(Text.get_semantic_dictionary()) def test_attach_to_tree(self): empty_tree = ET.ElementTree(ET.Element('page')) content = 'asdf' standoff_tag = StandoffTag('bold', 0, len(content)-1) standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content),id='1') text = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ]) text.attach_object_to_tree(empty_tree) text = Text.create_cls_from_node(empty_tree.xpath('//' + Text.XML_TAG)[0]) self.assertEqual(text.content, content) self.assertEqual(text.id, '0') self.assertEqual(len(text.standoff_markups), 2) #print(ET.dump(empty_tree.getroot())) def test_extract(self): 
Index: tests_svgscripts/test_text.py
===================================================================
--- tests_svgscripts/test_text.py	(revision 107)
+++ tests_svgscripts/test_text.py	(revision 108)
@@ -1,91 +1,94 @@
import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys

sys.path.append('svgscripts')
from datatypes.page import Page
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text

class TestText(unittest.TestCase):
    def setUp(self):
        DATADIR = dirname(__file__) + sep + 'test_data'
        if not isdir(DATADIR):
            DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
        self.test_file = DATADIR + sep + 'test.xml'
        self.test_svg_file = DATADIR + sep + 'test421.svg'
        self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
        self.xml_file = DATADIR + sep + 'N_VII_1_page005.xml'
        self.xml_fileB = DATADIR + sep + 'N_VII_1_page006.xml'
        self.pdf_xml_source = DATADIR + sep + 'W_I_8_neu_125-01.svg'
        self.test_page = DATADIR + sep + 'N_VII_1_page001.xml'
        self.test_manuscript = DATADIR + sep + 'N_VII_1.xml'

    def test_semantic(self):
        pass
        #print(Text.get_semantic_dictionary())

    def test_attach_to_tree(self):
        empty_tree = ET.ElementTree(ET.Element('page'))
        content = 'asdf'
        standoff_tag = StandoffTag('bold', 0, len(content)-1)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content),id='1')
        text = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        text.attach_object_to_tree(empty_tree)
        text = Text.create_cls_from_node(empty_tree.xpath('//' + Text.XML_TAG)[0])
        self.assertEqual(text.content, content)
        self.assertEqual(text.id, '0')
        self.assertEqual(len(text.standoff_markups), 2)
        #print(ET.dump(empty_tree.getroot()))

    def test_extract(self):
        content = 'asdfa'
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        textB = textA.extract_part('sdf')
        self.assertEqual(len(textB.standoff_markups), 2)
        textB = textA.extract_part('sdf', css_filter='bold')
        self.assertEqual(len(textB.standoff_markups), 1)
        """
        content = '26: von „Regel]¿'
        textA = Text(content, standoff_markups=[ StandoffTag('bold', 6, 9)])
        print(textA.extract_part('von', css_filter='bold'))
        print(textA.extract_part('„Regel', css_filter='bold'))
        """

    def test_markup_contains_css_filter(self):
        content = 'asdfa'
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        self.assertTrue(textA.markup_contains_css_filter('bold'))
        self.assertTrue(textA.markup_contains_css_filter('italic'))
        textA.standoff_markups.pop(0)
        self.assertFalse(textA.markup_contains_css_filter('bold'))

    def test_join(self):
        content = 'asdfa'
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textA = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        standoff_tag = StandoffTag('bold', 0, len(content)-2)
        standoff_tag2 = StandoffTag('italic', int(len(content)/2), len(content)-1,id='1')
        textB = Text(content, standoff_markups=[ standoff_tag, standoff_tag2 ])
        textA.join(textB)
        self.assertEqual(textA.content, content + ' ' + content)

    def test_create_from_html(self):
        html = 'asdf <b><i><del>test</del></i></b> the best'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 3)
        self.assertEqual(text.standoff_markups[0].startIndex, text.standoff_markups[1].startIndex)
        self.assertEqual(text.standoff_markups[0].endIndex, text.standoff_markups[1].endIndex)
        html = 'asdf <b>test</b>'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 1)
+        html = 'Quart-, Oktav- und Folioblätter verschiedenen Formats (z. T. von Albert Brenners und Peter Gasts Hand); Entwürfe und Vorstufen aus dem Bereiche des <i>Menschlichen I</i> (die sogenannten <i>Sorrentiner Papiere</i>)'
+        text = Text.create_cls_from_html(html)
+        #print(text)

if __name__ == "__main__":
    unittest.main()
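Note: a manual spot check of the nested-tag case exercised by test_create_from_html; the input follows the reconstruction used in the test above (the original tags were lost to mis-encoding, so the exact nesting order is an assumption):

    import sys
    sys.path.append('svgscripts')
    from datatypes.text import Text

    text = Text.create_cls_from_html('asdf <b><i><del>test</del></i></b> the best')
    for markup in text.standoff_markups:
        print(markup.markup, markup.startIndex, markup.endIndex)
    # expected: 'delete', 'italic' and 'bold', each spanning indices 5..9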