Index: svgscripts/test_word_insertion_mark.py
===================================================================
--- svgscripts/test_word_insertion_mark.py (revision 27)
+++ svgscripts/test_word_insertion_mark.py (revision 28)
@@ -1,58 +1,63 @@
import unittest
from os import sep, path
from os.path import dirname, isdir
import lxml.etree as ET
from datatypes.transkriptionField import TranskriptionField
from datatypes.word_insertion_mark import WordInsertionMark
from datatypes.word import Word
class TestWordInsertionMark(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.reference_file = DATADIR + sep + 'test_find_word.xml'
self.test_svg_file = DATADIR + sep + 'path_svg.svg'
def test_WIM_with_input(self):
wim = WordInsertionMark(x=1.0, y=1.0, previous_word_id=0, inserted_word_id=1)
self.assertEqual(wim.id, '0')
self.assertEqual(wim.left, 1.0)
self.assertEqual(wim.previous_word_id, 0)
def test_WIM_with_node(self):
mylist = { 'id': '0', 'left': '1.0', 'top': '1.0', 'height': '0', 'width': '0', 'bottom': '0', 'previous-word-id': '0', 'inserted-word-id': '1' }
node = ET.Element(WordInsertionMark.XML_TAG, attrib=mylist)
wim = WordInsertionMark(wim_node=node)
self.assertEqual(wim.id, '0')
self.assertEqual(wim.left, 1.0)
self.assertEqual(wim.previous_word_id, 0)
def test_WIM_attach_object_to_tree(self):
empty_tree= ET.parse(self.reference_file)
for node in empty_tree.xpath('//freehand'):
node.getparent().remove(node)
newWim = WordInsertionMark(x=1.0, y=1.0, previous_word_id=0)
newWim.attach_object_to_tree(empty_tree)
newWim = WordInsertionMark(id=1,x=1.0, y=1.0, previous_word_id=0)
newWim.attach_object_to_tree(empty_tree)
self.assertEqual(len(empty_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG)), 2)
for wim_node in empty_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG):
wim = WordInsertionMark(wim_node=wim_node)
self.assertEqual(wim.left, 1.0)
self.assertEqual(wim.top, 1.0)
self.assertEqual(wim.previous_word_id, 0)
def test_CREATE_WIM(self):
svg_tree = ET.parse(self.test_svg_file)
namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
xmin = 311.8125
ymin = 158.0117
x = 261.865
y = 15.9
wim = WordInsertionMark.CREATE_WORD_INSERTION_MARK(svg_tree, namespaces, x=x, y=y, xmin=xmin, ymin=ymin, line_number=1)
self.assertEqual(wim.symbol_id, 'glyph2-1')
+
+ def test_get_semantic_dictionary(self):
+ dictionary = WordInsertionMark.get_semantic_dictionary()
+ self.assertEqual('previous_word_id' in dictionary['properties'].keys(), True)
+
if __name__ == "__main__":
unittest.main()
Index: svgscripts/test_transkription_position.py
===================================================================
--- svgscripts/test_transkription_position.py (revision 27)
+++ svgscripts/test_transkription_position.py (revision 28)
@@ -1,83 +1,87 @@
import unittest
from os import sep, path
from os.path import dirname, isdir, isfile
import lxml.etree as ET
from datatypes.debug_message import DebugMessage
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkription_position import TranskriptionPosition
from datatypes.transkriptionField import TranskriptionField
from datatypes.word_position import WordPosition
class TestTranskriptionPosition(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
if not isdir(DATADIR):
DATADIR = dirname(dirname(__file__)) + sep + 'test_data'
self.test_svg_file = DATADIR + sep + 'W_I_8_page125_web.svg'
self.test_xml = DATADIR + sep + 'W_I_8_page125.xml'
self.dir = DATADIR
def test_init(self):
dmsg = DebugMessage(message='test')
word_position = TranskriptionPosition(id=1, height=10, width=10, x=0, y=10, debug_message=dmsg)
self.assertEqual(word_position.tag, WordPosition.TRANSKRIPTION)
self.assertEqual(word_position.id, '1')
self.assertEqual(word_position.debug_message.message, 'test')
self.assertEqual(word_position.height, 10)
self.assertEqual(word_position.top, 10)
self.assertEqual(word_position.bottom, 20)
self.assertEqual(word_position.left, 0)
self.assertEqual(word_position.isOnTranskription(), True)
self.assertEqual(word_position.isOnFaksimile(), False)
def test_attach_object_to_tree(self):
matrix = Matrix('matrix(0 0 0 0 0 0)')
dmsg = DebugMessage(message='test')
pwps = [ PositionalWordPart(text='test') ]
word_position = TranskriptionPosition(id=1, height=10, width=10, x=0, y=10, matrix=matrix, debug_message=dmsg, positional_word_parts=pwps)
empty_tree = ET.ElementTree(ET.Element('page'))
word_position.attach_object_to_tree(empty_tree)
#print(ET.dump(empty_tree.getroot()))
for node in empty_tree.getroot().xpath('//' + word_position.tag):
self.assertEqual(node.get('id'), '1')
self.assertEqual(node.get('bottom'), '20')
self.assertEqual(node.get('transform'), matrix.toString())
self.assertEqual(node.get('writing-process-id'), '-1')
word_position = TranskriptionPosition(node=empty_tree.getroot().find('.//' + word_position.tag))
self.assertEqual(word_position.height, 10)
self.assertEqual(word_position.debug_message is not None, True)
self.assertEqual(word_position.debug_message.message, 'test')
self.assertEqual(len(word_position.positional_word_parts), 1)
def test_CREATE_TRANSKRIPTION_POSITION_LIST(self):
page = Page(xml_source_file=self.test_xml, svg_file=self.test_svg_file)
tf = TranskriptionField(page.svg_file)
word_part_objs = [{'text': 'es', 'class': 'st5 st6', 'x': 258.148, 'y': '8.5' }]
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, transkription_field=tf)
self.assertEqual(transkription_positions[0].top, 3.829)
self.assertEqual(transkription_positions[0].height, 5.672)
word_part_objs = [{'text': 'Meine', 'class': 'st5 st8', 'x': 8.504, 'y': 70.5 }]
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, transkription_field=tf)
self.assertEqual(transkription_positions[0].height, 11.11)
self.assertEqual(transkription_positions[0].top, 61.266)
self.assertEqual(transkription_positions[0].bottom, 72.376)
def test_CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(self):
page = Page(xml_source_file=self.test_xml, svg_file=self.test_svg_file)
tf = TranskriptionField(page.svg_file)
word_part_objs = [{'text': 'Meine', 'class': 'st5 st8', 'x': 8.504, 'y': 70.5 }]
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, transkription_field=tf)
transkription_positions[0].positional_word_parts[2].transform = Matrix('rotate(20)')
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(page, transkription_positions[0].positional_word_parts)
self.assertEqual(len(transkription_positions), 3)
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, transkription_field=tf)
transkription_positions[0].positional_word_parts[0].style_class = 'st5 st10'
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(page, transkription_positions[0].positional_word_parts)
self.assertEqual(len(transkription_positions), 2)
+
+ def test_get_semantic_dictionary(self):
+ dictionary = TranskriptionPosition.get_semantic_dictionary()
+ self.assertEqual(TranskriptionPosition.XML_TAG in dictionary['properties'].get('writing_process_id').get('xpath'), True)
if __name__ == "__main__":
unittest.main()
Index: svgscripts/test_extractWordPosition.py
===================================================================
--- svgscripts/test_extractWordPosition.py (revision 27)
+++ svgscripts/test_extractWordPosition.py (revision 28)
@@ -1,195 +1,183 @@
import unittest
import os
from os import sep, path
from os.path import isfile, isdir, dirname
import re
import shutil
import tempfile
import lxml.etree as ET
import extractWordPosition
from myxmlwriter import write_pretty
from datatypes.transkriptionField import TranskriptionField
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.pdf import PDFText
from datatypes.word import Word
from datatypes.lineNumber import LineNumber
from datatypes.word_insertion_mark import WordInsertionMark
class TestExtractor(unittest.TestCase):
def setUp(self):
DATADIR = dirname(__file__) + sep + 'test_data'
self.test_file_find_word = DATADIR + sep + 'test_find_word.xml'
self.test_dir = tempfile.mkdtemp()
self.title = 'ABC 111'
self.matrix_string = 'matrix(1 0 0 1 183.6558 197.9131)'
self.test_file = DATADIR + sep + 'Mp_XIV_1_mytest_421.svg'
self.xml420 = DATADIR + sep + 'Mp_XIV_1_page420.xml'
self.pdf420 = DATADIR + sep + 'Mp_XIV_1_online_420.pdf'
self.pdf_file = DATADIR + sep + 'W_I_8_page125.pdf'
self.faulty_xml = DATADIR + sep + 'W_I_8_faulty_page125.xml'
self.pdf_xml = DATADIR + sep + 'W_I_8_page125.xml'
def test_main(self):
+ extractWordPosition.Extractor.UNITTESTING = True
argv = ['-d', self.test_dir, '-o', '--title=My Hero', '--page=1', self.test_file]
self.assertEqual(extractWordPosition.main(argv), 0)
def test_get_page_number(self):
extractor = extractWordPosition.Extractor()
self.assertEqual(extractor.get_page_number(self.test_file, page_number='1'), '001')
self.assertEqual(extractor.get_page_number(self.test_file), '421')
def test_get_file_name(self):
extractor = extractWordPosition.Extractor()
self.assertEqual(extractor.get_file_name(self.test_file), 'xml/Mp_XIV_1_mytest_421.xml')
extractor = extractWordPosition.Extractor(title=self.title)
self.assertEqual(extractor.get_file_name(self.test_file), 'xml/{}_page421.xml'.format(self.title.replace(' ', '_')))
extractorA = extractWordPosition.Extractor(title=self.title)
extractorB = extractWordPosition.Extractor(manuscript_file=extractorA.manuscript_file)
self.assertEqual(extractorB.get_file_name(self.test_file), 'xml/{}_page421.xml'.format(self.title.replace(' ', '_')))
def test_get_style(self):
extractor = extractWordPosition.Extractor()
svg_tree = ET.parse(self.test_file)
sonderzeichen_list, letterspacing_list, style_dict = extractor.get_style(svg_tree.getroot())
self.assertEqual(sonderzeichen_list, [ 'st21', 'st23'])
self.assertEqual(style_dict.get('st11').get('font-family'), 'Frutiger-Europeen')
self.assertEqual(style_dict.get('st5').get('stroke'), '#CED5CE')
def test_get_word_from_part_obj(self):
extractor = extractWordPosition.Extractor()
mylist = [{'text': 'a', 'class': 'asdf' }, {'text': 'b', 'endX': 0 }, {'text': 'c'}]
self.assertEqual(extractor.get_word_from_part_obj(mylist), 'abc')
def test_get_bottoms(self):
svg_tree = ET.parse(self.test_file)
extractor = extractWordPosition.Extractor()
mybottoms = extractor.get_bottoms(svg_tree.getroot())
self.assertEqual(mybottoms[0], '57.1914')
self.assertEqual(len(mybottoms), 106)
self.assertEqual(mybottoms[len(mybottoms)-1], '1155.6899')
mybottoms = extractor.get_bottoms(svg_tree.getroot(), from_position=100.0, to_position=800.0)
self.assertEqual(mybottoms[0], '100.5132')
self.assertEqual(len(mybottoms), 84)
self.assertEqual(mybottoms[len(mybottoms)-1], '792.8218')
tf = TranskriptionField(self.test_file)
mybottoms = extractor.get_bottoms(svg_tree.getroot(), transkription_field=tf)
self.assertEqual(mybottoms[0], '91.7134')
self.assertEqual(len(mybottoms), 75)
self.assertEqual(mybottoms[len(mybottoms)-1], '681.7134')
def test_get_text_items(self):
svg_tree = ET.parse(self.test_file)
extractor = extractWordPosition.Extractor()
mytest_items = [ x for x in extractor.get_text_items(svg_tree.getroot()) ]
self.assertEqual(len(mytest_items), 300)
self.assertEqual(mytest_items[0].get('transform'), 'matrix(1 0 0 1 386.8218 57.1914)')
tf = TranskriptionField(self.test_file)
mytest_itemsTF = [ x for x in extractor.get_text_items(svg_tree.getroot(), transkription_field=tf) ]
self.assertEqual(mytest_itemsTF[0].get('transform'), 'matrix(1 0 0 1 204.8618 91.7134)')
def test_init_tree_and_target_file(self):
target_file = 'xml/testA.xml'
page = Page(xml_target_file=target_file, title=self.title)
tree = page.page_tree
self.assertEqual(tree.getroot().get('title'), self.title)
self.assertEqual(tree.getroot().findall('./style'), [])
write_pretty(xml_element_tree=tree, file_name=target_file)
page = Page(xml_target_file=target_file)
tree = page.page_tree
self.assertEqual(tree.getroot().get('title'), self.title)
self.assertEqual(tree.getroot().findall('./style'), [])
isfile(target_file) and os.remove(target_file)
def test_add_style(self):
extractor = extractWordPosition.Extractor()
svg_tree = ET.parse(self.test_file)
sonderzeichen_list, letterspacing_list, style_dict = extractor.get_style(svg_tree.getroot())
target_file = 'xml/testA.xml'
page = Page(xml_target_file=target_file,title=self.title)
page.add_style(sonderzeichen_list=sonderzeichen_list, style_dict=style_dict)
write_pretty(xml_element_tree=page.page_tree, file_name=target_file)
fromTarget_xml_tree = ET.parse(target_file)
self.assertEqual(fromTarget_xml_tree.getroot().get('title'), self.title)
self.assertEqual(fromTarget_xml_tree.getroot().find("style").get('Sonderzeichen'), "st21 st23")
self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st5']").get('stroke'), '#CED5CE')
self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st11']").get('font-family'), 'Frutiger-Europeen')
page = Page(xml_target_file=target_file)
page.add_style(sonderzeichen_list=sonderzeichen_list, style_dict=style_dict)
write_pretty(xml_element_tree=page.page_tree, file_name=target_file)
fromTarget_xml_tree = ET.parse(target_file)
self.assertEqual(fromTarget_xml_tree.getroot().get('title'), self.title)
self.assertEqual(fromTarget_xml_tree.getroot().find("style").get('Sonderzeichen'), "st21 st23")
self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st5']").get('stroke'), '#CED5CE')
self.assertEqual(fromTarget_xml_tree.getroot().find("style").find("class[@name='st11']").get('font-family'), 'Frutiger-Europeen')
isfile(target_file) and os.remove(target_file)
def test_add_word(self):
extractor = extractWordPosition.Extractor()
svg_tree = ET.parse(self.test_file)
mylist = [{'text': 'a' }, {'text': 'b' }, {'text': 'c' }]
matrix = Matrix(self.matrix_string)
for dict in mylist:
dict['class'] = 'st22'
dict['x'] = matrix.add2X(0)
dict['y'] = matrix.getY()
target_file = self.test_dir + sep + 'asdfasdf.xml'
page = Page(xml_target_file=target_file)
sonderzeichen_list, letterspacing_list, style_dict = extractor.get_style(svg_tree.getroot())
page.add_style(sonderzeichen_list=sonderzeichen_list, letterspacing_list=letterspacing_list, style_dict=style_dict)
self.assertEqual(extractor.add_word(page, 0, mylist, '%', 0), 1)
mylist[1]['text'] = 'A'
mylist[1]['class'] = 'st21'
mylist[1]['x'] = matrix.add2X(1)
self.assertEqual(extractor.add_word(page, 0, mylist, '%', 0), 2)
extractor.update_and_attach_words2tree(page)
#self.assertEqual(page.word_insertion_marks[0].x, 184.656)
#self.assertEqual(page.word_insertion_marks[0].y, 197.913)
self.assertEqual(page.page_tree.getroot().xpath('//word[@id="1"]')[0].get('text'), 'a')
self.assertEqual(page.page_tree.getroot().xpath('//word[@id="2"]')[0].get('text'), 'c')
self.assertEqual(page.page_tree.getroot().xpath('//word[@id="2"]/transkription-position')[0].get('left'), '183.506')
self.assertEqual(page.page_tree.getroot().xpath('//word[@id="2"]/transkription-position')[0].get('height'), '8.25')
-
- def test_find_inserted_words(self):
- """PAUSED
- """
- """
- reference_tree = ET.parse(self.test_file_find_word)
- extractor = extractWordPosition.Extractor()
- svg_tree = ET.parse(self.test_file)
- page = Page(xml_source_file=self.test_file_find_word)
- for word_insertion in [ WordInsertionMark(wim_node=node) for node in reference_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG()) ]:
- words = extractor.find_inserted_words(page.page_tree, word_insertion)
- self.assertEqual([ str(word.id) for word in words ], [ str(word.id) for word in word_insertion.inserted_words])
- """
def test_extractor(self):
extractor = extractWordPosition.Extractor()
self.assertEqual(extractor.title, None)
self.assertEqual(extractor.manuscript_file, None)
self.assertEqual(extractor.xml_dir, 'xml/')
self.assertEqual(extractor.manuscript_tree, None)
def test_write_title_to_manuscript_file(self):
extractor = extractWordPosition.Extractor(xml_dir=self.test_dir, title=self.title)
self.assertEqual(isfile(extractor.manuscript_file), True)
extractor = extractWordPosition.Extractor(manuscript_file=extractor.manuscript_file)
self.assertEqual(extractor.title, self.title)
def test_extract_line_numbers(self):
svg_tree = ET.parse(self.test_file)
tf = TranskriptionField(self.test_file)
extractor = extractWordPosition.Extractor()
line_numbers = extractor.extract_line_numbers(svg_tree, tf)
self.assertEqual(line_numbers[0].id, 2)
self.assertEqual(len(line_numbers), 24)
self.assertEqual(line_numbers[0].top, 45.163)
def tearDown(self):
isdir(self.test_dir) and shutil.rmtree(self.test_dir)
isfile('{}/{}.xml'.format('xml', self.title.replace(' ', '_'))) and os.remove('{}/{}.xml'.format('xml', self.title.replace(' ', '_')))
if __name__ == "__main__":
unittest.main()
Index: svgscripts/extractWordPosition.py
===================================================================
--- svgscripts/extractWordPosition.py (revision 27)
+++ svgscripts/extractWordPosition.py (revision 28)
@@ -1,560 +1,562 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to extract the position of the words in a svg file and write them to a xml file.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. 1}}}
import inspect
import getopt
from lxml import etree as ET
from os import sep, listdir, mkdir, path
from os.path import exists, isfile, isdir
from progress.bar import Bar
import re
import sys
from svgpathtools import svg2paths2
import warnings
from myxmlwriter import write_pretty
from datatypes.lineNumber import LineNumber
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.pdf import PDFText
from datatypes.transkriptionField import TranskriptionField
from datatypes.transkription_position import TranskriptionPosition
from datatypes.word import Word
from datatypes.word_insertion_mark import WordInsertionMark
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Extractor:
"""
This class can be used to extract the word positions in a svg file and write it to a xml file.
Args:
[xml_dir (str): target directory]
[title (str): title of document]
[manuscript_file (str): xml file containing information about the archival unity to which the current page belongs
[extract_transkription_field_only (Boolean): if true extract_word_position will extract word positions only that
are part of the transkription field.
"""
+ UNITTESTING = False
SONDERZEICHEN_LIST = [ 'A', 'B', '{', '}' ]
def __init__(self, xml_dir=None, title=None, manuscript_file=None, extract_transkription_field_only=False, compare2pdf=False):
if bool(xml_dir):
self.xml_dir = xml_dir
not isdir(self.xml_dir) and mkdir(self.xml_dir)
else:
self.xml_dir = 'xml' if(isdir('xml')) else ''
self.compare2pdf = compare2pdf
self.xml_dir = self.xml_dir + sep if(bool(self.xml_dir)) else ''
self.title = title
self.manuscript_file = manuscript_file
self.extract_transkription_field_only = extract_transkription_field_only
self.manuscript_tree = None
if not bool(self.title) and bool(self.manuscript_file) and isfile(self.manuscript_file):
self.manuscript_tree = ET.parse(self.manuscript_file)
self.title = self.manuscript_tree.getroot().get('title')
elif bool(self.manuscript_file):
raise FileNotFoundError('File "{}" does not exist!'.format(self.manuscript_file))
elif bool(self.title):
if not bool(self.manuscript_file):
self.manuscript_file = self.xml_dir + self.title.replace(' ', '_') + '.xml'
if not isfile(self.manuscript_file):
self.manuscript_tree = ET.ElementTree(ET.Element('page', attrib={"title": self.title}))
write_pretty(xml_element_tree=self.manuscript_tree, file_name=self.manuscript_file, script_name=__file__, file_type='xmlManuscriptFile')
def get_page_number(self, file_name, page_number=None):
""" Returns page number as a string (with leading zero(s) if len(page_number) < 3).
"""
if not bool(page_number) and bool(re.search(r'\d', file_name)):
"""if page_number=None and filename contains digits,
then split filename into its parts that contain only digits, remove empty strings
and return the last part containing only digits.
"""
page_number = list(filter(lambda x: x != '', re.split(r'\D+', file_name))).pop()
if bool(page_number):
leading_zeros = '00' if(len(page_number) == 1) else '0' if(len(page_number) == 2) else ''
return leading_zeros + str(page_number)
else:
return ''
def get_file_name(self, file_name, page_number=None):
"""Returns the file_name of the target xml file.
"""
dir_name = path.dirname(self.xml_dir) + sep if(bool(self.xml_dir)) else ''
if bool(self.title):
return dir_name + self.title.replace(' ', '_') + '_page' + self.get_page_number(file_name, page_number=page_number) + '.xml'
else:
return '{}{}'.format(dir_name, path.basename(file_name).replace('.svg', '.xml'))
def get_style(self, etree_root):
"""Returns the style specification as a dictionary.
:returns:
sonderzeichen_list: list of keys for classes that are 'Sonderzeichen'
style_dict: dictionary: key = class name (str), value = style specification (dictionary)
"""
style_dict = {}
sonderzeichen_list = []
letterspacing_list = []
style = etree_root.find('style', etree_root.nsmap)
if style is not None:
for style_item in list(filter(lambda x: x != '', style.text.split("\n\t"))):
style_key = style_item.split('{')[0].replace('.', '')
style_value_dict = { item.split(':')[0]: item.split(':')[1].replace('\'','') \
for item in list(filter(lambda x: x!= '', style_item.split('{')[1].replace('}', '').replace('\n','').split(';')))}
style_dict[style_key] = style_value_dict
if bool(style_value_dict.get('font-family')) and 'Sonderzeichen' in style_value_dict.get('font-family'):
sonderzeichen_list.append(style_key)
if bool(style_value_dict.get('letter-spacing')):
letterspacing_list.append(style_key)
return sonderzeichen_list, letterspacing_list, style_dict
def get_word_from_part_obj(self, word_part_obj):
"""Extracts all 'text' from a list of dicitonaries and concats it to a string.
"""
return ''.join([ dict['text'] for dict in word_part_obj])
def find_inserted_words_by_position(self, target_tree, x, y):
"""Returns an Array with the words that are inserted above the x, y position or [] if not found.
"""
warnings.warn('Function "find_inserted_words_by_position" does not work and it is not clear whether we need this.')
MINY = 31.0
MAXY = 10.0
DIFFX = 9.0
if(len(target_tree.getroot().xpath('//word[@id]')) > 0):
result_list = []
minus2left = 20.0
minus2top = 19.0
while len(result_list) == 0 and minus2top < MINY and minus2left > DIFFX :
result_list = [ Word.CREATE_WORD(item) for item in target_tree.getroot().xpath(\
'//word[@top>{0} and @top<{1} and @left>{2} and @left<{3}]'.format(y - minus2top, y - MAXY, x - minus2left, x + DIFFX)) ]
minus2left -= 1
minus2top += 1
if len(result_list) > 0:
result_bottom = result_list[len(result_list)-1].bottom
result_left_min = result_list[len(result_list)-1].left + result_list[len(result_list)-1].width
for item in target_tree.getroot().xpath('//word[@bottom={0} and @left>{1}]'.format(result_bottom, result_left_min)):
result_left_min = result_list[len(result_list)-1].left + result_list[len(result_list)-1].width
result_left_max = result_left_min + DIFFX
if float(item.get('left')) - result_left_max < DIFFX:
result_list.append(Word.CREATE_WORD(item))
else:
break
return result_list
else:
return []
def find_inserted_words(self, target_tree, word_insertion_mark):
"""Returns an Array with the words that are inserted above/underneath the word_insertion_mark.
"""
warnings.warn('Function "find_inserted_words" does not work and it is not clear whether we need this.')
if word_insertion_mark.line_number < 2 or word_insertion_mark.line_number % 2 == 1:
return self.find_inserted_words_by_position(target_tree, word_insertion_mark.x, word_insertion_mark.y)
if(len(target_tree.getroot().xpath('//word[@id]')) > 0):
MINY = 31.0
MAXY = 10.0
DIFFX = 9.0
result_list = []
x = word_insertion_mark.x
y = word_insertion_mark.y
if word_insertion_mark.mark_type != 'B': # all insertions that are above the current line
line_number = word_insertion_mark.line_number - 1
words_on_line = [ Word.CREATE_WORD(item) for item in target_tree.getroot().xpath(\
'//word[@line-number={0}]'.format(line_number)) ]
if len(words_on_line) > 0:
minus2top = 1.0
while len(result_list) == 0 and minus2top < MINY:
for word in words_on_line:
for transkription_position in word.transkription_positions:
if transkription_position.top > y - minus2top\
and transkription_position.left > x - DIFFX\
and transkription_position.left < x + DIFFX:
result_list.append(word)
break
minus2top += 1
elif word_insertion_mark.mark_type == 'B': # B means insertion is underneath the current line
line_number = word_insertion_mark.line_number + 1
words_on_line = [ Word.CREATE_WORD(item) for item in target_tree.getroot().xpath(\
'//word[@line-number={0}]'.format(line_number)) ]
if len(words_on_line) > 0:
plus2top = 1.0
while len(result_list) == 0 and plus2top < MINY :
for word in words_on_line:
for transkription_position in word.transkription_positions:
if transkription_position.top > y + plus2top\
and transkription_position.left > x - DIFFX\
and transkription_position.left < x + DIFFX:
result_list.append(word)
break
plus2top += 1
if len(result_list) > 0: # now, collect more words that are right of already collected words
result_bottom = result_list[len(result_list)-1].transkription_positions[0].bottom
result_left_min = result_list[len(result_list)-1].transkription_positions[0].left\
+ result_list[len(result_list)-1].transkription_positions[0].width
for item in target_tree.getroot().xpath(\
'//word[@line-number={0} and @bottom>{1} and @bottom<{2} and @left>{3}]'.format(line_number, result_bottom-5, result_bottom+5, result_left_min)):
result_left_min = result_list[len(result_list)-1].transkription_positions[0].left\
+ result_list[len(result_list)-1].transkription_positions[0].width
result_left_max = result_left_min + DIFFX
if float(item.get('left')) - result_left_max < DIFFX:
result_list.append(Word.CREATE_WORD(item))
else:
break
return result_list
else:
return []
def add_word(self, page, index, word_part_objs, endSign, endX, matrix=None, debug_msg=None, transkription_field=None):
"""Creates transkription_positions and a new word from word_part_objs (i.e. a list of dictionaries about parts of this word).
If word contains a Sonderzeichen as specified by self.SONDERZEICHEN_LIST, word_part_objs will be split and several words are created.
:returns: the new word counter (int)
"""
break_points = []
if(len(page.sonderzeichen_list) > 0): # check for Sonderzeichen and special chars -> mark for word insertion, create break points
for Sonderzeichen in self.SONDERZEICHEN_LIST:
contains_Sonderzeichen = [ dict['text'] == Sonderzeichen and any(sz in dict['class'] for sz in page.sonderzeichen_list) for dict in word_part_objs ]
if True in contains_Sonderzeichen:
break_points += [ (endPoint, endPoint + 1) for endPoint in [i for i, e in enumerate(contains_Sonderzeichen) if e == True ]]
for sz_point in [i for i, e in break_points]:
wim_index = len(page.word_insertion_marks)
x = float(word_part_objs[sz_point]['x'])
y = float(word_part_objs[sz_point]['y'])
if page.svg_file is not None and isfile(page.svg_file) and transkription_field is not None:
svg_path_tree = ET.parse(page.svg_file)
namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
xmin = transkription_field.xmin
ymin = transkription_field.ymin
wim = WordInsertionMark.CREATE_WORD_INSERTION_MARK(svg_path_tree, namespaces, id=wim_index, x=x, y=y, xmin=xmin, ymin=ymin,\
line_number=page.get_line_number(y-1), mark_type=Sonderzeichen)
page.word_insertion_marks.append(wim)
if(bool(re.search(r'\d[A-Za-z]', self.get_word_from_part_obj(word_part_objs)))): # case: digits from line number and chars from words -> create break points
THRESHOLDX = 20 # Threshold between line number and text
last_x = -1
for i, x in enumerate([float(dict['x']) for dict in word_part_objs]):
if(last_x > -1 and (x - last_x > THRESHOLDX)):
break_points.append((i, i))
last_x = x
if(len(break_points) > 0): # if there are break points -> split word_part_obj and add the corresponding words
from_index = 0
for end_point, next_from_index in break_points:
new_word_part_objs = word_part_objs[from_index:end_point]
new_endX = word_part_objs[end_point]['x']
from_index = next_from_index
index = self.add_word(page, index, new_word_part_objs, None, new_endX, matrix=matrix, debug_msg=debug_msg, transkription_field=transkription_field)
if from_index > 0 and from_index < len(word_part_objs):
new_word_part_objs = word_part_objs[from_index:]
index = self.add_word(page, index, new_word_part_objs, endSign, endX, matrix=matrix, debug_msg=debug_msg, transkription_field=transkription_field)
return index
else:
if len(word_part_objs) > 0:
transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, matrix=matrix,\
debug_msg_string=debug_msg, transkription_field=transkription_field)
text = self.get_word_from_part_obj(word_part_objs)
line_number = page.get_line_number((transkription_positions[0].bottom+transkription_positions[0].top)/2)
newWord = Word(id=index, text=text, line_number=line_number, transkription_positions=transkription_positions)
#newWord = Word.CREATE_WORD(page=page, word_part_objs=word_part_objs, id=index, endX=endX, endSign=endSign, matrix=matrix, debug_msg=debug_msg)
#newWord.attach_word_to_tree(page.page_tree) -> now we attach all words with update_and_attach_words2tree()
page.words.append(newWord)
return int(index) + 1
else:
return int(index)
def get_bottoms(self, tree_root, from_position=-1.0, to_position=-1.0, transkription_field=None):
"""Returns all unique bottom values (Float) as a sorted list.
"""
bottom_list = sorted(set(item.get('transform').split(' ')[5].replace(')','') for item in tree_root.findall(".//text", tree_root.nsmap)), key=float)
if transkription_field is not None:
from_position = transkription_field.ymin
to_position = transkription_field.ymax
if (from_position > 0.0 and to_position > 0.0):
return [ item for item in filter(lambda x: float(x) > from_position and float(x) < to_position, bottom_list) ]
else:
return bottom_list
def get_text_items(self, tree_root, transkription_field=None):
"""Returns all text elements with a matrix or (if transkription_field is specified)
all text elements that are located inside the transkription field.
"""
if transkription_field is not None:
return filter(lambda x: Matrix.IS_PART_OF_TRANSKRIPTION_FIELD(x.get('transform'), transkription_field),\
tree_root.iterfind(".//text", tree_root.nsmap))
else:
return tree_root.iterfind(".//text", tree_root.nsmap)
def extract_line_numbers(self, svg_tree, transkription_field):
"""Extracts line numbers and write them to a xml file.
"""
nodes_near_tf = [ item for item in filter(lambda x: Matrix.IS_NEARX_TRANSKRIPTION_FIELD(x.get('transform'), transkription_field),\
svg_tree.getroot().iterfind('.//text', svg_tree.getroot().nsmap))]
line_numbers = [ LineNumber(raw_text_node=item, transkription_field=transkription_field)\
for item in filter(lambda x: LineNumber.IS_A_LINE_NUMBER(x), nodes_near_tf)]
if len(line_numbers) > 0:
MINABOVE = 3
last_to_position = transkription_field.ymin
for line_number in line_numbers:
above_current_line_bottom = line_number.bottom + transkription_field.ymin - MINABOVE
bottoms = self.get_bottoms(svg_tree.getroot(), from_position=last_to_position, to_position=above_current_line_bottom)
last_to_position = above_current_line_bottom
if len(bottoms) > 0:
current_line_top = float(bottoms[len(bottoms)-1]) - transkription_field.ymin + MINABOVE
line_number.setTop(current_line_top)
return line_numbers
def get_word_object_multi_char_x(self, word_part_obj_dict):
"""Returns the x of the last char of word_part_object.
TODO: get real widths from svg_file!!!
"""
WIDTHFACTOR = 2.6
return word_part_obj_dict['x'] if len(word_part_obj_dict['text']) < 2 else word_part_obj_dict['x'] + len(word_part_obj_dict['text']) * WIDTHFACTOR
def extract_word_position(self, svg_tree, page, transkription_field=None):
"""Extracts word positions.
"""
counter = 0
word_part_obj = []
endSign = '%'
last_matrix = None
MAXBOTTOMDIFF = 5
MAXXDIFF = 6
- bar = Bar('extracting word positions from text_item', max=len([*self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field)]))
+ if not Extractor.UNITTESTING:
+ bar = Bar('extracting word positions from text_item', max=len([*self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field)]))
for text_item in self.get_text_items(svg_tree.getroot(), transkription_field=transkription_field):
current_matrix = Matrix(text_item.get('transform'), transkription_field=transkription_field)
# check for line breaks
if (last_matrix is not None and len(word_part_obj) > 0 and (\
Matrix.DO_CONVERSION_FACTORS_DIFFER(last_matrix, current_matrix) or\
(abs(current_matrix.getY() - last_matrix.getY()) > MAXBOTTOMDIFF) or\
(abs(current_matrix.getX() - word_part_obj[len(word_part_obj)-1]['x']) > MAXXDIFF)))\
or (len(word_part_obj) > 0 and self.get_word_object_multi_char_x(word_part_obj[0]) > current_matrix.getX()):
endSign = '%'
if(self.get_word_from_part_obj(word_part_obj) != ''):
debug_msg = 'check for line breaks, diffx: {}, diffy: {}, diff_conversion_matrix: {}'.format(\
round(abs(current_matrix.getX() - word_part_obj[len(word_part_obj)-1]['x']), 3), round(abs(current_matrix.getY() - last_matrix.getY()), 3),\
str(Matrix.DO_CONVERSION_FACTORS_DIFFER(last_matrix, current_matrix)))
counter = self.add_word(page, counter, word_part_obj, endSign, endX, matrix=last_matrix, debug_msg=debug_msg, transkription_field=transkription_field)
word_part_obj = []
endX = current_matrix.getX()
if(len(text_item.findall(".//tspan", svg_tree.getroot().nsmap)) < 1): # case: