Index: svgscripts/datatypes/page.py
===================================================================
--- svgscripts/datatypes/page.py	(revision 12)
+++ svgscripts/datatypes/page.py	(revision 13)
@@ -1,238 +1,236 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 """   This class can be used to represent a page.
 """
 #    Copyright (C) University of Basel 2019  {{{1
 #
 #    This program is free software: you can redistribute it and/or modify
 #    it under the terms of the GNU General Public License as published by
 #    the Free Software Foundation, either version 3 of the License, or
 #    (at your option) any later version.
 #
 #    This program is distributed in the hope that it will be useful,
 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #    GNU General Public License for more details.
 #
 #    You should have received a copy of the GNU General Public License
 #    along with this program.  If not, see <https://www.gnu.org/licenses/> 1}}}
 
 __author__ = "Christian Steiner"
 __maintainer__ = __author__
 __copyright__ = 'University of Basel'
 __email__ = "christian.steiner@unibas.ch"
 __status__ = "Development"
 __license__ = "GPL v3"
 __version__ = "0.0.1"
 
 from lxml import etree as ET
 from os.path import isfile
 
 #from myxmlwriter import write_pretty
 from .class_spec import SemanticClass
 from .image import Image
 from .word import Word
 from .lineNumber import LineNumber
 from .word_insertion_mark import WordInsertionMark
 from .transkriptionField import TranskriptionField
 
 class Page(SemanticClass):
     """
     This class represents a page.
 
     Args:
         xml_source_file (str): name of the xml file to be instantiated.
         xml_target_file (str): name of the xml file to which page info will be written.
 
     """
-    class_dictionary = {}
+    RDF_SUBCLASSES = ['http://www.knora.org/ontology/0000/information-carrier#Page']  # ontology class URI(s) for Page; presumably read by SemanticClass when building the class dictionary -- confirm
+
     def __init__(self, xml_source_file=None, xml_target_file=None, title=None, page_number=None, pdfFile=None, svg_file=None, extract_transkription_field_only=False):
         self.title = title
         self.line_numbers = []
         self.style_dict = {}
         self.sonderzeichen_list = []
         self.svg_file = None
         self.pdfFile = None
         self.source = None
         self.number = int(page_number) if page_number is not None else -1
         if xml_source_file is not None:
             if isfile(xml_source_file):
                 parser = ET.XMLParser(remove_blank_text=True)
                 self.page_tree = ET.parse(xml_source_file, parser)
                 self.title = self.page_tree.getroot().get('title')
                 self.number = self.page_tree.getroot().get('number')
                 self.source = self.page_tree.getroot().get('source')
                 self.init_words() 
                 self.add_style(style_node=self.page_tree.getroot().find('.//style'))
                 self.svg_file = self.page_tree.xpath('.//svg/@file')[0]\
                         if len(self.page_tree.xpath('.//svg/@file')) > 0 else None
                 self.width = float(self.page_tree.xpath('.//svg/@width')[0])\
                         if len(self.page_tree.xpath('.//svg/@width')) > 0 else 0.0
                 self.height = float(self.page_tree.xpath('.//svg/@height')[0])\
                         if len(self.page_tree.xpath('.//svg/@height')) > 0 else 0.0
                 self.pdfFile = self.page_tree.xpath('.//pdf/@file')[0]\
                         if len(self.page_tree.xpath('.//pdf/@file')) > 0 else None
                 if pdfFile is not None and self.pdfFile is None:
                     self.pdfFile = pdfFile
                     ET.SubElement(self.page_tree.getroot(), 'pdf', attrib={'file': self.pdfFile})
                     #write_pretty(xml_element_tree=self.page_tree, file_name=xml_source_file, script_name=__file__, file_type='svgWordPosition')
                 if svg_file is not None and self.svg_file is None:
                     self.svg_file = svg_file
                     tf = TranskriptionField(svg_file)
                     self.width = round(tf.documentWidth, 3)
                     self.height = round(tf.documentHeight, 3)
                     ET.SubElement(self.page_tree.getroot(), 'svg', attrib={'width': str(self.width), 'height': str(self.height), 'file': self.svg_file})
                     #write_pretty(xml_element_tree=self.page_tree, file_name=xml_source_file, script_name=__file__, file_type='svgWordPosition')
             else:
                 raise Exception('File "{}" does not exist!'.format(xml_source_file))
         elif xml_target_file is not None:
             self.word_insertion_marks = []
             self.words = []
             self.svg_file = svg_file
             self.pdfFile = pdfFile
             if isfile(xml_target_file):
                 parser = ET.XMLParser(remove_blank_text=True)
                 self.page_tree = ET.parse(xml_target_file, parser)
                 self.source = self.page_tree.getroot().get('source')
                 if bool(self.page_tree.getroot().get('title')):
                     self.title = self.page_tree.getroot().get('title')
                 elif title is not None:
                     self.page_tree.getroot().set('title', title)
                 if self.svg_file is None:
                     self.svg_file = self.page_tree.xpath('.//svg/@file')[0]\
                             if len(self.page_tree.xpath('.//svg/@file')) > 0 else None
                     self.width = float(self.page_tree.xpath('.//svg/@width')[0])\
                             if len(self.page_tree.xpath('.//svg/@width')) > 0 else 0.0
                     self.height = float(self.page_tree.xpath('.//svg/@height')[0])\
                             if len(self.page_tree.xpath('.//svg/@height')) > 0 else 0.0
                 elif len(self.page_tree.xpath('.//svg/@file')) == 0:
                     tf = TranskriptionField(svg_file)
                     self.width = round(tf.documentWidth, 3)
                     self.height = round(tf.documentHeight, 3)
                     ET.SubElement(self.page_tree.getroot(), 'svg', attrib={'width': str(self.width), 'height': str(self.height), 'file': self.svg_file})
                 else:
                     self.width = float(self.page_tree.xpath('.//svg/@width')[0])\
                             if len(self.page_tree.xpath('.//svg/@width')) > 0 else 0.0
                     self.height = float(self.page_tree.xpath('.//svg/@height')[0])\
                             if len(self.page_tree.xpath('.//svg/@height')) > 0 else 0.0
                 if self.pdfFile is None:
                     self.pdfFile = self.page_tree.xpath('.//pdf/@file')[0]\
                         if len(self.page_tree.xpath('.//pdf/@file')) > 0 else None
                 elif len(self.page_tree.xpath('.//pdf/@file')) == 0:
                     ET.SubElement(self.page_tree.getroot(), 'pdf', attrib={'file': self.pdfFile})
                 for xpath2remove in [ 'word', 'style', 'freehand', LineNumber.XML_TAG() ]:
                     for node in self.page_tree.xpath('//' + xpath2remove):
                         node.getparent().remove(node)
             else:
                 self.page_tree = ET.ElementTree(ET.Element('page'))
                 self.pdfFile = pdfFile
                 self.svg_file = svg_file
                 if title is not None:
                     self.page_tree.getroot().set('title', title)
                 self.page_tree.getroot().set('transkription-field-only', str(extract_transkription_field_only).lower())
                 if page_number is not None:
                     self.page_tree.getroot().set('number', str(page_number))
                 if self.pdfFile is not None:
                     ET.SubElement(self.page_tree.getroot(), 'pdf', attrib={'file': self.pdfFile})
                 if self.svg_file is not None:
                     tf = TranskriptionField(self.svg_file)
                     self.width = round(tf.documentWidth, 3)
                     self.height = round(tf.documentHeight, 3)
                     ET.SubElement(self.page_tree.getroot(), 'svg', attrib={'width': str(self.width), 'height': str(self.height), 'file': self.svg_file})
         self.svg_image = Image(file_name=self.svg_file, height=self.height, width=self.width) if self.svg_file is not None\
                 else None
-        self.create_semantic_dictionary(Page.class_dictionary)
 
     def init_line_numbers(self, line_numbers, document_bottom):
         """Init line numbers.
         """
         even_index = 0 
         MINABOVE = 1
         self.line_numbers = []
         if len(line_numbers) > 0:
             first_line_bottom = line_numbers[even_index].top - MINABOVE
             self.line_numbers.append(LineNumber(id=1, top=0, bottom=first_line_bottom))
             self.line_numbers.append(line_numbers[even_index])
             even_index += 1
             while even_index < len(line_numbers):
                 self.line_numbers.append(LineNumber(id=line_numbers[even_index].id-1,\
                         top=line_numbers[even_index-1].bottom+MINABOVE,\
                         bottom=line_numbers[even_index].top-MINABOVE))
                 self.line_numbers.append(line_numbers[even_index])
                 even_index += 1
             self.line_numbers.append(LineNumber(id=line_numbers[even_index-1].id+1,\
                     top=line_numbers[even_index-1].bottom+MINABOVE,\
                     bottom=document_bottom))
             for line_number in self.line_numbers:
                 line_number.attach_object_to_tree(self.page_tree)
     
     def init_words(self):
         self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in self.page_tree.getroot().xpath('//' + WordInsertionMark.XML_TAG()) ]
         self.words = [ Word.CREATE_WORD(word_node=word_node) for word_node in self.page_tree.getroot().xpath('//word') ]
         self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in self.page_tree.getroot().xpath('//' + LineNumber.XML_TAG()) ]
         for index, word in enumerate(self.words):
             for word_insertion_mark in self.word_insertion_marks:
                 self.words[index] = word_insertion_mark.attach_and_update_word_if_involved(word)
                 if self.words[index] != word:
                     break
 
     def add_style(self, sonderzeichen_list=[], letterspacing_list=[], style_dict={}, style_node=None):
         """Adds a list of classes that are sonderzeichen and a style dictionary to page.
         """
         self.sonderzeichen_list = sonderzeichen_list
         self.letterspacing_list = letterspacing_list
         self.style_dict = style_dict
         if style_node is not None:
             self.style_dict = { item.get('name'): { key: value for key, value in item.attrib.items() if key != 'name' } for item in style_node.findall('.//class') }
             self.sonderzeichen_list = [ item.get('name') for item in style_node.findall('.//class')\
                     if bool(item.get('font-family')) and 'Sonderzeichen' in item.get('font-family') ]
             self.letterspacing_list = [ item.get('name') for item in style_node.findall('.//class')\
                     if bool(item.get('letterspacing-list')) ]
         elif bool(self.style_dict):
             style_node = ET.SubElement(self.page_tree.getroot(), 'style')
             if len(self.sonderzeichen_list) > 0:
                 style_node.set('Sonderzeichen', ' '.join(self.sonderzeichen_list))
             if len(self.letterspacing_list) > 0:
                 style_node.set('letterspacing-list', ' '.join(self.letterspacing_list))
             for key in self.style_dict.keys():
                 self.style_dict[key]['name'] = key
                 ET.SubElement(style_node, 'class', attrib=self.style_dict[key])
 
     def get_biggest_fontSize4styles(self, style_set={}):
         """Returns biggest font size from style_dict for a set of style class names.
 
             [:returns:] (float) biggest font size OR 1 if style_dict is empty
         """
         if bool(self.style_dict):
             sorted_font_sizes = sorted( (float(self.style_dict[key]['font-size'].replace('px','')) for key in style_set if bool(self.style_dict[key].get('font-size'))), reverse=True)
             return sorted_font_sizes[0] if len(sorted_font_sizes) > 0 else 1
         else:
             return 1
 
     def get_line_number(self, y):
         """Returns line number id for element at y.
 
             [:return:] (int) line number id or -1
         """
         if len(self.line_numbers) > 0:
             result_list = [ line_number.id for line_number in self.line_numbers if y >= line_number.top and y <= line_number.bottom ]
             return result_list[0] if len(result_list) > 0 else -1
         else:
             return -1
 
-    def create_semantic_dictionary(self, dictionary):
+    @classmethod
+    def get_semantic_dictionary(cls):  # callable on the class itself; no Page instance is needed
         """ Creates a semantic dictionary as specified by SemanticClass.
         """
-        if len(dictionary) == 0:
-            class_dict = self.get_class_dictionary()
-            if self.__class__ == Page:
-                class_dict.update({'rdf:subClassOf': 'http://www.knora.org/ontology/0000/information-carrier#Page'})
-            properties = {'title': (str, 1), 'number': (str, 1), 'line_numbers': (LineNumber, SemanticClass.LIST), 'words': (Word, SemanticClass.LIST),\
-                    'svg_image': (Image, 1), 'word_insertion_marks': (WordInsertionMark, SemanticClass.LIST)}
-            dictionary.update({'class': class_dict})
-            dictionary.update({'properties': properties})
-
-    @staticmethod
-    def get_semantic_dictionary():
-        return __class__.class_dictionary
+        dictionary = {}  # a new result dict is built on every call
+        class_dict = cls.get_class_dictionary()  # not defined in this file; presumably inherited from SemanticClass -- confirm
+        properties = {'title': (str, 1), 'number': (str, 1), 'line_numbers': (LineNumber, SemanticClass.LIST), 'words': (Word, SemanticClass.LIST),\
+                'svg_image': (Image, 1), 'word_insertion_marks': (WordInsertionMark, SemanticClass.LIST)}  # keys mirror attributes set in __init__/init_words; values are (type, 1 or SemanticClass.LIST), second item presumably a cardinality -- confirm
+        dictionary.update({'class': class_dict})  # stored under key 'class'
+        dictionary.update({'properties': properties})  # stored under key 'properties'
+        return dictionary  # shape: {'class': ..., 'properties': ...}
+
+