Index: svgscripts/datatypes/footnotes.py
===================================================================
--- svgscripts/datatypes/footnotes.py (revision 80)
+++ svgscripts/datatypes/footnotes.py (revision 81)
@@ -1,78 +1,130 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to extract footnotes from a svg file.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
import getopt
import re
import sys
from os import listdir, sep, path
from os.path import isfile, isdir, dirname
import lxml.etree as ET
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from .matrix import Matrix
+from .standoff_tag import StandoffTag
+from .text import Text
from .transkriptionField import TranskriptionField
UNITTESTING = False
+
def extract_footnotes_as_strings(transkription_field=None, svg_tree=None, svg_file=None, contains_string=''):
    """Return all footnotes found in the footnote area as a list of strings.

    The text nodes of the svg are filtered with ``Matrix.IS_IN_FOOTNOTE_AREA``,
    grouped into lines by the y value of their transform matrix and read from
    left to right (x value).  Within a line, a node whose text matches
    ``.*[0-9]+:`` closes the footnote collected so far and starts a new one;
    every other node is appended to the current footnote.  Collection starts
    afresh on every line.

    :param transkription_field: passed on to ``Matrix.IS_IN_FOOTNOTE_AREA``;
        created from ``svg_file`` if omitted.
    :param svg_tree: the parsed svg document; parsed from ``svg_file`` if omitted.
    :param svg_file: path of the svg file, used for the two defaults above.
    :param contains_string: if not empty, only footnotes containing this
        substring are returned.
    :return: a list of str
    """
    if transkription_field is None and svg_file is not None:
        transkription_field = TranskriptionField(svg_file)
    if svg_tree is None and svg_file is not None:
        svg_tree = ET.parse(svg_file)
    root = svg_tree.getroot()
    namespaces = root.nsmap
    # Parse every transform matrix exactly once and group the nodes by their
    # y value, instead of re-parsing all matrices again for each line.
    nodes_by_line = {}
    for node in root.iterfind('.//text', namespaces):
        transform = node.get('transform')
        if Matrix.IS_IN_FOOTNOTE_AREA(transform, transkription_field):
            matrix = Matrix(transform_matrix_string=transform)
            nodes_by_line.setdefault(matrix.getY(), []).append((matrix.getX(), node))
    footnotes = []
    for bottom_value in sorted(nodes_by_line):
        # sorted() is stable: nodes sharing an x value keep document order.
        nodes_on_line = sorted(nodes_by_line[bottom_value], key=lambda entry: entry[0])
        footnote_string = ''
        for _, node in nodes_on_line:
            if len(node) == 0:
                # An element without text has text None; treat it as empty.
                next_string = node.text or ''
            else:
                next_string = ''.join(item.text or '' for item in node.findall('tspan', namespaces))
            if footnote_string != '' and re.match(r'.*[0-9]+:', next_string):
                footnotes.append(footnote_string)
                footnote_string = next_string
            else:
                footnote_string += next_string
        footnotes.append(footnote_string)
    if contains_string != '':
        footnotes = [ footnote_string for footnote_string in footnotes if contains_string in footnote_string ]
    return footnotes
def extract_footnotes(page, transkription_field=None, svg_tree=None, svg_file=None, contains_string=''):
    """Return all footnotes found in the footnote area as a list of Text.

    The text nodes of the svg are filtered with ``Matrix.IS_IN_FOOTNOTE_AREA``,
    grouped into lines by the y value of their transform matrix and handed,
    from left to right (x value), to ``_process_content_and_markup``, which
    builds the footnotes and their standoff markup.  A footnote that is still
    open at the end of a line is closed there.

    :param page: the page whose style dictionary is used to recognize markup
        (see ``StandoffTag.create_relevant_style_dictionary``).
    :param transkription_field: passed on to ``Matrix.IS_IN_FOOTNOTE_AREA``;
        created from ``svg_file`` if omitted.
    :param svg_tree: the parsed svg document; parsed from ``svg_file`` if omitted.
    :param svg_file: path of the svg file, used for the two defaults above.
    :param contains_string: if not empty, only footnotes whose content
        contains this substring are returned.
    :return: a list of (datatypes.text) Text
    """
    if transkription_field is None and svg_file is not None:
        transkription_field = TranskriptionField(svg_file)
    if svg_tree is None and svg_file is not None:
        svg_tree = ET.parse(svg_file)
    style_dict = StandoffTag.create_relevant_style_dictionary(page)
    root = svg_tree.getroot()
    # Parse every transform matrix exactly once and group the nodes by their
    # y value, instead of re-parsing all matrices again for each line.
    nodes_by_line = {}
    for node in root.iterfind('.//text', root.nsmap):
        transform = node.get('transform')
        if Matrix.IS_IN_FOOTNOTE_AREA(transform, transkription_field):
            matrix = Matrix(transform_matrix_string=transform)
            nodes_by_line.setdefault(matrix.getY(), []).append((matrix.getX(), node))
    footnotes = []
    for bottom_value in sorted(nodes_by_line):
        # sorted() is stable: nodes sharing an x value keep document order.
        nodes_on_line = sorted(nodes_by_line[bottom_value], key=lambda entry: entry[0])
        footnote = None
        for _, node in nodes_on_line:
            footnote = _process_content_and_markup(node, footnote, footnotes, style_dict, svg_tree)
        if footnote is not None:
            footnotes.append(footnote)
    if contains_string != '':
        footnotes = [ footnote for footnote in footnotes if contains_string in footnote.content ]
    return footnotes
+
def _process_content_and_markup(node, footnote, footnotes, style_dict, svg_tree):
    """Add the text and markup of node to the current footnote.

    If the text of node (for a node with children: the joined text of its
    tspan children) matches ``.*[0-9]+:`` while a footnote is open, that
    footnote is closed, i.e. appended to footnotes, and a new one is started.
    A node with children is processed by recursing into its tspan children;
    a node without children contributes its own text and, if it has a
    ``class`` attribute, the standoff markup created from it.

    :param node: a text or tspan element.
    :param footnote: the footnote that is currently open, or None.
    :param footnotes: the list that receives closed footnotes (modified in place).
    :param style_dict: the relevant style dictionary, passed to ``StandoffTag.create_cls``.
    :param svg_tree: the svg document; its root namespace map is used for lookups.
    :return: the footnote that is open after processing node (may be None)
    """
    namespaces = svg_tree.getroot().nsmap
    has_children = len(node) > 0
    if has_children:
        tspans = node.findall('tspan', namespaces)
        # tspan.text is None for an empty element; join needs strings.
        next_text = ''.join(item.text or '' for item in tspans)
    else:
        tspans = []
        # node.text is None for an empty element; re.match and Text need a string.
        next_text = node.text or ''
    if footnote is not None and re.match(r'.*[0-9]+:', next_text):
        footnotes.append(footnote)
        footnote = None
    if has_children:
        for item in tspans:
            footnote = _process_content_and_markup(item, footnote, footnotes, style_dict, svg_tree)
        return footnote
    start_index = 0
    if footnote is None:
        footnote = Text(content=next_text)
    else:
        start_index = footnote.append(next_text)
    style_classes = node.get('class')
    if style_classes:
        standoff_markups = StandoffTag.create_cls(start_index, len(footnote.content), style_classes, style_dict=style_dict)
        if standoff_markups and footnote.standoff_markups:
            # Let the most recent tag absorb every new tag that continues it;
            # only the tags it cannot absorb are added below.
            standoff_markups = footnote.standoff_markups[-1].join_list(standoff_markups)
        if standoff_markups:
            footnote.standoff_markups += standoff_markups
    return footnote
+
# NOTE(review): no main() is defined or imported in the visible part of this
# module, so running it as a script would presumably raise NameError -- confirm.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
Index: svgscripts/datatypes/text.py
===================================================================
--- svgscripts/datatypes/text.py (revision 0)
+++ svgscripts/datatypes/text.py (revision 81)
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent a text that may have standoff markup.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+from .standoff_tag import StandoffTag
+
+sys.path.append('py2ttl')
+from class_spec import SemanticClass
+
+
class Text(SemanticClass):
    """
    This class represents a text that may have standoff markup.

    The markup is kept apart from the content: ``standoff_markups`` is a list
    of tags that refer to the content by start and end index.
    """

    def __init__(self, content: str, standoff_markups=None):
        """Create a text.

        :param content: the content of the text; None is stored as ''.
        :param standoff_markups: the list of standoff tags of the text; a new
            empty list is used if omitted.
        """
        # Store '' instead of None so that append() and substring tests on
        # the content always work on a string.
        self.content = content if content is not None else ''
        self.standoff_markups = standoff_markups if standoff_markups is not None else []

    def append(self, content: str) -> int:
        """Extend text with content.

        :param content: the string to append; None or '' leaves the text unchanged.
        :return: startIndex of appended content
        """
        start_index = len(self.content)
        if content:
            self.content += content
        return start_index

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        properties = {}
        properties.update(cls.create_semantic_property_dictionary('content', str, cardinality=1,\
                name='textHasContent', label='content of text', comment='Connects a text with its content.'))
        properties.update(cls.create_semantic_property_dictionary('standoff_markups', list,\
                name='textHasMarkup', label='standoff tag of text', comment='Connects a text with a list of standoff tags.'))
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)
+
+
Index: svgscripts/datatypes/standoff_tag.py
===================================================================
--- svgscripts/datatypes/standoff_tag.py (revision 0)
+++ svgscripts/datatypes/standoff_tag.py (revision 81)
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+""" This class can be used to represent the standoff markup of a text.
+"""
+# Copyright (C) University of Basel 2020 {{{1
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see 1}}}
+
+__author__ = "Christian Steiner"
+__maintainer__ = __author__
+__copyright__ = 'University of Basel'
+__email__ = "christian.steiner@unibas.ch"
+__status__ = "Development"
+__license__ = "GPL v3"
+__version__ = "0.0.1"
+
+from lxml import etree as ET
+import re
+import sys
+
+
+sys.path.append('py2ttl')
+from class_spec import SemanticClass
+
+
class StandoffTag(SemanticClass):
    """
    This class represents the standoff markup of a text.

    A tag consists of a markup name and the start and end index of the part
    of the text it applies to.
    """
    # Only styles with this key are considered ...
    RELEVANT_STYLE_KEY = 'font-family'
    # ... and only if the value of that key starts with this prefix.
    RELEVANT_CONTENT_STARTSWITH = 'Frutiger-'
    # A font family produces a tag only if its name ends in Italic or Bold.
    RELEVANT_PATTERN = re.compile('.*(Italic|Bold)$')
    # This part of the font family name is removed to get the markup name.
    RELEVANT_SUB_PATTERN = re.compile('Frutiger-(Light)*')

    def __init__(self, markup: str, startIndex: int, endIndex: int):
        """Create a tag with markup for the text between startIndex and endIndex.
        """
        self.markup = markup
        self.startIndex = startIndex
        self.endIndex = endIndex

    @classmethod
    def create_cls(cls, start_index, end_index, style_string, page=None, style_dict=None):
        """Creates a StandoffTag from a style_string.

        :param start_index: start index of every created tag.
        :param end_index: end index of every created tag.
        :param style_string: style class names, separated by single spaces.
        :param page: if given, the style dictionary is created from this page
            and style_dict is ignored.
        :param style_dict: a relevant style dictionary, used if page is None.
        :return: a list of (datatypes.standoff_tag) StandoffTag
        """
        if page is not None:
            style_dict = cls.create_relevant_style_dictionary(page)
        # This check has to precede any lookup in style_dict: without page
        # and style_dict there is nothing to look up.
        if not style_dict:
            return []
        standoff_tags = []
        # dict.fromkeys removes repeated class names like set() does, but
        # keeps them in the order of style_string, so the order of the
        # resulting tags does not depend on string hashing.
        for key in dict.fromkeys(style_string.split(' ')):
            if key not in style_dict:
                continue
            font_family = style_dict[key].get(cls.RELEVANT_STYLE_KEY, '')
            if cls.RELEVANT_PATTERN.match(font_family):
                markup = cls.RELEVANT_SUB_PATTERN.sub('', font_family).lower()
                standoff_tags.append(cls(markup, start_index, end_index))
        return standoff_tags

    @classmethod
    def create_relevant_style_dictionary(cls, page):
        """Return a style dictionary that contains only relevant keys and contents.

        An entry of page.style_dict is relevant if it has a RELEVANT_STYLE_KEY
        whose value starts with RELEVANT_CONTENT_STARTSWITH.
        """
        return { key: key_dict for key, key_dict in page.style_dict.items()\
                if cls.RELEVANT_STYLE_KEY in key_dict\
                and key_dict[cls.RELEVANT_STYLE_KEY].startswith(cls.RELEVANT_CONTENT_STARTSWITH) }

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        properties = {}
        properties.update(cls.create_semantic_property_dictionary('markup', str, cardinality=1,\
                name='standoffTagHasMarkup', label='standoff tag has a specific markup', comment='Connects a standoff tag with its markup, e.g. bold or italic'))
        properties.update(cls.create_semantic_property_dictionary('startIndex', int, cardinality=1,\
                name='standoffTagHasStartIndex', label='standoff tag has a start index', comment='Connects a standoff tag with its start index.'))
        properties.update(cls.create_semantic_property_dictionary('endIndex', int, cardinality=1,\
                name='standoffTagHasEndIndex', label='standoff tag has a end index', comment='Connects a standoff tag with its end index.'))
        dictionary = { cls.CLASS_KEY: cls.get_class_dictionary(), cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    def is_joinable(self, other):
        """Return true if self and other have same markup and self.endIndex == other.startIndex.
        """
        return self.markup == other.markup and self.endIndex == other.startIndex

    def join(self, other):
        """Join self with other, i.e. extend self to the end index of other.
        """
        self.endIndex = other.endIndex

    def join_list(self, others):
        """Join all others that are joinable, return remaining others as a list.

        The others are tried in order; each successful join moves the end
        index of self, which decides whether the next one is joinable.
        """
        unjoinable_others = []
        for other in others:
            if self.is_joinable(other):
                self.join(other)
            else:
                unjoinable_others.append(other)
        return unjoinable_others
Index: svgscripts/extract_footnotes.py
===================================================================
--- svgscripts/extract_footnotes.py (revision 80)
+++ svgscripts/extract_footnotes.py (revision 81)
@@ -1,83 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to extract footnotes from a svg file.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see 1}}}
import getopt
import re
import sys
from os import listdir, sep, path
from os.path import isfile, isdir, dirname
import lxml.etree as ET
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.transkriptionField import TranskriptionField
-from datatypes.footnotes import extract_footnotes_as_strings
+from datatypes.footnotes import extract_footnotes, extract_footnotes_as_strings
def usage():
    """Print the help text of the script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to extract footnotes from a svg file.

    svgscripts/extract_footnotes.py [OPTIONS] <xmlFile>

        <xmlFile>       a xml file containing information about the position of the svg words.

        OPTIONS:
        -h|--help:      show help

        :return: exit code (int)
    """
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
    if len(args) < 1:
        usage()
        return 2
    xml_file = args[0]
    # The page is needed for its style dictionary, so only a xml file is
    # accepted; the svg file is taken from the page.
    if not xml_file.endswith('xml'):
        usage()
        return 2
    page = Page(xml_file)
    footnotes = extract_footnotes(page, svg_file=page.source)
    for footnote in footnotes:
        print(footnote.content)
        for markup in footnote.standoff_markups:
            print(f'->{markup.markup}, start:{markup.startIndex}, end:{markup.endIndex}')
    return 0
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
Index: tests_svgscripts/test_standoff_tag.py
===================================================================
--- tests_svgscripts/test_standoff_tag.py (revision 0)
+++ tests_svgscripts/test_standoff_tag.py (revision 81)
@@ -0,0 +1,46 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.standoff_tag import StandoffTag
+
class TestStandoffTag(unittest.TestCase):
    """Tests for datatypes.standoff_tag.StandoffTag."""

    def setUp(self):
        """Set the paths of the test data files as attributes of the test case.

        The directory test_data is looked up next to this file first, then
        one directory level above.
        """
        data_dir = dirname(__file__) + sep + 'test_data'
        if not isdir(data_dir):
            data_dir = dirname(dirname(__file__)) + sep + 'test_data'
        file_names = {
            'test_file': 'test.xml',
            'test_svg_file': 'test421.svg',
            'pdf_xml': 'W_I_8_page125.xml',
            'xml_file': 'N_VII_1_page005.xml',
            'xml_fileB': 'N_VII_1_page006.xml',
            'pdf_xml_source': 'W_I_8_neu_125-01.svg',
            'test_page': 'N_VII_1_page001.xml',
            'test_manuscript': 'N_VII_1.xml',
        }
        for attribute, file_name in file_names.items():
            setattr(self, attribute, data_dir + sep + file_name)

    def test_create_cls(self):
        """Each style string of the test page yields exactly one tag with the expected markup."""
        page = Page(self.test_page)
        expectations = (("st2 st6", 'italic'), ("st8 st6", 'bold'))
        for style_string, expected_markup in expectations:
            standoff_tags = StandoffTag.create_cls(0, 0, style_string, page=page)
            self.assertEqual(len(standoff_tags), 1)
            self.assertEqual(standoff_tags[0].markup, expected_markup)

    def test_create_relevant_dict(self):
        """The test page has four relevant styles."""
        relevant_dict = StandoffTag.create_relevant_style_dictionary(Page(self.test_page))
        self.assertEqual(len(relevant_dict), 4)

    def test_semantic(self):
        """Placeholder: the semantic dictionary is not checked yet."""
        # For manual inspection: print(StandoffTag.get_semantic_dictionary())
        pass
+
+if __name__ == "__main__":
+ unittest.main()
Index: tests_svgscripts/test_data/N_VII_1_page006.xml
===================================================================
--- tests_svgscripts/test_data/N_VII_1_page006.xml (revision 80)
+++ tests_svgscripts/test_data/N_VII_1_page006.xml (revision 81)
@@ -1,1276 +1,1276 @@
svgWordPosition2019-08-02 15:17:372019-08-02 15:17:372019-08-02 15:30:592019-08-02 15:30:59
- 2019-12-20 11:38:48
+ 2020-01-08 09:49:28
Index: tests_svgscripts/test_footnotes.py
===================================================================
--- tests_svgscripts/test_footnotes.py (revision 80)
+++ tests_svgscripts/test_footnotes.py (revision 81)
@@ -1,33 +1,36 @@
import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
-from datatypes.footnotes import extract_footnotes_as_strings, UNITTESTING
+from datatypes.footnotes import extract_footnotes, extract_footnotes_as_strings, UNITTESTING
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkriptionField import TranskriptionField
class TestExtractFootnotes(unittest.TestCase):
    """Tests for the footnote extraction of datatypes.footnotes."""

    def setUp(self):
        """Set the paths of the test data files as attributes of the test case."""
        # NOTE(review): this binds a local name only; it does not change the
        # UNITTESTING flag imported from datatypes.footnotes.
        UNITTESTING = True
        data_dir = path.dirname(__file__) + sep + 'test_data'
        file_names = {
            'test_footnote': 'W_I_8_neu_125-01.svg',
            'test_footnote_verso': 'N_VII_1_xp5_4_page5.svg',
            'test_footnote_recto': 'N_VII_1_xp5_4_page6.svg',
            'test_footnote_multi': 'N_VII_1_xp5_4_page13.svg',
            'test_footnote_multi_xml': 'N_VII_1_page013.xml',
        }
        for attribute, file_name in file_names.items():
            setattr(self, attribute, data_dir + sep + file_name)

    def test_extract_footnotes(self):
        """Both extraction functions find four footnotes containing the search term."""
        search_term = 'Anschlußzeichen'
        footnote_strings = extract_footnotes_as_strings(svg_file=self.test_footnote_multi, contains_string=search_term)
        self.assertEqual(len(footnote_strings), 4)
        page = Page(self.test_footnote_multi_xml)
        footnote_texts = extract_footnotes(page, svg_file=self.test_footnote_multi, contains_string=search_term)
        self.assertEqual(len(footnote_texts), 4)
-
if __name__ == "__main__":
unittest.main()
Index: tests_svgscripts/test_text.py
===================================================================
--- tests_svgscripts/test_text.py (revision 0)
+++ tests_svgscripts/test_text.py (revision 81)
@@ -0,0 +1,30 @@
+import unittest
+from os import sep, path
+from os.path import dirname, basename, isfile, isdir
+import lxml.etree as ET
+import sys
+
+sys.path.append('svgscripts')
+from datatypes.page import Page
+from datatypes.text import Text
+
class TestText(unittest.TestCase):
    """Tests for datatypes.text.Text."""

    def setUp(self):
        """Set the paths of the test data files as attributes of the test case.

        The directory test_data is looked up next to this file first, then
        one directory level above.
        """
        data_dir = dirname(__file__) + sep + 'test_data'
        if not isdir(data_dir):
            data_dir = dirname(dirname(__file__)) + sep + 'test_data'
        file_names = {
            'test_file': 'test.xml',
            'test_svg_file': 'test421.svg',
            'pdf_xml': 'W_I_8_page125.xml',
            'xml_file': 'N_VII_1_page005.xml',
            'xml_fileB': 'N_VII_1_page006.xml',
            'pdf_xml_source': 'W_I_8_neu_125-01.svg',
            'test_page': 'N_VII_1_page001.xml',
            'test_manuscript': 'N_VII_1.xml',
        }
        for attribute, file_name in file_names.items():
            setattr(self, attribute, data_dir + sep + file_name)

    def test_semantic(self):
        """Placeholder: the semantic dictionary is not checked yet."""
        # For manual inspection: print(Text.get_semantic_dictionary())
        pass
+
+if __name__ == "__main__":
+ unittest.main()
Index: TODO.md
===================================================================
--- TODO.md (revision 80)
+++ TODO.md (revision 81)
@@ -1,98 +1,96 @@
# Wortsuche:
- Die Wortsuche sollte über die topologische Nähe der Wörter zueinander gewichtet werden.
- Wortpfade, d.h. Abfolgen der Wörter sollen vermieden werden, da dies nicht automatisch generiert werden kann und
höchst fehleranfällig ist.
- Daher sollen die Worteinfügungen auch nicht dafür verwendet werden, alternative Textverläufe aufzuzeichnen.
# TODO
## Faksimile data input
- word boxes on faksimile by drawing rects with inkscape [IN PROGRESS, see "Leitfaden.pdf"]
- naming word boxes by using title of rects [IN PROGRESS, see "Leitfaden\_Kontrolle\_und\_Beschriftung\_der\_Wortrahmen.pdf"]
-- splitting word box if a word has parts by drawing a vertical path in rect [TODO]
+- correcting faksimile svg or transkription xml if words do not correspond
## Processing
### faksimile data input, i.e. svg-file resulting from drawing boxes etc. with inkscape
- process faksimile words:
- join\_faksimileAndTranskription.py [DONE]
- - create a data input task for words that have parts: [TODO]
- - create pdf marking relevant words and their parts of transkription
- - create faksimile svg highlighting relevant rects
- - copy pdf and svg to designated folder for this task
+ - create a data input task for words that do not correspond [DONE]
### transkription, i.e. svg-file resulting from pdf-file ->created with InDesign
- process text field:
- Word [DONE]
- SpecialWord
- MarkForeignHands [DONE]
- TextConnectionMark [DONE]
- WordInsertionMark [DONE]
- all paths -> page.categorize\_paths [TODO]
- word-deletion -> Path [DONE]
- - make parts of word if only parts of a word are deleted, also introduce earlier version of word [TODO]
+ - make parts of word if only parts of a word are deleted, also introduce earlier version of word [DONE]
+ - correction concerning punctuations in words that are deleted, script does not recognize parts of deleted
+ words as deleted if they consist of punctuation marks. [TODO]
- word-undeletion (e.g. N VII 1, 18,6 -> "mit")
- underline
- text-area-deletion
- text-connection-lines
- boxes
- process footnotes:
+ - Return footnotes with styles [TODO]+
- TextConnectionMark [DONE]
- TextConnection with uncertainty [TODO]
- "Fortsetzung [0-9]+,[0-9]+?"
- "Fortsetzung von [0-9]+,[0-9]+?"
- concerning Word:
- - uncertain transcription: "?"
+ - uncertain transcription: "?" / may have bold word parts
- atypical writing: "¿" and bold word parts
- clarification corrections ("Verdeutlichungskorrekturen"): "Vk" and bold word parts
- correction: "word>" and ">?" (with uncertainty)
- concerning word deletion:
- atypical writing: "¿" and "Durchstreichung" (see N VII 1, 11,2)
- process margins:
- MarkForeignHands [DONE]
- ForeignHandTextAreaDeletion [TODO]
- boxes: make earlier version of a word [TODO]
- TextConnection [TODO]
- from: ([0-9]+,)*[0-9]+ -)
- to: -) ([0-9]+,)*[0-9]+
## Datatypes
- make datatypes:
- Page [ok] --> page orientation!!!
- SimpleWord
- SpecialWord
- MarkForeignHands ("Zeichen für Fremde Hand") [DONE]
- TextConnectionMark ("Anschlußzeichen") [DONE]
- has a Reference
- - Word [ok] --> deal with non-horizontal text <<<< DONE!
- --> hyphenation
- --> add style info to word: font { German, Latin }
- --> pen color
+ - Word [ok] --> deal with non-horizontal text [DONE]
+ --> hyphenation [TODO]
+ --> add style info to word: font { German, Latin } [DONE]
+ --> pen color [DONE]
--> connect style with character glyph-id from svg path file
- --> handle word layers, i.e. later correction of words by insertion
- --> has parts [TODO]
- --> versions: later version of earlier version
- - WritingProcess
+ --> has parts [DONE]
+ --> versions: later version of earlier version [DONE]
+ - WritingProcess >>>> use only in connection with earlier versions of word
- correlates with font size:
- biggest font to biggest-1 font: stage 0
- font in between: stage 1
- smallest font to smallest+1 font: stage 2
- - Style
- - TODO: howto handle style_class in rdf? (as JSON?)
+ - Style [DONE]
- WordPosition [ok]
- TranskriptionPosition [ok]
- FaksimilePosition [ok]
- LineNumber [reDo]
- change to Line
- Reference [TODO]+
- TextConnection
- needs change of LineNumber to Line
- ForeignHandTextAreaDeletion [TODO]
- Freehand:
- Deletion [DONE]
- - make parts of word if only parts of a word are deleted, also introduce earlier version of word [TODO]
+ - make parts of word if only parts of a word are deleted, also introduce earlier version of word [DONE]
- WordInsertionMark [reDO]
- Underline [TODO]