Page MenuHomec4science

mark_foreign_hands.py
No OneTemporary

File Metadata

Created
Wed, May 8, 09:20

mark_foreign_hands.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent the mark for text by some foreign hand.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from .matrix import Matrix
from .special_word import SpecialWord
class MarkForeignHands(SpecialWord):
    """
    This class represents the mark for text by some foreign hand.

    Besides what it inherits from SpecialWord, an instance keeps the text
    written by the foreign hand (foreign_hands_text) and the pen that was
    used to write it (pen).
    """
    XML_TAG = 'mark-foreign-hands'
    XML_SUB_TAG = 'text'
    CLASS_MARK = '$'
    REPLACE_DICT = { '+': 'x' }

    def __init__(self, id=0, line_number=-1, text=CLASS_MARK, foreign_hands_text='', pen='',
                 transkription_positions=None, faksimile_positions=None):
        """Creates a MarkForeignHands.

        :param id: passed on to SpecialWord as id.
        :param line_number: passed on to SpecialWord as line_number.
        :param text: passed on to SpecialWord as text, defaults to CLASS_MARK.
        :param foreign_hands_text: the text written by the foreign hand.
        :param pen: the pen used by the foreign hand.
        :param transkription_positions: passed on to SpecialWord; a new empty list if omitted.
        :param faksimile_positions: passed on to SpecialWord; a new empty list if omitted.
        """
        # None instead of [] as default: a list literal in the signature is
        # created once and would be shared by every instance built without it.
        if transkription_positions is None:
            transkription_positions = []
        if faksimile_positions is None:
            faksimile_positions = []
        super().__init__(id=id, text=text, line_number=line_number,
                         transkription_positions=transkription_positions,
                         faksimile_positions=faksimile_positions)
        self.foreign_hands_text = foreign_hands_text
        self.pen = pen

    def add_content(self, node):
        """Adds content to MarkForeignHands.

        Reads foreign_hands_text from the text of node and pen from its 'pen' attribute.
        """
        self.foreign_hands_text = node.text
        self.pen = node.get('pen')

    def attach_word_to_tree(self, target_tree):
        """Attaches MarkForeignHands to tree target_tree.

        If foreign_hands_text is not the empty string, a XML_SUB_TAG element
        containing it is appended to the node created by the parent class.
        A non-empty pen is written to that element as attribute 'pen'.
        """
        node = super().attach_word_to_tree(target_tree)
        if self.foreign_hands_text != '':
            content_node = ET.SubElement(node, MarkForeignHands.XML_SUB_TAG)
            content_node.text = self.foreign_hands_text
            # Truthiness instead of != '': add_content stores None when the
            # source node has no 'pen' attribute, and None cannot be set as
            # an attribute value.
            if self.pen:
                content_node.set('pen', self.pen)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        Extends the 'properties' of the parent's dictionary by the entries
        'foreign_hands_text' and 'pen'.
        """
        dictionary = super().get_semantic_dictionary()
        dictionary['properties'].update({
            'foreign_hands_text': {
                'class': str, 'cardinality': 1,
                'xpath': '{0}/{1}/text()'.format(cls.XML_TAG, MarkForeignHands.XML_SUB_TAG),
                'name': 'textOfForeignHands', 'label': 'text traces of some foreign hand'},
            'pen': {
                'class': str, 'cardinality': 1, 'cardinality_restriction': 'maxCardinality',
                'xpath': '{0}/{1}/@pen'.format(cls.XML_TAG, MarkForeignHands.XML_SUB_TAG),
                'name': 'penOfForeignHands', 'label': 'pen used to write text by some foreign hand'},
        })
        return dictionary

    @classmethod
    def get_special_char_list(cls):
        """Returns a list of the chars that define this special word.
        """
        return [ cls.CLASS_MARK ]

    @staticmethod
    def _collect_text_and_pen(relevant_nodes, italic_classes, SonderzeichenList):
        """Returns the tuple (foreign_hands_text, pen) gathered from relevant_nodes.

        Each node contributes itself if it has no children, otherwise its
        children. Everything before the first element with a style class in
        italic_classes is collected as text, that element and everything
        after it as pen.
        """
        italic_found = False
        foreign_hands_text = ''
        pen = ''
        for node in relevant_nodes:
            # list(node) replaces the deprecated node.getchildren().
            children = list(node)
            for element in (children if len(children) > 0 else [node]):
                if not italic_found:
                    italic_found = any(style in italic_classes for style in element.get('class').split(' '))
                # "or ''": an element without text has text None, which
                # cannot be concatenated to a string.
                if italic_found:
                    pen += element.text or ''
                else:
                    foreign_hands_text += get_text_from_node(element, SonderzeichenList) or ''
        return foreign_hands_text, pen

    @staticmethod
    def find_content(list_of_special_words, transkription_field, svg_tree, style_dict=None, italic_classes=None, SonderzeichenList=None):
        """Find content for the MarkForeignHands.

        Sets foreign_hands_text and pen of every item of list_of_special_words
        from those 'text' elements of svg_tree that lie in the margin field
        (Matrix.IS_IN_MARGIN_FIELD) and are vertically close (is_close) to the
        middle of the item's first transkription position. The elements are
        processed in ascending order of their x coordinate.

        :param list_of_special_words: the MarkForeignHands to be updated in place.
        :param transkription_field: passed on to Matrix.IS_IN_MARGIN_FIELD and is_close.
        :param svg_tree: the tree that is searched for 'text' elements.
        :param style_dict: maps style class names to dictionaries of style
            properties; used to derive the two lists below if they are empty.
        :param italic_classes: style classes that mark the pen; if empty, the
            keys of style_dict with a 'font-family' ending with 'Italic'.
        :param SonderzeichenList: style classes passed to get_text_from_node;
            if empty, the keys of style_dict with a 'font-family' starting
            with 'Sonderzeichen'.
        """
        # None defaults avoid mutable default arguments.
        style_dict = {} if style_dict is None else style_dict
        italic_classes = [] if italic_classes is None else italic_classes
        SonderzeichenList = [] if SonderzeichenList is None else SonderzeichenList
        if len(style_dict) > 0:
            if len(italic_classes) == 0:
                italic_classes = [ key for key in style_dict
                                   if (style_dict[key].get('font-family') or '').endswith('Italic') ]
            if len(SonderzeichenList) == 0:
                SonderzeichenList = [ key for key in style_dict
                                      if (style_dict[key].get('font-family') or '').startswith('Sonderzeichen') ]
        root = svg_tree.getroot()
        nodes_in_margin_field = [ node for node in root.iterfind('.//text', root.nsmap)
                                  if Matrix.IS_IN_MARGIN_FIELD(node.get('transform'), transkription_field) ]
        for mark_foreign_hands in list_of_special_words:
            positions = mark_foreign_hands.transkription_positions
            relevant_nodes = []
            # A mark without any position cannot be matched with a node: it
            # gets empty content instead of raising an IndexError.
            if len(positions) > 0:
                mark_y = (positions[0].bottom + positions[0].top) / 2
                relevant_nodes = sorted(
                    (node for node in nodes_in_margin_field
                     if is_close(mark_y, node.get('transform'), transkription_field)),
                    key=lambda node: Matrix(transform_matrix_string=node.get('transform')).getX())
            mark_foreign_hands.foreign_hands_text, mark_foreign_hands.pen = \
                MarkForeignHands._collect_text_and_pen(relevant_nodes, italic_classes, SonderzeichenList)
def get_text_from_node(node, SonderzeichenList):
    """Returns the text of node. Replaces Sonderzeichen if node has a style class in SonderzeichenList.

    :param node: an element that provides get('class') and text.
    :param SonderzeichenList: the style class names that mark Sonderzeichen.
    :return: the entry of MarkForeignHands.REPLACE_DICT for the text of node
        if node has one of the style classes and the text is a key of that
        dictionary, otherwise the unchanged text of node.
    """
    # A node without 'class' attribute has no style classes at all; calling
    # split on the missing attribute (None) would raise an AttributeError.
    style_classes = (node.get('class') or '').split()
    if any(style in SonderzeichenList for style in style_classes):
        # One lookup with the text itself as fallback; unlike a truthiness
        # test on the looked-up value, this also honours an empty replacement.
        return MarkForeignHands.REPLACE_DICT.get(node.text, node.text)
    return node.text
def is_close(mark_foreign_hands_position, matrix_string, transkription_field):
    """Tell whether a position lies within THRESHOLD_Y of the y value of a transform matrix.

    A Matrix is built from matrix_string and transkription_field; the result
    is True if the absolute difference between mark_foreign_hands_position
    and the getY() of that matrix is strictly smaller than THRESHOLD_Y.
    """
    THRESHOLD_Y = 4
    node_y = Matrix(transform_matrix_string=matrix_string, transkription_field=transkription_field).getY()
    vertical_distance = abs(mark_foreign_hands_position - node_y)
    return vertical_distance < THRESHOLD_Y

Event Timeline