Page MenuHomec4science

mark_foreign_hands.py
No OneTemporary

File Metadata

Created
Wed, May 15, 16:04

mark_foreign_hands.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent the mark for text by some foreign hand.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from .matrix import Matrix
from .special_word import SpecialWord
class MarkForeignHands(SpecialWord):
    """
    This class represents the mark for text by some foreign hand.

    Besides the attributes handled by SpecialWord, an instance carries
    foreign_hands_text (the text written by the foreign hand) and pen
    (the pen that was used); both default to the empty string.
    """
    # Tag names used when the mark is written to a xml tree.
    XML_TAG = 'mark-foreign-hands'
    XML_SUB_TAG = 'text'
    # The char that defines this special word (see get_special_char_list).
    CLASS_MARK = '$'
    # Replacements applied to the text of Sonderzeichen nodes (see get_text_from_node).
    REPLACE_DICT = { '+': 'x' }

    def __init__(self, id=0, line_number=-1, text=CLASS_MARK, foreign_hands_text='', pen='',
                 transkription_positions=None, faksimile_positions=None):
        """Creates a MarkForeignHands.

        :param foreign_hands_text: the text written by the foreign hand.
        :param pen: the pen used by the foreign hand.
        :param transkription_positions: list of positions, None means a new empty list.
        :param faksimile_positions: list of positions, None means a new empty list.

        All other arguments are passed on unchanged to SpecialWord.
        """
        # Create the lists per call: a list literal as default value would be
        # shared by every instance that is created without these arguments.
        if transkription_positions is None:
            transkription_positions = []
        if faksimile_positions is None:
            faksimile_positions = []
        super().__init__(id=id, text=text, line_number=line_number,
                         transkription_positions=transkription_positions,
                         faksimile_positions=faksimile_positions)
        self.foreign_hands_text = foreign_hands_text
        self.pen = pen

    def add_content(self, node):
        """Adds content to MarkForeignHands.

        Reads the text and the attribute 'pen' from node. Missing values are
        stored as empty strings, i.e. the same "unset" value __init__ uses, such
        that attach_word_to_tree never has to deal with None.
        """
        self.foreign_hands_text = node.text or ''
        self.pen = node.get('pen') or ''

    def attach_word_to_tree(self, target_tree):
        """Attaches MarkForeignHands to tree target_tree.

        A sub node XML_SUB_TAG is created only if there is some foreign hands text;
        the attribute 'pen' is set on this sub node only if a pen is known.

        :return: the node created by the super class implementation.
        """
        node = super().attach_word_to_tree(target_tree)
        # Truthiness covers '' as well as None (e.g. set by external code).
        if self.foreign_hands_text:
            content_node = ET.SubElement(node, MarkForeignHands.XML_SUB_TAG)
            content_node.text = self.foreign_hands_text
            if self.pen:
                content_node.set('pen', self.pen)
        return node

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        Extends the dictionary of the super class by the properties
        'foreign_hands_text' and 'pen'.
        """
        dictionary = super().get_semantic_dictionary()
        properties = dictionary[cls.PROPERTIES_KEY]
        properties.update(cls.create_semantic_property_dictionary(
            'foreign_hands_text', str, cardinality=1,
            name='textOfForeignHands', label='text traces of some foreign hand'))
        properties.update(cls.create_semantic_property_dictionary(
            'pen', str, cardinality=1, cardinality_restriction='maxCardinality',
            name='penOfForeignHands', label='pen used to write text by some foreign hand'))
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    @classmethod
    def get_special_char_list(cls):
        """Returns a list of the chars that define this special word.
        """
        return [ cls.CLASS_MARK ]

    @staticmethod
    def find_content(list_of_special_words, transkription_field, svg_tree, style_dict=None,
                     italic_classes=None, SonderzeichenList=None, marginals_extra=False,
                     set_to_text_field_zero=True):
        """Find content for the MarkForeignHands.

        For each mark in list_of_special_words the text nodes of svg_tree that are in
        the margin field and vertically close to the mark (see is_close) are visited
        from left to right. Text before the first italic part becomes the mark's
        foreign_hands_text, the first italic part and everything after it becomes
        the mark's pen.

        :param list_of_special_words: the marks that will be updated in place.
        :param transkription_field: passed to Matrix.IS_IN_MARGIN_FIELD and,
            if set_to_text_field_zero, to is_close.
        :param svg_tree: tree that is searched for './/text' nodes.
        :param style_dict: dictionary style class -> style properties; used to derive
            italic_classes and SonderzeichenList if these are empty.
        :param italic_classes: style classes of italic fonts.
        :param SonderzeichenList: style classes of Sonderzeichen fonts.
        :param marginals_extra: passed to Matrix.IS_IN_MARGIN_FIELD as marginals_on_extra_page.
        :param set_to_text_field_zero: if False, is_close gets no transkription_field.
        """
        if style_dict is None:
            style_dict = {}
        if italic_classes is None:
            italic_classes = []
        if SonderzeichenList is None:
            SonderzeichenList = []
        if len(style_dict) > 0:
            # Derive the class lists from the font families, but only if the caller
            # did not provide them.
            if len(italic_classes) == 0:
                italic_classes = [ key for key, style in style_dict.items()
                                   if (style.get('font-family') or '').endswith('Italic') ]
            if len(SonderzeichenList) == 0:
                SonderzeichenList = [ key for key, style in style_dict.items()
                                      if (style.get('font-family') or '').startswith('Sonderzeichen') ]
        root = svg_tree.getroot()
        nodes_in_margin_field = [ node for node in root.iterfind('.//text', root.nsmap)
                                  if Matrix.IS_IN_MARGIN_FIELD(node.get('transform'), transkription_field,
                                                               marginals_on_extra_page=marginals_extra) ]
        provide_tf = transkription_field if set_to_text_field_zero else None
        for mark_foreign_hands in list_of_special_words:
            if not mark_foreign_hands.transkription_positions:
                # Without a position there is nothing the margin nodes could be
                # compared with: leave the content of this mark untouched.
                continue
            position = mark_foreign_hands.transkription_positions[0]
            vertical_center = (position.bottom + position.top) / 2
            relevant_nodes = sorted(
                ( node for node in nodes_in_margin_field
                  if is_close(vertical_center, node.get('transform'), transkription_field=provide_tf) ),
                key=lambda x: Matrix(transform_matrix_string=x.get('transform')).getX())
            italic_found = False
            mark_foreign_hands_text = ''
            pen = ''
            for node in relevant_nodes:
                # A node with children contributes the text of its children (tspans),
                # a node without children contributes its own text.
                text_parts = list(node) if len(node) > 0 else [ node ]
                for part in text_parts:
                    # Nodes may lack text or a class attribute: treat both as empty.
                    if italic_found:
                        pen += part.text or ''
                    elif any(style in italic_classes for style in (part.get('class') or '').split()):
                        italic_found = True
                        pen = part.text or ''
                    else:
                        mark_foreign_hands_text += get_text_from_node(part, SonderzeichenList) or ''
            mark_foreign_hands.foreign_hands_text = mark_foreign_hands_text
            mark_foreign_hands.pen = pen
def get_text_from_node(node, SonderzeichenList):
    """Returns the text of node. Replaces Sonderzeichen if node has a style class in SonderzeichenList.

    :param node: a node providing the attribute 'text' and the method get('class').
    :param SonderzeichenList: the style classes that mark Sonderzeichen.
    :return: the replacement from MarkForeignHands.REPLACE_DICT if node has a
        Sonderzeichen class and a replacement exists for its text, otherwise
        the text of node. A node without text yields the empty string.
    """
    # Callers concatenate the result, so a missing text must not be None.
    text = node.text or ''
    # A node without class attribute simply has no style classes.
    style_classes = (node.get('class') or '').split()
    if any(style in SonderzeichenList for style in style_classes):
        # Fall back to the text itself if there is no (non-empty) replacement.
        return MarkForeignHands.REPLACE_DICT.get(text) or text
    return text
def is_close(mark_foreign_hands_position, matrix_string, transkription_field=None, threshold_y=4):
    """Return true if mark_foreign_hands_position is == matrix.getY()+-threshold_y

    :param mark_foreign_hands_position: the vertical position of the mark.
    :param matrix_string: the transform matrix string from which the Matrix is created.
    :param transkription_field: passed on unchanged to Matrix.
    :param threshold_y: the distance below which the two positions count as close
        (exclusive, i.e. a distance equal to threshold_y is not close).
    :return: True if the absolute difference of the positions is smaller than threshold_y.
    """
    matrix = Matrix(transform_matrix_string=matrix_string, transkription_field=transkription_field)
    return abs(mark_foreign_hands_position - matrix.getY()) < threshold_y

Event Timeline