Page MenuHomec4science

transkription_position.py
No OneTemporary

File Metadata

Created
Tue, Apr 23, 18:31

transkription_position.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a transkription word position.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
from .debug_message import DebugMessage
from .image import SVGImage
from .positional_word_part import PositionalWordPart
from .word_position import WordPosition
from .matrix import Matrix
sys.path.append('py2ttl')
from class_spec import SemanticClass
class TranskriptionPosition(WordPosition):
    """
    This class represents the position of a word on the transkription as it is displayed by a svg image.
    @label position of a word on the topological transkription
    Args:
        id (int): word id
        matrix (datatypes.Matrix): matrix containing information about transformation.
        height (float): height of word
        width (float): width of word
        x (float): x position of word
        y (float): y position of word
        positional_word_parts a list of (datatypes.positional_word_part) PositionalWordPart
        debug_message a (datatypes.debug_message) DebugMessage
    """
    # NOTE(review): the class docstring above carries an '@label' marker that is
    # presumably parsed by the semantic tooling (SemanticClass) -- keep its wording
    # stable unless that tooling is checked.

    # Horizontal padding: subtracted from the left edge and added to the width
    # when a position is built from positional word parts.
    ADD2X = 0.15
    # Not referenced inside this class; presumably used by callers -- TODO confirm.
    ADD2TOP = 1.0
    ADD2BOTTOM = 0.2
    HEIGHT_FACTOR = 1.1 # factor that multiplies biggest_font_size -> height
    XML_TAG = WordPosition.TRANSKRIPTION

    def __init__(self, id=0, node=None, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None, positional_word_parts=None, debug_message=None):
        """Create a position from explicit values or from a xml node.

        If node is given, debug_message and positional_word_parts are read from
        the node and the corresponding arguments are ignored.
        """
        super(TranskriptionPosition, self).__init__(id=id, node=node, height=height, width=width, x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION)
        self.positional_word_parts = positional_word_parts if positional_word_parts is not None else []
        self.debug_message = debug_message
        self.deleted = False
        self.has_box = None
        self.style = None
        self.svg_image = None
        if node is not None:
            # Evaluate the xpath only once; the first matching node (if any) wins.
            debug_nodes = node.xpath('.//' + DebugMessage.XML_TAG)
            self.debug_message = DebugMessage(node=debug_nodes[0]) if len(debug_nodes) > 0 else None
            self.positional_word_parts = [
                PositionalWordPart(node=pwp_node)
                for pwp_node in node.xpath('.//' + PositionalWordPart.XML_TAG)
            ]
        # attachable_objects is provided by the superclass; register the children on it.
        self.attachable_objects += self.positional_word_parts
        if self.debug_message is not None:
            self.attachable_objects.append(self.debug_message)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        dictionary = super(TranskriptionPosition, cls).get_semantic_dictionary()
        svg_image_property = cls.create_semantic_property_dictionary(
            'svg_image', SVGImage, cardinality=1,
            name='isOnSvgImage', label='transkription position is on svg image')
        dictionary[cls.PROPERTIES_KEY].update(svg_image_property)
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    def get_text(self):
        """Returns the concatenated text of all positional_word_parts.
        """
        return ''.join(pwp.text for pwp in self.positional_word_parts)

    def is_mergebale_with(self, other) -> bool:
        """Return whether self and other have same writing_process_id or style.

        If the writing process ids differ but at least one of them is -1, the
        style_class of the first positional word part of each position decides.
        The style comparison is only attempted if both positions actually have
        positional word parts.
        """
        if self.writing_process_id == other.writing_process_id:
            return True
        # The parentheses matter: 'and' binds tighter than 'or', so without them
        # the non-empty check would be skipped whenever self.writing_process_id
        # is -1 and indexing an empty list below would raise IndexError.
        one_id_is_unset = self.writing_process_id == -1 or other.writing_process_id == -1
        both_have_parts = len(self.positional_word_parts) > 0 and len(other.positional_word_parts) > 0
        if one_id_is_unset and both_have_parts:
            return self.positional_word_parts[0].style_class == other.positional_word_parts[0].style_class
        return False

    def split(self, split_position, second_split=-1) ->list:
        """Split a transkription_position in two (or three) at split_position.

        A positional word part belongs to the left part if its right edge
        (left + width) is smaller than split_position. If second_split is given
        (i.e. is not -1), the remaining parts whose right edge is smaller than
        second_split form a middle part; everything else forms the right part.

        The left part keeps self.id, the following parts get self.id + 1 and,
        in case of a second split, self.id + 2 (passed as strings).

        :return: a list of the new transkription_positions
        """
        # NOTE(review): the factory may itself return several positions for one
        # group (numbered id, id + 1, ...) when matrices or styles differ, so
        # ids can still overlap between groups in that case.
        use_second_split = second_split != -1
        left_pwp, middle_pwp, right_pwp = [], [], []
        for pwp in self.positional_word_parts:
            right_edge = pwp.left + pwp.width
            if right_edge < split_position:
                left_pwp.append(pwp)
            elif use_second_split and right_edge < second_split:
                middle_pwp.append(pwp)
            else:
                right_pwp.append(pwp)
        create = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS
        transkription_positions = []
        transkription_positions += create(left_pwp, transkription_position_id=self.id)
        next_id = int(self.id) + 1
        if use_second_split:
            transkription_positions += create(middle_pwp, transkription_position_id=str(next_id))
            # The right part must not reuse the id of the middle part.
            next_id += 1
        transkription_positions += create(right_pwp, transkription_position_id=str(next_id))
        return transkription_positions

    def update_positional_word_parts(self, positional_word_parts):
        """Update positional_word_parts.

        The old positional word parts are detached from attachable_objects
        before the new ones are attached.
        """
        old_parts = self.positional_word_parts
        if len(old_parts) > 0:
            # The parts were attached one by one (list +=), so they have to be
            # removed one by one as well; compare by identity so that only the
            # very same objects are dropped. Modify the list in place.
            self.attachable_objects[:] = [
                attached for attached in self.attachable_objects
                if not any(attached is old_part for old_part in old_parts)
            ]
        self.positional_word_parts = positional_word_parts
        self.attachable_objects += self.positional_word_parts

    @staticmethod
    def CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(positional_word_parts, debug_message=None, debug_msg_string=None, transkription_position_id=0):
        """Creates a list of TranskriptionPosition from a list of (datatypes.positional_word_part) PositionalWordPart.

        The leading run of positional word parts that agree with the first part
        (same style_class, no differing conversion factors of the transform)
        forms one position. The remaining parts are processed recursively with
        the next id and a debug message naming the reason for the break.

        Side effect: the id of each positional word part is renumbered
        (starting at 0) within its position.

        [:return:] a list of (datatypes.transkription_position) TranskriptionPosition
        """
        TOPCORRECTION = 1
        # A message string takes precedence over a ready-made debug_message.
        if debug_msg_string is not None:
            debug_message = DebugMessage(message=debug_msg_string)
        transkription_positions = []
        if len(positional_word_parts) < 1:
            return []
        matrix = positional_word_parts[0].transform
        style_class = positional_word_parts[0].style_class
        matrices_differ = False
        styles_differ = False
        # Advance index up to the first part that does not fit the first one.
        index = 0
        while index < len(positional_word_parts) and not matrices_differ and not styles_differ:
            current = positional_word_parts[index]
            if Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix, current.transform):
                matrices_differ = True
            elif style_class != current.style_class:
                styles_differ = True
            else:
                index += 1
        if (matrices_differ or styles_differ) and index < len(positional_word_parts):
            debug_msg_string = 'matrices differ' if matrices_differ else 'styles differ'
            transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                positional_word_parts[index:], debug_msg_string=debug_msg_string,
                transkription_position_id=int(transkription_position_id)+1)
            positional_word_parts = positional_word_parts[:index]
        first_pwp = positional_word_parts[0]
        last_pwp = positional_word_parts[-1]
        # Bounding box: tallest part plus a correction above and below,
        # horizontally padded by ADD2X on both sides.
        height = max(pwp.height for pwp in positional_word_parts) + 2*TOPCORRECTION
        x = first_pwp.left - TranskriptionPosition.ADD2X
        y = min(pwp.top for pwp in positional_word_parts) - TOPCORRECTION
        width = last_pwp.left - x + last_pwp.width + TranskriptionPosition.ADD2X
        for pwp_index, pwp in enumerate(positional_word_parts):
            pwp.id = pwp_index
        # The position for the leading run goes in front of the recursive results.
        transkription_positions.insert(0, TranskriptionPosition(
            id=transkription_position_id, height=height, width=width, x=x, y=y, matrix=matrix,
            positional_word_parts=positional_word_parts, debug_message=debug_message))
        return transkription_positions

    @staticmethod
    def CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, matrix=None, debug_msg_string=None, transkription_field=None):
        """Creates a list of TranskriptionPosition from word_part_objs (i.e. a list of dictionaries
        with the keys: text, x, y, matrix, class).

        If page.svg_file exists, the positional word parts are created with the
        help of the parsed svg file, otherwise a simple list of positional word
        parts is created. If no positional word parts could be created, a single
        empty TranskriptionPosition (with matrix and debug message) is returned.

        [:return:] a list of (datatypes.transkription_position) TranskriptionPosition
        """
        positional_word_parts = []
        debug_message = None
        if debug_msg_string is not None:
            debug_message = DebugMessage(message=debug_msg_string)
        if page.svg_file is not None and isfile(page.svg_file):
            svg_path_tree = ET.parse(page.svg_file)
            # The default namespace has the key None in nsmap; xpath needs a real prefix.
            namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
            xmin = 0.0
            ymin = 0.0
            if transkription_field is not None:
                xmin = transkription_field.xmin
                ymin = transkription_field.ymin
            for part_obj in word_part_objs:
                # start_id continues the numbering over all word part objects.
                positional_word_parts += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(
                    part_obj, svg_path_tree, namespaces, page, start_id=len(positional_word_parts),
                    xmin=xmin, ymin=ymin)
        else:
            positional_word_parts = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        if len(positional_word_parts) > 0:
            return TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(positional_word_parts, debug_message=debug_message)
        return [ TranskriptionPosition(matrix=matrix, debug_message=debug_message) ]

Event Timeline