Page MenuHomec4science

positional_word_part.py
No OneTemporary

File Metadata

Created
Sat, May 18, 19:45

positional_word_part.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a positional word part, i.e. a part of a word that has a position on the transkription.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact, release state, licence and version.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from svgpathtools.parser import parse_path
import sys
import warnings
from .positional_object import PositionalObject
# 'py2ttl' is appended to the import search path before class_spec is imported
# (presumably class_spec lives in that directory -- confirm). The entry is a
# relative path, so it is resolved against the current working directory.
sys.path.append('py2ttl')
from class_spec import UnSemanticClass
class PositionalWordPart(PositionalObject,UnSemanticClass):
    """
    This class represents a positional word part, i.e. a part of a word that has a position on the transkription.

    Args:
        node (lxml.etree.Element): optional xml node; if given, text, symbol_id and
            style_class are read from its attributes 'text', 'symbol-id' and 'style-class'
            and override the corresponding keyword arguments.
        id (int): object id
        text (str): text
        symbol_id (str): id of corresponding symbol
        style_class (str): style class id
        matrix (datatypes.Matrix): matrix containing information about conversion.
        height (float): height of object
        width (float): width of object
        x (float): x position of object
        y (float): y position of object
    """
    WARN_NO_USE_NODE_FOUND = 'No use_node found'
    XML_TAG = 'word-part'
    extraStringKeys = [ 'text', 'symbol_id', 'style_class' ]

    def __init__(self, node=None, id=0, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None, text=None, symbol_id=None, style_class=None):
        super(PositionalWordPart, self).__init__(id=id, node=node, height=height, width=width, x=x, y=y, matrix=matrix, tag=PositionalWordPart.XML_TAG)
        # Rebind a new list instead of using "+=": in-place extension would also
        # grow the list of the base class if stringKeys happens to be shared.
        self.stringKeys = list(self.stringKeys) + list(PositionalWordPart.extraStringKeys)
        self.text = text
        self.symbol_id = symbol_id
        self.style_class = style_class
        if node is not None:
            # Values stored on the node take precedence over the keyword arguments.
            self.text = node.get('text')
            self.symbol_id = node.get('symbol-id')
            self.style_class = node.get('style-class')

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        The dictionary of the super class is extended by one string property
        (cardinality 1) for each key in extraStringKeys.
        """
        # Name the class explicitly: super(cls, cls) would resolve to this very
        # method again when called on a subclass and recurse without end.
        dictionary = super(PositionalWordPart, cls).get_semantic_dictionary()
        for extraStringKey in cls.extraStringKeys:
            dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary(extraStringKey, str, cardinality=1))
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    @staticmethod
    def CREATE_POSITIONAL_WORD_PART(text, use_node, namespaces, start_id=0, xmin=0.0, ymin=0.0, matrix=None, style_class=None, original_x=0.0, original_y=0.0):
        """Creates a PositionalWordPart from a svg use node.

        The position is taken from the attributes 'x' and 'y' of use_node, reduced by
        xmin and ymin. If the symbol referenced by the use node has a non empty path,
        width and height are taken from the bounding box of that path.

        Args:
            text (str): text of the word part
            use_node (lxml.etree.Element): node referencing a symbol by its xlink:href attribute
            namespaces (dict): namespaces for xpath queries; the keys 'xlink' and 'ns' are used
            start_id (int): id of the new word part
            xmin (float): offset that is subtracted from the x value of the use node
            ymin (float): offset that is subtracted from the y value of the use node
            matrix (datatypes.Matrix): matrix of the word part
            style_class (str): style class id
            original_x (float): currently unused
            original_y (float): if the matrix is a rotation matrix and this value
                is not 0, it replaces the y value

        [:return:] a PositionalWordPart
        """
        symbol_id = use_node.get('{%s}href' % namespaces['xlink']).replace('#', '')
        x = float(use_node.get('x')) - xmin if bool(use_node.get('x')) else 0.0
        y = float(use_node.get('y')) - ymin if bool(use_node.get('y')) else 0.0
        if matrix is not None and matrix.isRotationMatrix():
            x = matrix.get_old_x(x=x, y=y)
            y = original_y if original_y != 0 else y
        d_strings = use_node.xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
        if len(d_strings) > 0 and d_strings[0] != '':
            path = parse_path(d_strings[0])
            # Distinct names: do not shadow the parameters xmin and ymin.
            path_xmin, path_xmax, path_ymin, path_ymax = path.bbox()
            width = path_xmax - path_xmin
            # The height is never smaller than 3.
            height = path_ymax - path_ymin if path_ymax - path_ymin > 3 else 3
            if path_ymin < 0 and path_ymax < 0:
                # The path lies completely at negative y values: shift y by the (negative) path_ymin.
                y += path_ymin
            return PositionalWordPart(id=start_id, text=text, height=height, width=width, x=x, y=y-height,\
                    matrix=matrix, symbol_id=symbol_id, style_class=style_class)
        else:
            # No usable path: create the word part without dimension.
            return PositionalWordPart(id=start_id, text=text, x=x, y=y, matrix=matrix, symbol_id=symbol_id, style_class=style_class)

    @staticmethod
    def CREATE_POSITIONAL_WORD_PART_LIST(word_part_obj, svg_path_tree, namespaces, page=None, start_id=0, xmin=0.0, ymin=0.0, threshold=0.4, throw_error_if_not_found=False):
        """Creates a list of PositionalWordPart from a word_part_obj (a dictionary with the keys: text, x, y, matrix, class),
        using a (lxml.ElementTree) svg_path_tree and the corresponding namespaces.

        The first use node is searched for at the position of the word_part_obj (plus
        xmin/ymin, within +/- threshold); each further character of the text is
        assigned to the next sibling of the previous node. If there are characters left
        when the siblings are used up, the search starts again to the right of the
        last created word part.

        Args:
            word_part_obj (dict): dictionary with the keys text, x, y, matrix, class
            svg_path_tree (lxml.ElementTree): svg tree containing the use nodes
            namespaces (dict): namespaces for xpath queries
            page: used for creating simple word parts if no use node is found
            start_id (int): id of the first word part
            xmin (float): x offset between word_part_obj and svg_path_tree
            ymin (float): y offset between word_part_obj and svg_path_tree
            threshold (float): tolerance of the position search
            throw_error_if_not_found (bool): raise an Exception instead of falling
                back to simple word parts if no use node is found

        Raises:
            Exception: if no use node is found and page is None or throw_error_if_not_found is set.

        [:return:] a list of PositionalWordPart
        """
        word_part_list = []
        original_x, original_y = 0.0, 0.0
        x = float(word_part_obj['x']) if bool(word_part_obj.get('x')) else 0.0
        y = float(word_part_obj['y']) if bool(word_part_obj.get('y')) else 0.0
        text = word_part_obj.get('text')
        matrix = word_part_obj.get('matrix')
        style_class = word_part_obj.get('class')
        if matrix is not None and matrix.isRotationMatrix():
            original_x, original_y = x, y
            x = matrix.get_new_x(x=original_x, y=original_y)
            y = matrix.get_new_y(x=original_x, y=original_y)
        if text is not None and text != '':
            svg_x = x + xmin
            svg_y = y + ymin
            use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
                    .format(svg_x-threshold, svg_x+threshold,svg_y-threshold, svg_y+threshold), namespaces=namespaces)
            if len(use_nodes) > 0:
                current_use_node = use_nodes[0]
                index = 0
                word_part_list.append(PositionalWordPart.CREATE_POSITIONAL_WORD_PART(text[index], current_use_node, namespaces,\
                        start_id=start_id, xmin=xmin, ymin=ymin, matrix=matrix, style_class=style_class, original_x=original_x, original_y=original_y))
                index, start_id = index + 1, start_id + 1
                # One following sibling per remaining character.
                while index < len(text) and current_use_node.getnext() is not None:
                    current_use_node = current_use_node.getnext()
                    word_part_list.append(PositionalWordPart.CREATE_POSITIONAL_WORD_PART(text[index], current_use_node, namespaces,\
                            start_id=start_id, xmin=xmin, ymin=ymin, matrix=matrix, style_class=style_class, original_x=original_x, original_y=original_y))
                    index, start_id = index+1, start_id+1
                if index < len(text) and current_use_node.getnext() is None:
                    # Siblings are used up: search for the rest of the text right of the last word part.
                    last_pwp = word_part_list[-1]
                    new_word_part_obj = word_part_obj.copy()
                    new_word_part_obj['x'] = last_pwp.left + last_pwp.width + 0.5
                    new_word_part_obj['y'] = last_pwp.bottom
                    new_word_part_obj['text'] = word_part_obj['text'][index:]
                    # Hand on threshold and throw_error_if_not_found, so that the
                    # settings of the caller also apply to the rest of the text.
                    word_part_list += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(new_word_part_obj,\
                            svg_path_tree, namespaces, page, start_id=start_id, xmin=xmin, ymin=ymin,\
                            threshold=threshold, throw_error_if_not_found=throw_error_if_not_found)
                return word_part_list
            elif page is None or throw_error_if_not_found:
                raise Exception('{} for text {} svg_x {}, svg_y {}'.format(PositionalWordPart.WARN_NO_USE_NODE_FOUND, text, svg_x, svg_y))
            else:
                warnings.warn('{} for text {} svg_x {}, svg_y {}'.format(PositionalWordPart.WARN_NO_USE_NODE_FOUND, text, svg_x, svg_y))
                # Continue the numbering at start_id, so that the ids do not
                # collide with the ids of word parts that have already been created.
                return PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, [word_part_obj], start_id=start_id)
        else:
            return [ ]

    @staticmethod
    def CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs, start_id=0):
        """Creates a list of PositionalWordPart from word_part_objs (i.e. a list of dictionaries
        with the keys: text, x, y, matrix, class).

        Height and width are derived from the biggest font size that page returns for
        the style classes of a word part. If the following word part has an x value,
        the width is the distance to that x value minus a spacing.

        Args:
            page: object providing get_biggest_fontSize4styles(style_set=...)
            word_part_objs (list): list of dictionaries with the keys text, x, y, matrix, class
            start_id (int): id of the first word part; the following ids are consecutive

        [:return:] a list of (datatypes.positional_word_part) PositionalWordPart
        """
        positional_word_parts = []
        HEIGHT_FACTOR = 1.1 # factor that multiplies font_size -> height
        FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize
        SPACING = 0.2
        for index, part_obj in enumerate(word_part_objs):
            text = part_obj.get('text')
            matrix = part_obj.get('matrix')
            style_class = part_obj.get('class')
            x = float(part_obj['x']) if bool(part_obj.get('x')) else 0.0
            y = float(part_obj['y']) if bool(part_obj.get('y')) else 0.0
            font_size = page.get_biggest_fontSize4styles(style_set=set(style_class.split(' ')))
            height = round(font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / font_size, 3)
            width = round(font_size * FONTWIDTHFACTOR, 3)
            if index+1 < len(word_part_objs) and bool(word_part_objs[index+1].get('x')):
                width = float(word_part_objs[index+1]['x']) - x - SPACING
            positional_word_parts.append(PositionalWordPart(id=start_id + index, text=text, height=height, width=width, x=x, y=y, matrix=matrix, style_class=style_class))
        return positional_word_parts

Event Timeline