Page MenuHomec4science

lineNumber.py
No OneTemporary

File Metadata

Created
Wed, May 8, 06:35

lineNumber.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a line number.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# Dependencies: `re` validates line number text, lxml's etree builds xml nodes,
# and Matrix (sibling module of this package) parses svg transform strings.
import re
from lxml import etree as ET
from os.path import isfile
import sys
from .matrix import Matrix
# Add the relative entry 'py2ttl' to the module search path. This runs after
# the imports above, so it only affects imports that are executed later.
sys.path.append('py2ttl')
class LineNumber:
    """
    This class represents a line number.

    Args:
        id (int): id of the line number.
        bottom (float): bottom position of the line.
        top (float): top position of the line.
        raw_text_node: svg text node; its 'transform' attribute provides the
            bottom position and its text (or the text of its tspan
            descendants) provides the id.
        transkription_field: passed on to Matrix together with the transform
            of raw_text_node.
        xml_text_node: node with the attributes 'id', 'bottom', 'top' and,
            optionally, the four 'faksimile-*' attributes.

    If both nodes are given, xml_text_node is read first and raw_text_node
    then overrides id and bottom.
    """
    XML_TAG = 'line-number'
    WARN_NO_LINE_NUMBER = 'No line number found'
    MIN_LINE_HIGHT = 5

    def __init__(self, id=0, bottom=0.0, top=0.0, raw_text_node=None, transkription_field=None, xml_text_node=None):
        # NOTE: attach_object_to_tree serializes every instance attribute, so
        # only numeric attributes may be stored on the instance.
        self.id = id
        self.bottom = bottom
        self.top = top
        self.faksimile_inner_bottom = 0.0
        self.faksimile_inner_top = 0.0
        self.faksimile_outer_bottom = 0.0
        self.faksimile_outer_top = 0.0
        if xml_text_node is not None:
            self.id = int(xml_text_node.get('id'))
            self.bottom = float(xml_text_node.get('bottom'))
            self.top = float(xml_text_node.get('top'))
            self.faksimile_inner_bottom = self._float_attribute(xml_text_node, 'faksimile-inner-bottom')
            self.faksimile_inner_top = self._float_attribute(xml_text_node, 'faksimile-inner-top')
            self.faksimile_outer_bottom = self._float_attribute(xml_text_node, 'faksimile-outer-bottom')
            self.faksimile_outer_top = self._float_attribute(xml_text_node, 'faksimile-outer-top')
        if raw_text_node is not None:
            matrix = Matrix(raw_text_node.get('transform'), transkription_field=transkription_field)
            self.bottom = matrix.getY()
            self.id = int(self._node_text(raw_text_node))

    @staticmethod
    def _float_attribute(node, key) -> float:
        """Returns attribute key of node as float, or 0.0 if it is missing or empty.
        """
        value = node.get(key)
        return float(value) if bool(value) else 0.0

    @staticmethod
    def _node_text(raw_text_node) -> str:
        """Returns the text of the node or, if it has none, the joined text of its tspan descendants.
        """
        if raw_text_node.text is not None:
            return raw_text_node.text
        tspans = raw_text_node.findall('.//tspan', raw_text_node.nsmap)
        # A tspan without text has text None: count it as empty instead of
        # letting str.join fail with a TypeError.
        return ''.join(tspan.text or '' for tspan in tspans)

    @classmethod
    def extract_line_numbers(cls, svg_tree, transkription_field, set_to_text_field_zero=True) -> list:
        """Extracts line numbers.

        Collects the text nodes of svg_tree that Matrix reports as near the
        transkription field, keeps those containing a line number and sets
        the top position of each resulting line number.

        Args:
            svg_tree: tree containing the svg text nodes.
            transkription_field: field used for selecting the nodes; its ymin
                is the start position for the search of the first top.
            set_to_text_field_zero (bool): if True, the transkription field
                is passed to Matrix and its ymin is used as y offset.

        Returns:
            list of LineNumber instances (empty if none was found).
        """
        root = svg_tree.getroot()
        nodes_near_tf = [item for item in root.iterfind('.//text', root.nsmap)
                         if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)]
        provide_tf = None if not set_to_text_field_zero else transkription_field
        line_numbers = [cls(raw_text_node=item, transkription_field=provide_tf)
                        for item in nodes_near_tf if cls.IS_A_LINE_NUMBER(item)]
        if len(line_numbers) > 0:
            yoffset = 0 if not set_to_text_field_zero else transkription_field.ymin
            last_to_position = transkription_field.ymin
            for line_number in line_numbers:
                # Each call returns the position from which the search for
                # the next line number's top starts.
                last_to_position = set_line_number_top(root, yoffset, line_number, last_to_position)
        return line_numbers

    @staticmethod
    def IS_A_LINE_NUMBER(raw_text_node):
        """Returns whether svg node contains a line number.

        The node counts as a line number if its text, or the joined text of
        its tspan descendants if it has no text, consists of digits only.
        """
        return bool(re.search(r'^[0-9]+$', LineNumber._node_text(raw_text_node)))

    def setTop(self, top):
        """Sets top position of line number.
        """
        self.top = top

    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.

        Reuses the first node with tag LineNumber.XML_TAG and a matching id,
        or appends a new one to the root, and writes every instance attribute
        to it (underscores replaced by hyphens, values rounded to 3 digits).
        """
        root = target_tree.getroot()
        # Query once and reuse the result instead of evaluating the xpath twice.
        matches = root.xpath('//' + LineNumber.XML_TAG + '[@id="%s"]' % self.id)
        obj_node = matches[0] if len(matches) > 0 else ET.SubElement(root, LineNumber.XML_TAG)
        for key, value in self.__dict__.items():
            obj_node.set(key.replace('_', '-'), str(round(value, 3)))
def set_line_number_top(tree_root, yoffset, line_number, last_to_position, minabove=3) -> float:
    """Set top position of line_number and return next last_to_position.

    The search limit lies minabove above the (offset) bottom of line_number.
    If get_bottoms finds bottoms between last_to_position and that limit, the
    top is derived from the last of them, otherwise the limit itself is used.
    A top that leaves less than LineNumber.MIN_LINE_HIGHT between top and
    bottom is rejected and the search is repeated with minabove + 1.

    Args:
        tree_root: root node that is handed to get_bottoms.
        yoffset: offset added to the bottom of line_number for the search.
        line_number: object whose top is set via setTop.
        last_to_position: start position of the search.
        minabove: distance between the search limit and the bottom.

    Returns:
        the top position that has been set on line_number.
    """
    search_limit = line_number.bottom + yoffset - minabove
    candidates = get_bottoms(tree_root, from_position=last_to_position, to_position=search_limit)
    if candidates:
        # Use the bottom closest to the limit, i.e. the last of the sorted list.
        top = candidates[-1] - yoffset + minabove
    else:
        top = search_limit
    if line_number.bottom - top >= LineNumber.MIN_LINE_HIGHT:
        line_number.setTop(top)
        return top
    # Line would be too low: retry with a limit one unit further up.
    return set_line_number_top(tree_root, yoffset, line_number, last_to_position, minabove=minabove + 1)
def get_bottoms(tree_root, from_position=-1.0, to_position=-1.0, transkription_field=None) -> list:
    """Returns all unique bottom values (Float) as a sorted list.

    The bottom of a text node is the y value Matrix yields for its
    'transform' attribute.

    Args:
        tree_root: node whose text descendants are examined.
        from_position: exclusive lower bound of the returned values.
        to_position: exclusive upper bound of the returned values.
        transkription_field: if given, its ymin and ymax replace
            from_position and to_position.

    Returns:
        the sorted unique bottoms; they are restricted to the open interval
        between the two bounds only if both bounds are greater than 0.0.
    """
    unique_bottoms = set()
    for text_node in tree_root.findall(".//text", tree_root.nsmap):
        unique_bottoms.add(Matrix(transform_matrix_string=text_node.get('transform')).getY())
    bottom_list = sorted(unique_bottoms)
    if transkription_field is not None:
        from_position = transkription_field.ymin
        to_position = transkription_field.ymax
    # Non-positive bounds (such as the defaults) mean "do not filter".
    if not (from_position > 0.0 and to_position > 0.0):
        return bottom_list
    return [bottom for bottom in bottom_list if bottom > from_position and bottom < to_position]

Event Timeline