Page MenuHomec4science

transkriptionField.py
No OneTemporary

File Metadata

Created
Mon, May 6, 04:24

transkriptionField.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to transform a svg file according to the dimension of its transkription field.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact address, release status and version.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__version__ = "0.0.1"
import sys
from os.path import exists
from svgpathtools import svg_to_paths
import xml.etree.ElementTree as ET
from xml.parsers.expat import ExpatError
from .matrix import Matrix
from .text_field import TextField
# Thresholds used when looking for the path that outlines the transkription field.
# NOTE(review): MAX_SMALLER_PATH_WIDTH is not referenced in the visible part of this file.
MAX_SMALLER_PATH_WIDTH = 50.0
# Used by TranskriptionField.sort_according_to_area_desc: only paths whose bbox area
# exceeds MAX_SMALLER_PATH_HEIGHT * documentWidth / 4 are kept as candidates.
MAX_SMALLER_PATH_HEIGHT = 50.0
# Used by TranskriptionField.get_path_area: a path whose width (height) differs from the
# document width (height) by at most this amount is given the area 0.0.
MAX_DIFF_DOC_SELF_WIDTH = 100.0
MAX_DIFF_DOC_SELF_HEIGHT = 100.0
# NOTE(review): MIN_AREA is not referenced in the visible part of this file.
MIN_AREA = 2500.0
class TranskriptionField:
    """
    A class containing the dimensions of the transkription field.

    Args:
        filename (str): name of the svg file
        multipage_index (int): a negative value (the default) selects the
            candidate path with the largest area. Otherwise the two largest
            candidates are ordered by the left edge of their bbox and the one
            at this index is selected; for index 0 the field at index 1 is
            additionally stored in ``second_field``.

    Raises:
        ExpatError: if the svg file cannot be parsed (reported as empty file).
        ValueError: if the viewBox holds fewer than four values.
        Exception: if the svg root has no viewBox attribute.
    """

    def __init__(self, filename, multipage_index=-1):
        self.width = 0.0
        self.height = 0.0
        self.xmin = 0.0
        self.xmax = 0.0
        self.ymin = 0.0
        self.ymin_without_title = 0.0
        self.ymax = 0.0
        self.documentWidth = 0.0
        self.documentHeight = 0.0
        self.path = None
        self.second_field = None
        self.filename = filename
        self.line_number_area_width = 0.0
        try:
            paths, attributes, self.svg_attributes = svg_to_paths.svg2paths(filename, return_svg_attributes=True)
        except ExpatError as err:
            # Keep the original parser error attached as the cause.
            raise ExpatError('File {} is empty!'.format(filename)) from err
        view_box = self._view_box_values()
        if not view_box:
            raise Exception('File "{}" does not have an attribute "viewBox"'.format(filename))
        if len(view_box) < 4:
            raise ValueError('File "{}" has a malformed attribute "viewBox": {}'.format(filename, self.svg_attributes.get('viewBox')))
        self.documentWidth = float(view_box[2])
        self.documentHeight = float(view_box[3])
        if self.is_shrunk():
            # The file has already been reduced to the transkription field:
            # the viewBox itself describes the field.
            self.xmin = float(view_box[0])
            self.ymin = float(view_box[1])
            self.width = self.documentWidth
            self.height = self.documentHeight
        else:
            sorted_paths = self.sort_according_to_area_desc(paths, attributes)
            if multipage_index < 0 and len(sorted_paths) > 0:
                self.path = sorted_paths[0]
            elif len(sorted_paths) > 1:
                # Order the two largest candidates from left to right (bbox()[0] is xmin).
                self.path = sorted(sorted_paths[:2], key=lambda path: path.bbox()[0])[multipage_index]
                if multipage_index == 0:
                    self.second_field = TranskriptionField(filename, multipage_index=1)
            if self.path is not None:
                self.xmin, self.xmax, self.ymin, self.ymax = self.path.bbox()
                self.width = self.xmax - self.xmin
                self.height = self.ymax - self.ymin
                # NOTE(review): nesting reconstructed from an unindented copy of the
                # file; presumably this offset only applies when a path was found.
                self.ymin_without_title = self.ymin - 10

    def _view_box_values(self):
        """Return the values of the svg viewBox as a list of strings.

        The values may be separated by whitespace and/or commas. An empty list
        is returned if there are no svg attributes or no viewBox.
        """
        if not self.svg_attributes:
            return []
        raw_view_box = self.svg_attributes.get('viewBox')
        if not raw_view_box:
            return []
        return raw_view_box.replace(',', ' ').split()

    def add_line_number_area_width(self, end_positionX_of_line_number_area):
        """Adds the width of the line number area.

        On a verso page the width is measured from the given position to xmin,
        otherwise from xmax to the given position.
        """
        if self.is_page_verso():
            self.line_number_area_width = self.xmin - end_positionX_of_line_number_area
        else:
            self.line_number_area_width = end_positionX_of_line_number_area - self.xmax

    def convert_to_text_field(self) -> "TextField":
        """Convert to TextField.
        """
        return TextField(width=self.width, height=self.height, x=self.xmin, y=self.ymin)

    def is_page_verso(self) -> bool:
        """Returns true if the area right of the TranskriptionField is less than the left area.
        """
        return self.documentWidth - self.xmax < self.xmin

    def is_shrunk(self) -> bool:
        """Returns True if viewBox[0] and viewBox[1] are both != 0.

        Returns False if there is no viewBox or if it holds fewer than two values.
        """
        view_box = self._view_box_values()
        if len(view_box) < 2:
            return False
        return float(view_box[0]) != 0 and float(view_box[1]) != 0

    def get_svg_attributes(self, attrib_key):
        """Returns the svg attribute for the corresponding key or None if empty.
        """
        if not self.svg_attributes:
            return None
        return self.svg_attributes.get(attrib_key) or None

    def shrink_svg_to_transkription_field(self, target_filename=None):
        """ Changes the viewBox of the svg graphics to the size of the transkription field.
        If a target_filename is specified, the changes are saved to a new file,
        otherwise they are saved to the input file.

        Args:
            target_filename (str): name of the target svg file

        Returns:
            0 if the file was written, 1 if the file was already shrunk,
            2 if the svg root has no viewBox attribute.
        """
        # Register the namespaces so that ElementTree writes them back
        # with the same prefixes instead of generated ones.
        if self.svg_attributes.get('xmlns'):
            ET.register_namespace('', self.svg_attributes['xmlns'])
        if self.svg_attributes.get('xmlns:xlink'):
            ET.register_namespace('xlink', self.svg_attributes['xmlns:xlink'])
        et = ET.parse(self.filename)
        root = et.getroot()
        if not root.attrib.get('viewBox'):
            print('ERROR: file {} does not contain a svg/@viewBox!'.format(self.filename)) #TODO: throw error
            return 2
        if self.is_shrunk():
            # File has already been transformed; leave it untouched.
            return 1
        root.attrib['viewBox'] = '{} {} {} {}'.format(self.xmin, self.ymin, self.width, self.height)
        # Only adjust width/height if the root element already declares them.
        if root.attrib.get('width'):
            root.attrib['width'] = '{}pt'.format(self.width)
        if root.attrib.get('height'):
            root.attrib['height'] = '{}pt'.format(self.height)
        et.write(target_filename or self.filename)
        return 0

    def transkription_field_found(self) -> bool:
        """ Returns whether transkription field was found in __init__,
        i.e. whether width, height, xmin, xmax, ymin and ymax are all > 0.
        """
        dimensions = (self.width, self.height, self.xmin, self.xmax, self.ymin, self.ymax)
        return all(value > 0.0 for value in dimensions)

    def getWidth(self):
        """Returns documentWidth
        """
        return self.documentWidth

    def getHeight(self):
        """Returns documentHeight
        """
        return self.documentHeight

    def get_path_area(self, path, attribute_dict, removal_dict=None) -> float:
        """Return area of path.bbox

        0.0 is returned if the path is empty, not continuous or not closed,
        if its width (height) differs from the document width (height) by at
        most MAX_DIFF_DOC_SELF_WIDTH (MAX_DIFF_DOC_SELF_HEIGHT), or if an
        AssertionError is raised while inspecting the path.

        Args:
            path: the path to measure
            attribute_dict (dict): attributes of the path; a 'transform' entry
                is applied to the bbox via Matrix
            removal_dict: not used by this method
        """
        try:
            if not path or not path.iscontinuous() or not path.isclosed():
                return 0.0
            xmin, xmax, ymin, ymax = path.bbox()
            width = xmax - xmin
            height = ymax - ymin
            if 'transform' in attribute_dict:
                matrix = Matrix(attribute_dict['transform'])
                # NOTE(review): the second returned value is bound to ymax and
                # width/height are recomputed below -- confirm against Matrix.
                xmin, ymax, width, height = matrix.get_transformed_positions(xmin, ymin, width, height)
                xmax = matrix.get_new_x()
                ymin = matrix.get_new_y()
                width = xmax - xmin
                height = ymax - ymin
            if self.documentWidth - width <= MAX_DIFF_DOC_SELF_WIDTH:
                return 0.0
            if self.documentHeight - height <= MAX_DIFF_DOC_SELF_HEIGHT:
                return 0.0
            return width * height
        except AssertionError:
            return 0.0

    def sort_according_to_area_desc(self, paths, attributes, removal_dict=None) -> list:
        """Return a sorted list of paths sorted according to the area of their bbox, remove smaller paths.

        Only paths whose area exceeds MAX_SMALLER_PATH_HEIGHT * documentWidth / 4
        are kept; the largest path comes first.
        """
        min_area = MAX_SMALLER_PATH_HEIGHT * self.documentWidth / 4
        # Compute each area once and reuse it for filtering and sorting.
        candidates = []
        for path, attribute_dict in zip(paths, attributes):
            area = self.get_path_area(path, attribute_dict, removal_dict=removal_dict)
            if area > min_area:
                candidates.append((area, path))
        candidates.sort(key=lambda candidate: candidate[0], reverse=True)
        return [path for _, path in candidates]

Event Timeline