Page MenuHomec4science

util.py
No OneTemporary

File Metadata

Created
Sun, Apr 28, 18:56
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to copy a faksimile svg file with the option of highlighting some word boxes.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
from functools import cmp_to_key
import getopt
import inspect
import itertools
import lxml.etree as ET
import re
import shutil
import signal
import string
import subprocess
from svgpathtools import svg_to_paths
import sys
import tempfile
import os
from os import listdir, sep, path, setpgrp, devnull
from os.path import exists, isfile, isdir, dirname, basename, splitext
import warnings
import xml.etree.ElementTree as XET
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from datatypes.faksimile import FaksimilePage, get_paths_inside_rect
from datatypes.lineNumber import LineNumber
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page
from datatypes.transkriptionField import TranskriptionField
from datatypes.word import update_transkription_position_ids
from local_config import PDF_READER, SVG_EDITOR
from myxmlwriter import write_pretty, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from process_files import update_svgposfile_status
# Module metadata (author, maintainer, copyright, contact, status, license, version).
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# When True, record_changes_on_svg_file_to_page does not write the page back to its xml file.
UNITTESTING = False
# Default fill color that create_highlighted_svg_file sets on highlighted nodes.
HIGHLIGHT_COLOR = 'red'
# Default value of the 'opacity' attribute that create_highlighted_svg_file sets on highlighted nodes.
OPACITY = '0.5'
class ExternalViewer:
    """This class can be used to show files with external viewers.

    The viewer is chosen by file extension: PDF_READER for '.pdf' files and
    SVG_EDITOR for '.svg' files.
    """
    # Maps a file extension (including the dot) to the viewer command.
    file_format_viewer_dict = { '.pdf': PDF_READER, '.svg': SVG_EDITOR }

    @classmethod
    def show_files(cls, single_file=None, list_of_files=None):
        """Opens file(s) with corresponding external viewer(s).

        Files whose extension has no registered viewer are skipped. All
        viewable files but the last one are opened in the background, each
        in its own session and with stdout/stderr discarded. The last
        viewable file is opened in the foreground; when its viewer exits,
        the background viewers are sent SIGTERM.

        :param single_file: a file name, or a list of file names (in which
            case list_of_files is ignored).
        :param list_of_files: file names to open before single_file. The
            list is not modified.
        """
        if isinstance(single_file, list):
            files = list(single_file)
        else:
            # Work on a copy: neither a caller's list nor a shared default
            # may be mutated by this method.
            files = list(list_of_files) if list_of_files is not None else []
            if single_file is not None:
                files.append(single_file)
        # Resolve the viewers first, so that the foreground viewer is the
        # last file that can actually be displayed.
        viewable = []
        for file2open in files:
            viewer = cls.file_format_viewer_dict.get(splitext(file2open)[1])
            if viewer is not None:
                viewable.append((viewer, file2open))
        if len(viewable) == 0:
            return
        process_list = []
        try:
            for viewer, file2open in viewable[:-1]:
                # start_new_session puts each viewer in its own process
                # group, which is what os.killpg addresses below.
                process_list.append(\
                        subprocess.Popen([viewer, file2open], stdout=subprocess.DEVNULL,\
                        stderr=subprocess.DEVNULL, start_new_session=True))
            viewer, file2open = viewable[-1]
            subprocess.run([viewer, file2open])
        finally:
            # Also runs if a viewer could not be started, so that no
            # background viewer is left behind.
            for process in process_list:
                try:
                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                except ProcessLookupError:
                    # The viewer is already gone; there is nothing to terminate.
                    pass
def copy_faksimile_svg_file(target_file=None, faksimile_source_file=None, faksimile_tree=None, target_directory=None, local_image_path=None):
    """Copy a faksimile_svg_file to target_file.

    The content written is that of faksimile_tree if one is given, otherwise
    that of faksimile_source_file. If local_image_path is given, the
    xlink:href of the first svg image element is set to it.

    :param target_file: name of the file to write; if target_directory is
        given as well, the file is written to target_directory/target_file.
    :param faksimile_source_file: svg file to copy; defaults to the URL of
        faksimile_tree.
    :param faksimile_tree: a parsed svg tree (lxml.etree or
        xml.etree.ElementTree) whose content should be written.
    :param target_directory: directory to write to; without target_file the
        base name of the source file is used.
    :param local_image_path: new value for the href of the first image.
    :raises Exception: if neither a source file nor a tree is given, or if
        neither a target file nor a target directory is given.
    """
    if faksimile_source_file is None:
        if faksimile_tree is None:
            raise Exception('copy_faksimile_svg_file needs either a faksimile_tree (lxml.etree.ElementTree) or a faksimile_source_file')
        faksimile_source_file = faksimile_tree.docinfo.URL
    if target_file is None and target_directory is None:
        raise Exception('copy_faksimile_svg_file needs either a target_file or a target_directory')
    if target_directory is not None:
        file_name = target_file if target_file is not None else basename(faksimile_source_file)
        target_file = target_directory + sep + file_name
    # Fallbacks for svg files whose root element does not declare these namespaces.
    default_svg_namespace = 'http://www.w3.org/2000/svg'
    default_xlink_namespace = 'http://www.w3.org/1999/xlink'
    # Only the attributes of the svg root element are needed here.
    _, _, svg_attributes = svg_to_paths.svg2paths(faksimile_source_file, return_svg_attributes=True)
    # Register the namespace prefixes of the source file for serialization.
    for key in [ key for key in svg_attributes.keys() if key.startswith('xmlns:') ]:
        try:
            XET.register_namespace(key.replace('xmlns:', ''), svg_attributes[key])
        except ValueError:
            # register_namespace rejects reserved prefixes; those are skipped.
            pass
    XET.register_namespace('', default_svg_namespace)
    namespaces = { 'ns': svg_attributes.get('xmlns', default_svg_namespace),\
            'xlink': svg_attributes.get('xmlns:xlink', default_xlink_namespace) }
    if faksimile_tree is not None:
        # Re-parse the tree with xml.etree so that it is written with the
        # namespaces registered above.
        if isinstance(faksimile_tree, ET._ElementTree):
            element = XET.fromstring(ET.tostring(faksimile_tree))
        else:
            element = XET.fromstring(XET.tostring(faksimile_tree.getroot()))
        target_tree = XET.ElementTree(element)
    else:
        target_tree = XET.parse(faksimile_source_file)
    if local_image_path is not None:
        image_node = target_tree.find('.//ns:image', namespaces=namespaces)
        if image_node is not None:
            image_node.set('{%s}href' % namespaces['xlink'], local_image_path)
    target_tree.write(target_file)
def create_highlighted_svg_file(faksimile_tree, node_ids, target_file=None, target_directory=None, local_image_path=None, namespaces={}, highlight_color=HIGHLIGHT_COLOR, opacity=OPACITY):
    """Highlight the rect and path nodes with the given ids and write the tree to a file.

    Each matching node gets highlight_color as 'fill', opacity as 'opacity'
    and an empty 'style' attribute. The tree is then written by
    copy_faksimile_svg_file, which receives target_file, target_directory
    and local_image_path unchanged.

    :param faksimile_tree: the parsed svg tree (modified in place).
    :param node_ids: ids of the nodes that should be highlighted.
    :param namespaces: prefix to namespace mapping for the xpath queries; if
        empty it is built from the root element, the default namespace
        being mapped to the prefix 'ns'.
    """
    if len(namespaces) == 0:
        namespaces = {}
        for prefix, uri in faksimile_tree.getroot().nsmap.items():
            namespaces['ns' if prefix is None else prefix] = uri
    query = '//ns:rect[@id="{0}"]|//ns:path[@id="{0}"]'
    # Collect the matches for all ids before any node is touched.
    matches_per_id = [ faksimile_tree.xpath(query.format(node_id), namespaces=namespaces)\
            for node_id in node_ids ]
    for matches in matches_per_id:
        for node in matches:
            node.set('fill', highlight_color)
            node.set('opacity', opacity)
            node.set('style', '')
    copy_faksimile_svg_file(target_file=target_file, faksimile_tree=faksimile_tree,\
            target_directory=target_directory, local_image_path=local_image_path)
def get_empty_node_ids(faksimile_tree, x_min=0.0, x_max=0.0, y_min=0.0, y_max=0.0, text_field_id=None, faksimile_page=None, namespaces={}):
    """Return the ids of rect and path nodes that do not have a title element.

    Rects are selected if their x/y attributes lie strictly between the
    given bounds and their id differs from text_field_id. Paths without a
    title are selected by get_paths_inside_rect, which gets the same bounds
    and text_field_id.

    :param faksimile_tree: the parsed svg tree.
    :param faksimile_page: if given, bounds and text_field_id are derived
        from its text_field and faksimile_image, overriding the arguments.
    :param namespaces: prefix to namespace mapping for the xpath queries; if
        empty it is built from the root element, the default namespace
        being mapped to the prefix 'ns'.
    :return: list of id attribute values
    """
    THRESHOLD_X = 10
    if faksimile_page is not None:
        text_field = faksimile_page.text_field
        image = faksimile_page.faksimile_image
        x_min = text_field.xmin + image.x
        # The right bound is pulled in by THRESHOLD_X.
        x_max = text_field.xmax + image.x - THRESHOLD_X
        y_min = text_field.ymin + image.y
        y_max = text_field.ymax + image.y
        text_field_id = text_field.id
    if len(namespaces) == 0:
        namespaces = {}
        for prefix, uri in faksimile_tree.getroot().nsmap.items():
            namespaces['ns' if prefix is None else prefix] = uri
    rect_query = '//ns:rect[@x>"{0}" and @x<"{1}" and @y>"{2}" and @y<"{3}" and @id!="{4}" and not(./ns:title)]'.format(\
            x_min, x_max, y_min, y_max, text_field_id)
    untitled_nodes = faksimile_tree.xpath(rect_query, namespaces=namespaces)
    untitled_nodes.extend(get_paths_inside_rect(faksimile_tree, '//ns:path[not(./ns:title)]',\
            x_min, x_max, y_min, y_max, text_field_id, namespaces=namespaces))
    return [ untitled_node.get('id') for untitled_node in untitled_nodes ]
def record_changes(original_svg_file, changed_svg_file, node_ids, namespaces={}):
    """Copy changes made to changed_svg_file to original_svg_file.

    For each id in node_ids the rect/path node of the original file is
    updated from the changed file:
    - if the changed node has a title, its text is copied to the title of
      the original node (the title is created if it is missing);
    - otherwise (node or title is missing in the changed file) the
      original node is removed.
    Finally the original file is rewritten by copy_faksimile_svg_file.

    :param original_svg_file: the svg file that will be updated.
    :param changed_svg_file: the svg file containing the changes.
    :param node_ids: ids of the rect/path nodes to synchronize.
    :param namespaces: prefix to namespace mapping for the xpath queries; if
        empty it is built from the root element of the changed file, the
        default namespace being mapped to the prefix 'ns'.
    """
    old_tree = ET.parse(original_svg_file)
    new_tree = ET.parse(changed_svg_file)
    if len(namespaces) == 0:
        namespaces = { k if k is not None else 'ns': v for k, v in new_tree.getroot().nsmap.items() }
    for node_id in node_ids:
        new_titles = new_tree.xpath('//ns:rect[@id="{0}"]/ns:title|//ns:path[@id="{0}"]/ns:title'.format(node_id), namespaces=namespaces)
        old_nodes = old_tree.xpath('//ns:rect[@id="{0}"]|//ns:path[@id="{0}"]'.format(node_id), namespaces=namespaces)
        if len(old_nodes) == 0:
            continue
        if len(new_titles) > 0:
            old_title = old_nodes[0].find('ns:title', namespaces=namespaces)
            if old_title is None:
                # Create the title in the same namespace as the one the
                # queries above look for, and only copy an id that exists
                # (lxml does not accept None as attribute value).
                new_title_id = new_titles[0].get('id')
                attrib = { 'id': new_title_id } if new_title_id is not None else {}
                old_title = ET.SubElement(old_nodes[0], '{%s}title' % namespaces['ns'], attrib=attrib)
            old_title.text = new_titles[0].text
        else:
            for old_node in old_nodes:
                old_node.getparent().remove(old_node)
    copy_faksimile_svg_file(target_file=original_svg_file, faksimile_tree=old_tree)
def record_changes_on_svg_file_to_page(xml_source_file, svg_file, word_ids=None):
    """Copy changes made to svg_file to xml_source_file.

    The rects of the svg group "Transkription" carry ids of the form
    'word_<word id>_<transkription position id>'. For each selected word
    - the geometry and title text of the rects matching its transkription
      positions are recorded, and
    - rects whose id contains the word's id prefix but matches none of its
      positions are resolved through their inkscape:label (the id of the
      position they stem from); that position is updated and moved to this
      word.
    Afterwards the word list of the page is rebuilt, attached to the page
    tree and, unless UNITTESTING is set, written to xml_source_file.

    :param xml_source_file: the xml file of the page.
    :param svg_file: the svg file containing the changes.
    :param word_ids: ids of the words to process; all words if None.
    :return: datatypes.page.Page
    """
    svg_tree = ET.parse(svg_file)
    namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
    transkription_field = TranskriptionField(svg_file)
    page = Page(xml_source_file=xml_source_file)
    words = [ word for word in page.words if word.id in word_ids ]\
            if word_ids is not None else page.words
    new_page_words = []
    for word in words:
        word_id = 'word_' + str(word.id) + '_'
        recorded_ids = []
        for transkription_position in word.transkription_positions:
            transkription_position_id = word_id + str(transkription_position.id)
            tp_nodes = svg_tree.xpath('//ns:g[@id="Transkription"]/ns:rect[@id="{0}"]'.format(transkription_position_id), namespaces=namespaces)
            if len(tp_nodes) > 0:
                record_changes_to_transkription_position(tp_nodes[0], transkription_position,\
                        transkription_field.xmin, transkription_field.ymin, namespaces=namespaces)
                recorded_ids.append(transkription_position_id)
        extra_nodes = [ node for node in\
                svg_tree.xpath('//ns:g[@id="Transkription"]/ns:rect[contains(@id, "{0}")]'.format(word_id), namespaces=namespaces)\
                if node.get('id') not in recorded_ids ]
        for extra_node in extra_nodes:
            old_ids = [ inkscape_id.replace('#','') for inkscape_id in\
                    svg_tree.xpath('//ns:g[@id="Transkription"]/ns:rect[@id="{0}"]/@inkscape:label'.format(extra_node.get('id')),\
                    namespaces=namespaces) ]
            if len(old_ids) == 0 or not re.match(r'word_[0-9]+_[0-9]+', old_ids[0]):
                continue
            old_id_list = old_ids[0].split('_')
            ref_word_id = int(old_id_list[1])
            ref_tp_id = old_id_list[2]
            ref_words = [ ref_word for ref_word in page.words if ref_word.id == ref_word_id ]
            if len(ref_words) == 0:
                continue
            # ref_tp_id comes from the label and is a string; compare it
            # with the string form of the position id so that numeric ids
            # match as well.
            ref_tps = [ tp for tp in ref_words[0].transkription_positions\
                    if str(tp.id) == ref_tp_id ]
            if len(ref_tps) > 0:
                # Move the referenced position from its old word to this word.
                ref_words[0].transkription_positions.remove(ref_tps[0])
                record_changes_to_transkription_position(extra_node,\
                        ref_tps[0], transkription_field.xmin, transkription_field.ymin, namespaces=namespaces)
                word.transkription_positions.append(ref_tps[0])
    for word in page.words:
        if word.has_mixed_status('text'):
            new_page_words += [ part for part in word.split_according_to_status('text') if part.text is not None and part.text != '' ]
        elif len(word.transkription_positions) > 0:
            new_text = [ tp.text for tp in word.transkription_positions if tp.text is not None and tp.text != '' ]
            if len(new_text) > 0:
                word.text = new_text[0]
            # NOTE(review): a word that still has positions is kept even if
            # none of them carries a text -- confirm that this is intended.
            new_page_words.append(word)
    page.words = new_page_words
    page.update_and_attach_words2tree(update_function_on_word=update_transkription_position_ids)
    page.unlock()
    if not UNITTESTING:
        write_pretty(xml_element_tree=page.page_tree, file_name=xml_source_file,\
                script_name=__file__ + ' -> ' + inspect.currentframe().f_code.co_name, file_type=FILE_TYPE_SVG_WORD_POSITION)
    return page
def record_changes_to_transkription_position(node, transkription_position, xmin=0.0, ymin=0.0, namespaces=None):
    """Record the geometry and title text of node on transkription_position.

    Attributes that are missing or empty on node leave the corresponding
    field of transkription_position untouched.

    :param node: the svg node holding the changed values.
    :param transkription_position: the object that receives left, top,
        width, height and text.
    :param xmin: offset subtracted from the node's x to get 'left'.
    :param ymin: offset subtracted from the node's y to get 'top'.
    :param namespaces: prefix to namespace mapping for the title query; if
        None it is built from the node's nsmap, the default namespace
        being mapped to the prefix 'ns'.
    """
    if namespaces is None:
        namespaces = {}
        for prefix, uri in node.nsmap.items():
            namespaces['ns' if prefix is None else prefix] = uri
    x_value = node.get('x')
    if x_value:
        transkription_position.left = float(x_value) - xmin
    y_value = node.get('y')
    if y_value:
        transkription_position.top = float(y_value) - ymin
    width_value = node.get('width')
    if width_value:
        transkription_position.width = float(width_value)
    height_value = node.get('height')
    if height_value:
        transkription_position.height = float(height_value)
    title_texts = node.xpath('./ns:title/text()', namespaces=namespaces)
    if len(title_texts) > 0:
        transkription_position.text = title_texts[0]
def usage():
    """Print the help text of the script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to copy a faksimile svg file with the option of highlighting some word boxes.
    svgscripts/copy_faksimile_svg_file.py [OPTIONS] <faksimile_svg_file> <target_dir>
    <faksimile_svg_file> a svg file containing information about the word positions on the faksimile.
    <target_dir> the target directory.
    OPTIONS:
    -h|--help: show help
    :return: exit code (int)
    """
    try:
        opts, args = getopt.getopt(argv, "h", ["help" ])
    except getopt.GetoptError:
        usage()
        return 2
    if any(opt in ('-h', '--help') for opt, _ in opts):
        usage()
        return 0
    if len(args) < 2:
        usage()
        return 2
    first, second = args[0], args[1]
    if not (exists(first) and exists(second)):
        # Report the first argument that does not exist.
        missing = second if exists(first) else first
        raise FileNotFoundError('File {} does not exist!'.format(missing))
    # The two arguments may be given in either order.
    faksimile_svg_file = first if isfile(first) else second
    target_dir = second if isdir(second) else first
    copy_faksimile_svg_file(faksimile_source_file=faksimile_svg_file, target_directory=target_dir)
    return 0
if __name__ == "__main__":
    # Run the command line interface; the return value of main becomes the exit code.
    raise SystemExit(main(sys.argv[1:]))

Event Timeline