Page MenuHomec4science

convert_wordPositions.py
No OneTemporary

File Metadata

Created
Tue, May 14, 11:21

convert_wordPositions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert the word positions to HTML, SVG or plain text for testing purposes.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import re
import getopt
import sys
from os import sep, listdir, mkdir, path
from os.path import exists, isfile, isdir
from lxml.html import builder as E
from lxml.html import open_in_browser
import lxml
import xml.etree.ElementTree as ET
from svgpathtools import svg_to_paths
from myxmlwriter import write_pretty
from datatypes.matrix import Matrix
from datatypes.page import Page
from datatypes.transkriptionField import TranskriptionField
from datatypes.word import Word
# Module metadata: authorship, licensing and release information of this script.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Converter:
    """The converter super class.

    On its own it writes the words of a page as plain text; the subclasses
    override convert() in order to produce other output formats.
    """

    def __init__(self, page, non_testing=True):
        """Stores the page that will be converted.

        :param page: object providing the words to convert (convert() reads page.words).
        :param non_testing: flag stored for the converters; this class does not evaluate it.
        """
        self.page = page
        self.non_testing = non_testing

    def convert(self, output_file=None):
        """Prints all words.

        :param output_file: name of the file the text is written to;
            if None, the text is written to sys.stdout.
        """
        if output_file is None:
            # sys.stdout is not owned by this method and must stay open for the caller.
            self._write_words(sys.stdout)
        else:
            # The context manager closes the file even if writing raises.
            with open(output_file, 'w') as out:
                self._write_words(out)

    def _write_words(self, out):
        """Writes all words of the page to the writable text stream out.

        Each change of word.line_number starts a new output line. Lines with an
        even line number are prefixed with the zero padded number, all other
        lines with a blank.
        """
        first_word_of_line = None
        for word in self.page.words:
            if first_word_of_line is None or first_word_of_line.line_number != word.line_number:
                out.write('\n')
                first_word_of_line = word
                if word.line_number % 2 == 0:
                    out.write(str(word.line_number).zfill(2) + ' ')
                else:
                    out.write(' ')
            out.write(word.text + ' ')

    @staticmethod
    def CREATE_CONVERTER(page, non_testing=True, converter_type=None):
        """Returns a converter of type converter_type.

        :param page: the page that will be passed to the converter.
        :param non_testing: flag that will be passed to the converter.
        :param converter_type: None, a string containing 'TXT'/'TEXT', 'SVG',
            or anything else (treated as HTML).

        [:return:] SVGConverter for 'SVG', HTMLConverter for 'HTML', Converter for None
        """
        # The pattern is searched anywhere in the string, so 'TXT' and 'TEXT' both select plain text.
        if converter_type is None or re.search(r'T[E]*XT', converter_type):
            return Converter(page, non_testing)
        if converter_type == 'SVG':
            return SVGConverter(page, non_testing)
        # Every other type falls back to HTML.
        return HTMLConverter(page, non_testing)
class SVGConverter(Converter):
    """This class can be used to convert a 'svgWordPositions' xml file to a svg file that combines text as path and text-as-text.
    """
    BG_COLOR = 'yellow'
    OPACITY = '0.2'

    def __init__(self, page, non_testing=True, bg_color=BG_COLOR, opacity=OPACITY):
        """Stores the page and the display settings.

        :param page: the page that will be converted (convert() reads svg_file and words).
        :param non_testing: flag passed on to Converter.
        :param bg_color: stored on the instance.
        :param opacity: value of the 'opacity' attribute of each rectangle.
        """
        Converter.__init__(self, page, non_testing)
        # NOTE(review): bg_color is stored but convert() fills the rectangles from its
        # own color list -- confirm whether this parameter should be honoured.
        self.bg_color = bg_color
        self.opacity = opacity

    def convert(self, output_file=None):
        """Converts Page to SVG

        Appends a group with id 'Transkription' to the svg tree of page.svg_file and
        adds one rectangle, carrying the word text as title, for each transkription
        position of each word.

        :param output_file: name of the file the svg tree is written to;
            if None, nothing is written.
        """
        transkription_field = TranskriptionField(self.page.svg_file)
        # Register the namespaces of the source svg so that they are reused on output.
        for prefix, attribute_name in (('', 'xmlns'), ('xlink', 'xmlns:xlink')):
            namespace = transkription_field.get_svg_attributes(attribute_name)
            if bool(namespace):
                ET.register_namespace(prefix, namespace)
        svg_tree = ET.parse(self.page.svg_file)
        transkription_node = ET.SubElement(svg_tree.getroot(), 'g', attrib={'id': 'Transkription'})
        # Colors alternate from word to word.
        colors = ['yellow', 'orange']
        color_index = 0
        for word in self.page.words:
            for transkription_position in word.transkription_positions:
                # Positions are shifted by xmin/ymin of the transkription field.
                rect_attributes = {
                    'id': str(transkription_position.id),
                    'x': str(transkription_position.left + transkription_field.xmin),
                    'y': str(transkription_position.top + transkription_field.ymin),
                    'width': str(transkription_position.width),
                    'height': str(transkription_position.height),
                    'fill': colors[color_index],
                    'opacity': self.opacity,
                }
                rect_node = ET.SubElement(transkription_node, 'rect', attrib=rect_attributes)
                if transkription_position.transform is not None:
                    # Work on a clone so that the transform of the position stays untouched.
                    matrix = transkription_position.transform.clone_transformation_matrix()
                    # Presumably XINDEX/YINDEX address the translation part of the matrix -- TODO confirm.
                    matrix.matrix[Matrix.XINDEX] = round(
                        transkription_position.transform.matrix[Matrix.XINDEX] + transkription_field.xmin, 3)
                    matrix.matrix[Matrix.YINDEX] = round(
                        transkription_position.transform.matrix[Matrix.YINDEX] + transkription_field.ymin, 3)
                    rect_node.set('transform', matrix.toString())
                    # With a transform, x/y are overwritten with values relative to the matrix.
                    rect_node.set('x', str(round(
                        transkription_position.left - transkription_position.transform.matrix[Matrix.XINDEX], 3)))
                    rect_node.set('y', str(round((transkription_position.height - 1.5) * -1, 3)))
                ET.SubElement(rect_node, 'title').text = word.text
            color_index = (color_index + 1) % len(colors)
        if output_file is not None:
            svg_tree.write(output_file)
class HTMLConverter(Converter):
    """This class can be used to convert a 'svgWordPositions' xml file to a test HTML file.
    """
    # Style sheet with one class per kind of highlighting.
    CSS = """ .highlight0 { background-color: yellow; opacity: 0.2; }
.highlight1 { background-color: pink; opacity: 0.2; }
.previous { background-color: blue; opacity: 0.2; }
.next { background-color: cyan; opacity: 0.2; }
.inserted { background-color: green; opacity: 0.2; }
.inserted-head { background-color: orange; opacity: 0.2; }
.inserted-tail { background-color: red; opacity: 0.2; }
.inserted-head-tail { background-color: purple; opacity: 0.2; }
"""

    def __init__(self, page, non_testing=True):
        """Stores the page and the testing flag.

        :param page: the page that will be converted.
        :param non_testing: if true, convert() opens the result in a browser.
        """
        Converter.__init__(self, page, non_testing)

    @staticmethod
    def _get_highlight_class(word, counter):
        """Returns the name of the CSS class used for highlighting word.

        :param word: the word whose insertion related flags are inspected.
        :param counter: 0 or 1, selects the alternating default class.
        """
        if word.is_before_inserted_words:
            return 'previous'
        if word.is_after_inserted_words:
            return 'next'
        if word.is_head_of_inserted_words and word.is_tail_of_inserted_words:
            return 'inserted-head-tail'
        if word.is_head_of_inserted_words:
            return 'inserted-head'
        if word.is_tail_of_inserted_words:
            return 'inserted-tail'
        if word.word_insertion_mark is not None:
            return 'inserted'
        return 'highlight' + str(counter)

    def convert(self, output_file=None):
        """Converts Page to HTML

        Builds a HTML document that uses page.svg_file as background image and
        places one absolutely positioned link on it for each transkription
        position of each word. If self.non_testing is true, the document is
        opened in a browser.

        :param output_file: name of the file the HTML is written to;
            if None, nothing is written.
        """
        title = self.page.title if self.page.title is not None else 'Test Page'
        if self.page.number is not None:
            title = '{}, S. {}'.format(title, self.page.number)
        width = self.page.width
        height = self.page.height
        style_content = ' position: relative; width: {}px; height: {}px; background-image: url({}); background-size: {}px {}px '\
            .format(width, height, path.abspath(self.page.svg_file), width, height)
        style = E.STYLE('#transkription {' + style_content + '}', HTMLConverter.CSS)
        head = E.HEAD(E.TITLE(title), E.META(charset='UTF-8'), style)
        transkription = E.DIV(id="transkription")
        # The counter toggles between 0 and 1 so that neighbouring words differ in color.
        counter = 0
        for word in self.page.words:
            highlight_class = HTMLConverter._get_highlight_class(word, counter)
            word_title = '{}: {}'.format(str(word.id), word.text)
            for transkription_position in word.transkription_positions:
                style_content = 'position:absolute; top:{0}px; left:{1}px; width:{2}px; height:{3}px;'.format(
                    transkription_position.top, transkription_position.left,
                    transkription_position.width, transkription_position.height)
                if transkription_position.transform is not None:
                    style_content = style_content + ' transform: {}; '.format(
                        transkription_position.transform.toCSSTransformString())
                    # Only a negative horizontal offset is used as origin, otherwise 0.
                    x_offset = (transkription_position.left - round(transkription_position.transform.getX(), 1)) * -1
                    transform_origin_x = x_offset if x_offset < 0 else 0
                    style_content = style_content + ' transform-origin: {}px {}px; '.format(
                        transform_origin_x, transkription_position.height)
                link = E.A(' ', E.CLASS(highlight_class), title=word_title, style=style_content)
                transkription.append(link)
            counter = (counter + 1) % 2
        html = E.HTML(head, E.BODY(transkription))
        if self.non_testing:
            open_in_browser(html)
        if output_file is not None:
            with open(output_file, 'wb') as f:
                f.write(lxml.html.tostring(html, pretty_print=True, include_meta_content_type=True, encoding='utf-8'))
def usage():
    """Print the help text of this script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert the word positions to HTML for testing purposes.
    svgscripts/convert_wordPositions.py [-h|--help, -H|--HTML, -o|--output=outputFile, -S|--SVG, -s|--svg=svgFile, -T|--TEXT, -t|--testing] <file>
    -h|--help: show help
    -H|--HTML [default] convert to HTML test file
    -o|--output=outputFile save output to file outputFile
    -S|--SVG convert to SVG test file
    -s|--svg=svgFile: svg web file
    -T|--TEXT convert to TEXT output
    -t|--testing execute in test mode, do not write to file or open browser
    :return: exit code (int)
    """
    # The docstring above is printed by usage(), hence further notes are kept in comments.
    # argv: list of command line arguments without the program name.
    # Exit codes returned: 0 on success or after showing help, 2 on any usage or file error.
    convert_to_type = None
    svg_file = None
    output_file = None
    non_testing = True
    try:
        opts, args = getopt.getopt(argv, "htHSTs:o:", ["help", "testing", "HTML", "SVG", "TEXT", "svg=", "output="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        # NOTE: any option given without a positional file argument also ends here with exit code 0.
        if opt in ('-h', '--help') or not args:
            usage()
            return 0
        elif opt in ('-S', '--SVG'):
            convert_to_type = 'SVG'
        elif opt in ('-T', '--TEXT'):
            convert_to_type = 'TEXT'
        elif opt in ('-H', '--HTML'):
            convert_to_type = 'HTML'
        elif opt in ('-t', '--testing'):
            non_testing = False
        elif opt in ('-s', '--svg'):
            svg_file = arg
        elif opt in ('-o', '--output'):
            output_file = arg
    if not args:
        usage()
        return 2
    if convert_to_type is None:
        # Without an explicit type, the part after the last dot of the output file decides.
        if output_file is not None and '.' in output_file:
            convert_to_type = output_file.split('.')[-1].upper()
        else:
            convert_to_type = 'HTML'
    for word_position_file in args:
        if not isfile(word_position_file):
            print("'{}' does not exist!".format(word_position_file))
            return 2
        if svg_file is not None:
            if not isfile(svg_file):
                # Report the svg file: it is the one that is missing here.
                print("'{}' does not exist!".format(svg_file))
                return 2
            page = Page(xml_source_file=word_position_file, svg_file=svg_file)
        else:
            page = Page(xml_source_file=word_position_file)
            if page.svg_file is None:
                print('Please specify a svg file!')
                usage()
                return 2
        converter = Converter.CREATE_CONVERTER(page, non_testing=non_testing, converter_type=convert_to_type)
        converter.convert(output_file=output_file)
    return 0
if __name__ == "__main__":
    # Run the command line interface and hand its return value to the shell as exit status.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline