convert_wordPositions.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, Apr 26, 11:01

convert_wordPositions.py
View Options

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	""" This program can be used to convert the word positions to HTML for testing purposes.
	"""
	# Copyright (C) University of Basel 2019 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	import cairosvg
	import getopt
	import json
	from lxml.html import builder as E
	from lxml.html import open_in_browser
	import lxml
	from pathlib import Path as PathLibPath
	from os import sep, listdir, mkdir, path, remove
	from os.path import exists, isfile, isdir, dirname
	import re
	import sys
	from svgpathtools import svg_to_paths
	import xml.etree.ElementTree as ET

	if dirname(__file__) not in sys.path:
	sys.path.append(dirname(__file__))

	from datatypes.matrix import Matrix
	from datatypes.page import Page
	from datatypes.page_creator import PageCreator
	from datatypes.transkriptionField import TranskriptionField
	from datatypes.text_field import TextField
	from datatypes.writing_process import WritingProcess
	from datatypes.word import Word

	sys.path.append('shared_util')
	from main_util import extract_paths_on_tf, get_paths_near_position

	# Module metadata (author, license, version).
	__author__ = "Christian Steiner"
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	# Base URL of an eXist-db REST storage; not used by the converters in this module.
	EXIST_DB = 'http://existdb-test.dasch.swiss/exist/rest/db/storage/nietzsche/'
	# Base URL that JSONConverter prepends to 'faksimiles/<file name>' for the 'secondaryURL' entry.
	LOCAL_SERVER = 'http://localhost:8000/'

	class Converter:
	    """Base converter that writes the words of a page as plain text.

	    The subclasses defined in this module override :meth:`convert` in order to
	    produce other output formats; :meth:`CREATE_CONVERTER` selects among them.
	    """

	    def __init__(self, page, non_testing=True, show_word_insertion_mark=False):
	        """Store the page to convert and the conversion flags.

	        :param page: object providing the words to convert (``page.words``).
	        :param non_testing: stored on the instance for the subclasses.
	        :param show_word_insertion_mark: stored on the instance for the subclasses.
	        """
	        self.page = page
	        self.non_testing = non_testing
	        self.show_word_insertion_mark = show_word_insertion_mark

	    def _get_transkription_positions(self, transkription_positions, stage_version=''):
	        """Return the transkription_positions of the indicated stage_version.

	        :param transkription_positions: list of positions with a ``writing_process_id``.
	        :param stage_version: '' (no filtering), '<digit>', '<digit>+' or '<digit>-<digit>'.
	        :return: the unfiltered input list for '', otherwise a new list with the
	            matching positions (empty for any other format).
	        """
	        if stage_version == '':
	            return transkription_positions
	        if re.match(r'^\d$', stage_version):
	            wanted_ids = [int(stage_version)]
	        elif re.match(r'^\d\+$', stage_version):
	            # open range: from the given version up to the last known writing process
	            first_id = int(stage_version.replace('+', ''))
	            wanted_ids = range(first_id, len(WritingProcess.VERSION_DESCRIPTION))
	        elif re.match(r'^\d\-\d$', stage_version):
	            first_id, last_id = (int(part) for part in stage_version.split('-'))
	            wanted_ids = range(first_id, last_id + 1)
	        else:
	            return []
	        return [position for position in transkription_positions
	                if position.writing_process_id in wanted_ids]

	    def _get_words(self, words, highlighted_words=None):
	        """Return the words that will be highlighted.

	        :return: highlighted_words if it is not None, else words.
	        """
	        return highlighted_words if highlighted_words is not None else words

	    def _write_words(self, out, stage_version, number_lines):
	        """Write the text of all words of the page to the stream out.

	        :param out: writable text stream.
	        :param stage_version: only words that have transkription positions of this
	            stage version are written ('' writes every word).
	        :param number_lines: if true, start each line with its number.
	        """
	        first_word_of_line = None
	        for word in self.page.words:
	            if first_word_of_line is None or first_word_of_line.line_number != word.line_number:
	                out.write('\n')
	                first_word_of_line = word
	                if number_lines:
	                    # only even line numbers are printed, odd lines get a single blank
	                    if word.line_number % 2 == 0:
	                        out.write(str(word.line_number).zfill(2) + ' ')
	                    else:
	                        out.write(' ')
	            if stage_version == ''\
	               or self._get_transkription_positions(word.transkription_positions, stage_version=stage_version):
	                if word.edited_text is not None:
	                    out.write(word.edited_text + ' ')
	                elif word.text is not None:
	                    out.write(word.text + ' ')

	    def convert(self, output_file=None, stage_version='', highlighted_words=None):
	        """Write all words as text to output_file or, if it is None, to stdout.

	        Line numbers are only written when printing to stdout.

	        :param output_file: name of the file to write, None for stdout.
	        :param stage_version: see :meth:`_get_transkription_positions`.
	        :param highlighted_words: unused by the text converter.
	        :return: exit code 0
	        """
	        if output_file is None:
	            # sys.stdout is not ours to close: closing it would break every later print.
	            self._write_words(sys.stdout, stage_version, True)
	        else:
	            with open(output_file, 'w') as out:
	                self._write_words(out, stage_version, False)
	        return 0

	    @classmethod
	    def CREATE_CONVERTER(cls, page, non_testing=True, converter_type='', show_word_insertion_mark=False, key=''):
	        """Return a converter of type converter_type.

	        The converter class is looked up by name (converter_type + 'Converter')
	        among the direct subclasses of cls.

	        :param key: only passed to the json converters.
	        :return: an instance of the matching subclass (e.g. SVGConverter for 'SVG',
	            HTMLConverter for 'HTML'), a plain Converter if there is no match.
	        """
	        subclasses_by_name = {subclass.__name__: subclass for subclass in cls.__subclasses__()}
	        converter_cls = subclasses_by_name.get(converter_type + 'Converter')
	        if converter_cls is None:
	            return Converter(page, non_testing, show_word_insertion_mark)
	        if converter_cls in (JSONConverter, oldJSONConverter):
	            # the json converters take a key instead of show_word_insertion_mark
	            return converter_cls(page, non_testing=non_testing, key=key)
	        return converter_cls(page, non_testing, show_word_insertion_mark)

	class JSONConverter(Converter):
	    """This class can be used to convert a 'svgWordPositions' xml file to a json file.
	    """

	    def __init__(self, page, faksimile_page=None, non_testing=True, key=''):
	        """Initialize the converter.

	        :param page: the page that will be converted.
	        :param faksimile_page: optional page providing the faksimile image and
	            additional word positions (see :meth:`create_json_dict`).
	        :param non_testing: passed on to Converter.
	        :param key: dictionary key used by :meth:`add_object2dict` for an empty list.
	        """
	        Converter.__init__(self, page, non_testing, False)
	        self.faksimile_page = faksimile_page
	        # add_object2dict reads self.key, so the parameter must be kept.
	        self.key = key

	    def _add_word_to_list(self, words, word, text, text_field=None, edited_text=None, earlier_version=None, overwrites_word=None, parent_id=-1, faksimile_positions=None):
	        """Append one dictionary per transkription position of word to words.

	        The method recurses into ``word.word_parts``; the parts inherit id, text,
	        edited_text, earlier_version and overwrites_word of their parent word.

	        :param words: list that receives the word dictionaries.
	        :param word: the word (or word part) to add.
	        :param text: text written to the 'text' entry.
	        :param text_field: if not None, its left/top are added to the positions.
	        :param parent_id: id of the parent word, -1 if word is not a word part.
	        :param faksimile_positions: if not None, list that receives the
	            dictionaries of the word's faksimile positions.
	        """
	        word_id = word.id if parent_id == -1 else parent_id
	        if edited_text is None:
	            edited_text = word.edited_text
	        if earlier_version is None:
	            earlier_version = word.earlier_version
	        if overwrites_word is None:
	            overwrites_word = word.overwrites_word
	        line_number = word.line_number
	        for tp in word.transkription_positions:
	            if parent_id == -1:
	                tp_id = f'w{word.id}:tp{tp.id}'
	            else:
	                tp_id = f'w{parent_id}:w{word.id}:tp{tp.id}'
	            if text_field is not None:
	                left, top = tp.left + text_field.left, tp.top + text_field.top
	            else:
	                left, top = tp.left, tp.top
	            word_dict = { 'id': word_id, 'text': text, 'left': left, 'top': top, 'width': tp.width,
	                          'height': tp.height, 'line': line_number, 'tp_id': tp_id, 'deleted': word.deleted }
	            if tp.transform is not None:
	                if text_field is None:
	                    word_dict['transform'] = tp.transform.toString()
	                else:
	                    # shift the translation of the matrix by the text field offset;
	                    # left/top then become relative to the transformed origin
	                    matrix = tp.transform.clone_transformation_matrix()
	                    matrix.matrix[Matrix.XINDEX] = round(tp.transform.matrix[Matrix.XINDEX] + text_field.left, 3)
	                    matrix.matrix[Matrix.YINDEX] = round(tp.transform.matrix[Matrix.YINDEX] + text_field.top, 3)
	                    word_dict['transform'] = matrix.toString()
	                    if tp.left > 0:
	                        word_dict['left'] = round(tp.left - tp.transform.matrix[Matrix.XINDEX], 3)
	                    else:
	                        word_dict['left'] = 0
	                    word_dict['top'] = round((tp.height-1.5)*-1, 3)
	            if edited_text is not None:
	                word_dict['edited_text'] = edited_text
	            if earlier_version is not None:
	                word_dict['earlier_version'] = earlier_version.text
	            if overwrites_word is not None:
	                word_dict['overwrites_word'] = overwrites_word.text
	            if parent_id > -1:
	                word_dict['part_text'] = word.text
	            if len(word.deletion_paths) > 0:
	                for deletion_path in word.deletion_paths:
	                    # every further deletion path gets its own copy of the dictionary
	                    if bool(word_dict.get('deletion_path')):
	                        word_dict = word_dict.copy()
	                    word_dict['deletion_path'] = deletion_path.d_attribute
	                    words.append(word_dict)
	                # NOTE(review): a word with deletion paths and paths near it is appended
	                # once more here (the same dict object as the last one) -- confirm that
	                # this duplicate entry is intended.
	                if len(word.deletion_paths_near_word) > 0:
	                    word_dict['paths_near_word'] = word.deletion_paths_near_word
	                    words.append(word_dict)
	            else:
	                words.append(word_dict)
	        if faksimile_positions is not None:
	            for fp in word.faksimile_positions:
	                self._add_faksimile_to_list(word_id, line_number, fp, word.deleted, faksimile_positions, text,
	                                            edited_text=edited_text, earlier_version=earlier_version,
	                                            overwrites_word=overwrites_word, parent_id=parent_id, word_text=word.text)
	        for word_part in word.word_parts:
	            self._add_word_to_list(words, word_part, text, text_field=text_field, edited_text=edited_text,
	                                   earlier_version=earlier_version, overwrites_word=overwrites_word,
	                                   parent_id=word.id, faksimile_positions=faksimile_positions)

	    def _add_faksimile_to_list(self, id, line_number, fp, deleted, faksimile_positions, text, edited_text=None, earlier_version=None, overwrites_word=None, parent_id=-1, word_text='') -> None:
	        """Create a dictionary for the faksimile position fp and append it to faksimile_positions.

	        :param id: id of the word the position belongs to.
	        :param fp: the faksimile position (left, top, width, height, id, transform).
	        :param faksimile_positions: list that receives the new dictionary.
	        :param parent_id: if > -1, word_text is added as 'part_text'.
	        """
	        faksimile_dict = { 'id': id, 'text': text, 'left': fp.left, 'top': fp.top,
	                           'width': fp.width, 'height': fp.height, 'line': line_number, 'fp_id': fp.id, 'deleted': deleted }
	        if fp.transform is not None:
	            faksimile_dict['transform'] = fp.transform.toString()
	        if edited_text is not None:
	            faksimile_dict['edited_text'] = edited_text
	        if earlier_version is not None:
	            faksimile_dict['earlier_version'] = earlier_version.text
	        if overwrites_word is not None:
	            faksimile_dict['overwrites_word'] = overwrites_word.text
	        if parent_id > -1:
	            faksimile_dict['part_text'] = word_text
	        faksimile_positions.append(faksimile_dict)

	    def create_json_dict(self) -> dict:
	        """Create and return a json dictionary.

	        :return: dictionary with the keys 'title', 'number', 'words', 'svg', 'lines',
	            'faksimile', 'faksimile_positions' and 'faksimile_lines'.
	        """
	        words = []
	        faksimile_positions = []
	        text_field = None
	        # The offsets of a text field are only applied if the svg image did not
	        # have a text field and it had to be created from the svg file.
	        if self.page.svg_image is not None and self.page.svg_image.text_field is None:
	            text_field = self.page.svg_image.text_field\
	                = TranskriptionField(self.page.svg_image.file_name).convert_to_text_field()
	        for word in self.page.words:
	            self._add_word_to_list(words, word, word.text, text_field=text_field, faksimile_positions=faksimile_positions)
	        offset = 0 if text_field is None else text_field.ymin
	        svg_image = self.add_object2dict(self.page.svg_image)
	        if self.faksimile_page is not None:
	            if self.page.faksimile_image is None:
	                if self.faksimile_page.faksimile_image.text_field is None\
	                   and self.faksimile_page.text_field is not None:
	                    self.faksimile_page.faksimile_image.text_field = self.faksimile_page.text_field
	                self.page.faksimile_image = self.faksimile_page.faksimile_image
	            # add the positions of the faksimile page that no word has contributed yet
	            known_fp_ids = { entry.get('fp_id') for entry in faksimile_positions }
	            for fp in self.faksimile_page.word_positions:
	                if fp.id not in known_fp_ids:
	                    self._add_faksimile_to_list(fp.id, -1, fp, False, faksimile_positions, fp.text)
	                    known_fp_ids.add(fp.id)
	        faksimile_image = self.add_object2dict(self.page.faksimile_image)
	        if svg_image is not None:
	            svg_image.update({ 'URL': self.page.svg_image.primaryURL,
	                               'x': self.page.svg_image.text_field.left,
	                               'y': self.page.svg_image.text_field.top })
	        if faksimile_image is not None:
	            if bool(faksimile_image.get('transform_string')):
	                faksimile_image['transform'] = faksimile_image.get('transform_string')
	            faksimile_image.update({ 'secondaryURL': LOCAL_SERVER + "faksimiles/" + self.page.faksimile_image.file_name,
	                                     'x': 0, 'y': 0 })
	        lines = []
	        faksimile_lines = []
	        for line in self.page.lines:
	            lines.append({ 'id': line.id, 'number': line.id, 'top': line.top + offset, 'bottom': line.bottom })
	            faksimile_lines.append({ 'id': line.id, 'number': line.id,
	                                     'top': line.faksimile_inner_top, 'bottom': line.faksimile_inner_bottom })
	        return { 'title': self.page.title, 'number': self.page.number, 'words': words, 'svg': svg_image, 'lines': lines,
	                 'faksimile': faksimile_image, 'faksimile_positions': faksimile_positions, 'faksimile_lines': faksimile_lines }

	    def convert(self, output_file=None, stage_version='', highlighted_words=None):
	        """Convert the page to JSON and write it to output_file.

	        :param output_file: name of the json file, 'output.json' if None.
	        :param stage_version: unused by the json converter.
	        :param highlighted_words: unused by the json converter.
	        :return: exit code 0
	        :raises Exception: if creating or dumping the json dictionary fails.
	        """
	        if output_file is None:
	            output_file = 'output.json'
	        # the context manager closes the file even if dumping fails
	        with open(output_file, "w+") as json_file:
	            try:
	                json.dump(self.create_json_dict(), json_file)
	            except Exception as error:
	                raise Exception('Error in json.dump') from error
	        return 0

	    def add_object2dict(self, object_instance):
	        """Return a json representation of object_instance.

	        Instances of builtin types are returned as they are, except for lists,
	        whose items are converted recursively. All other objects are converted
	        according to the 'properties' of their class' semantic dictionary.

	        [:return:] json dict, list or object_instance
	        """
	        object_type = type(object_instance)
	        if object_type.__module__ == 'builtins':
	            if object_type != list:
	                return object_instance
	            items = [ self.add_object2dict(item) for item in object_instance ]
	            return items if len(items) > 0 else { self.key: [] }
	        json_dict = {}
	        properties = object_type.get_semantic_dictionary()['properties']
	        for key, description in properties.items():
	            content_type = description.get('class')
	            content = object_instance.__dict__.get(key)
	            if content_type == list\
	               and content is not None\
	               and len(content) > 0\
	               and type(content[0]).__module__ != 'builtins':
	                json_dict[key] = [ self.add_object2dict(content_item) for content_item in content ]
	            elif content_type.__module__ == 'builtins':
	                if content is not None:
	                    json_dict[key] = content
	            elif content is not None:
	                if type(content) == list:
	                    json_dict[key] = [ self.add_object2dict(content_item) for content_item in content ]
	                else:
	                    json_dict[key] = self.add_object2dict(content)
	        return json_dict

	class oldJSONConverter(Converter):
	    """This class can be used to convert a 'svgWordPositions' xml file to a json file.

	    In addition to the json file it writes one TypeScript interface file per
	    converted class and a file that imports all of these interfaces.
	    """
	    # TypeScript names of the python builtin types; any other builtin maps to 'string'
	    PY2TS_DICT = { float: 'number', int: 'number', bool: 'boolean', str: 'string' }

	    def __init__(self, page, non_testing=True, key=''):
	        """Initialize the converter and prepare the interface output directory.

	        The directory 'ts_interfaces' is created if it does not exist, otherwise
	        all '*.ts' files in it are removed.

	        :param key: if not '', only ``page.__dict__[key]`` is converted.
	        """
	        Converter.__init__(self, page, non_testing, False)
	        self.key = key
	        self.interface_output_dir = PathLibPath('ts_interfaces')
	        if not self.interface_output_dir.is_dir():
	            self.interface_output_dir.mkdir()
	        else:
	            for ts_file in self.interface_output_dir.glob('*.ts'):
	                remove(ts_file)

	    def convert(self, output_file=None, stage_version='', highlighted_words=None):
	        """Convert the page (or the object at self.key) to JSON.

	        :param output_file: name of the json file, 'output.json' if None.
	        :param stage_version: unused by the json converter.
	        :param highlighted_words: unused by the json converter.
	        :return: exit code 0, or 2 if the page has no object at self.key.
	        :raises Exception: if json.dump fails.
	        """
	        if output_file is None:
	            output_file = 'output.json'
	        class_dict = {}
	        if self.key == '':
	            json_dict = self.add_object2dict(self.page, class_dict)
	        else:
	            object_instance = self.page.__dict__.get(self.key)
	            if object_instance is None:
	                print(f'Page initialized from {self.page.page_tree.docinfo.URL} does not have an object at "{self.key}"!')
	                return 2
	            json_dict = self.add_object2dict(object_instance, class_dict)
	            if isinstance(json_dict, list):
	                json_dict = { self.key : json_dict }
	        # the context manager closes the file even if dumping fails
	        with open(output_file, "w+") as json_file:
	            try:
	                json.dump(json_dict, json_file)
	            except Exception as error:
	                raise Exception('Error in json.dump') from error
	        self.create_imports(class_dict)
	        return 0

	    def add_object2dict(self, object_instance, class_dict):
	        """Add an object to json_dict and generate json data and interfaces.

	        :param object_instance: the object to convert.
	        :param class_dict: maps each converted class to the file of its interface;
	            an interface is created for every class that is not yet in it.

	        [:return:] json dict or object_instance
	        """
	        object_type = type(object_instance)
	        if object_type.__module__ == 'builtins':
	            if object_type != list:
	                return object_instance
	            items = [ self.add_object2dict(item, class_dict) for item in object_instance ]
	            return { self.key: items } if len(items) > 0 else { self.key: 'null' }
	        json_dict = {}
	        interface_list = []
	        properties = object_type.get_semantic_dictionary()['properties']
	        for key, description in properties.items():
	            content_type = description.get('class')
	            content = object_instance.__dict__.get(key)
	            if content_type == list\
	               and content is not None\
	               and len(content) > 0\
	               and type(content[0]).__module__ != 'builtins':
	                json_dict[key] = [ self.add_object2dict(content_item, class_dict) for content_item in content ]
	                interface_list.append(f'{key}: {type(content[0]).__name__}[];')
	            elif content_type.__module__ == 'builtins':
	                if content_type != list:
	                    ts_type = self.PY2TS_DICT.get(content_type, 'string')
	                    interface_list.append(f'{key}: {ts_type};')
	                json_dict[key] = content
	            elif content is not None and type(content) == list:
	                interface_list.append(f'{key}: {content_type.__name__}[];')
	                json_dict[key] = [ self.add_object2dict(content_item, class_dict) for content_item in content ]
	            else:
	                interface_list.append(f'{key}: {content_type.__name__};')
	                if content is not None:
	                    json_dict[key] = self.add_object2dict(content, class_dict)
	        if object_type not in class_dict:
	            class_dict[object_type] = self.create_interface(object_type.__name__, interface_list)
	        return json_dict

	    def create_imports(self, class_dict):
	        """Write the file 'ts_imports.ts' that imports all created interfaces.

	        :param class_dict: maps the converted classes to their interface files.

	        [:return:] file_name of the import file
	        """
	        ts_file = PathLibPath('ts_imports.ts')
	        with open(ts_file, "w+") as import_file:
	            import_file.write(f'//import all interfaces from {self.interface_output_dir} ' + '\n')
	            for interface_class, interface_path in class_dict.items():
	                import_file.write('import {' + interface_class.__name__ + '} from \'./'
	                                  + str(self.interface_output_dir.joinpath(interface_path.stem)) + '\';\n')
	        return ts_file

	    def create_interface(self, class_name, interface_list) -> PathLibPath:
	        """Create a ts interface from a list of '<key>: <type>;' strings.

	        :param class_name: name of the interface; the file is named after its
	            lower case form.
	        :param interface_list: the member declarations of the interface.

	        [:return:] file_name of interface
	        """
	        ts_file = self.interface_output_dir.joinpath(PathLibPath(f'{class_name.lower()}.ts'))
	        ts_builtin_types = set(self.PY2TS_DICT.values())
	        referenced_types = { declaration.split(': ')[1].replace(';', '').replace('[]', '')
	                             for declaration in interface_list }
	        with open(ts_file, "w") as interface_file:
	            # sorted, so that the generated file does not depend on the set order
	            for import_class_name in sorted(referenced_types - ts_builtin_types):
	                interface_file.write('import {' + import_class_name + '} from \'./' + import_class_name.lower() + '\';\n')
	            interface_file.write(f'export interface {class_name} ' + '{\n')
	            for interface_string in interface_list:
	                interface_file.write('\t' + interface_string + '\n')
	            interface_file.write('}')
	        return ts_file

	class SVGConverter(Converter):
	    """Converter that adds a highlighting rectangle for each transkription position
	    of a 'svgWordPositions' xml file to the svg file of the page.
	    """
	    BG_COLOR = 'yellow'
	    OPACITY = '0.2'

	    def __init__(self, page, non_testing=True, show_word_insertion_mark=False, bg_color=BG_COLOR, opacity=OPACITY):
	        """Initialize the converter with the fill color and the opacity of the rectangles.
	        """
	        Converter.__init__(self, page, non_testing, show_word_insertion_mark)
	        self.bg_color = bg_color
	        self.opacity = opacity

	    def convert(self, output_file=None, stage_version='', highlighted_words=None):
	        """Converts Page to SVG

	        :param output_file: name of the svg file; nothing is written if it is None.
	        :param stage_version: see Converter._get_transkription_positions.
	        :param highlighted_words: words that are filled with self.bg_color.
	        :return: exit code 0
	        :raises Exception: if the page has neither a svg_file nor a svg_image.
	        """
	        # NOTE: the title is computed but not used further in this method.
	        title = 'Test Page' if self.page.title is None else self.page.title
	        if self.page.number is not None:
	            title = '{}, S. {}'.format(title, self.page.number)
	        source_svg = self.page.svg_file
	        if source_svg is None:
	            if self.page.svg_image is None:
	                raise Exception(f'ERROR: xml_source_file {self.page.docinfo.URL} does neither have a svg_file nor a svg_image!')
	            source_svg = self.page.svg_image.file_name
	        transkription_field = TranskriptionField(source_svg)
	        # register the namespaces of the source, so that they are written without generated prefixes
	        for prefix, attribute_name in (('', 'xmlns'), ('xlink', 'xmlns:xlink')):
	            namespace = transkription_field.get_svg_attributes(attribute_name)
	            if bool(namespace):
	                ET.register_namespace(prefix, namespace)
	        svg_tree = ET.parse(source_svg)
	        group = ET.SubElement(svg_tree.getroot(), 'g', attrib={'id': 'Transkription'})
	        if highlighted_words is None:
	            highlighted_words = []
	            palette = [ 'yellow', 'orange' ] if self.bg_color == self.BG_COLOR else [ self.bg_color ]
	        else:
	            palette = ['yellow']
	        palette_index = 0
	        for word in self.page.words:
	            for position in self._get_transkription_positions(word.transkription_positions, stage_version=stage_version):
	                fill = self.bg_color if word in highlighted_words else palette[palette_index]
	                rect_attributes = {
	                    'id': 'word_' + str(word.id) + '_' + str(position.id),
	                    'x': str(position.left + transkription_field.xmin),
	                    'y': str(position.top + transkription_field.ymin),
	                    'width': str(position.width),
	                    'height': str(position.height),
	                    'fill': fill,
	                    'opacity': self.opacity,
	                }
	                rect = ET.SubElement(group, 'rect', attrib=rect_attributes)
	                transform = position.transform
	                if transform is not None:
	                    # move the translation into the matrix; x and y are then relative to it
	                    shifted = transform.clone_transformation_matrix()
	                    shifted.matrix[Matrix.XINDEX] = round(transform.matrix[Matrix.XINDEX] + transkription_field.xmin, 3)
	                    shifted.matrix[Matrix.YINDEX] = round(transform.matrix[Matrix.YINDEX] + transkription_field.ymin, 3)
	                    rect.set('transform', shifted.toString())
	                    rect.set('x', str(round(position.left - transform.matrix[Matrix.XINDEX], 3)))
	                    rect.set('y', str(round((position.height-1.5)*-1, 3)))
	                ET.SubElement(rect, 'title').text = word.text
	            # alternate the color from word to word
	            palette_index = (palette_index + 1) % len(palette)
	        if output_file is not None:
	            svg_tree.write(output_file)
	        return 0

	class HTMLConverter(Converter):
	    """This class can be used to convert a 'svgWordPositions' xml file to a test HTML file.
	    """
	    CSS = """ .highlight0 { background-color: yellow; opacity: 0.2; }
	.highlight1 { background-color: pink; opacity: 0.2; }
	.highlight2 { background-color: red; opacity: 0.2; }
	.foreign { background-color: blue; opacity: 0.4; }
	.overwritten { background-color: green; opacity: 0.4; }
	.word-insertion-mark { background-color: orange; opacity: 0.2; }
	.deleted { background-color: grey; opacity: 0.2; }
	"""

	    def __init__(self, page, non_testing=True, show_word_insertion_mark=False):
	        """Initialize the converter with a default text field.
	        """
	        Converter.__init__(self, page, non_testing, show_word_insertion_mark)
	        self.text_field = TextField()

	    def convert(self, output_file=None, stage_version='', highlighted_words=None):
	        """Converts Page to HTML

	        The svg of the page becomes the background image of a div, on which one
	        absolutely positioned link is placed per transkription position.

	        :param output_file: name of the html file; nothing is written if it is None.
	        :param stage_version: see Converter._get_transkription_positions.
	        :param highlighted_words: words that get the css class 'highlight2'.
	        :return: exit code 0
	        :raises Exception: if the page has neither a svg_image nor a svg_file.
	        """
	        page_title = self.page.title if self.page.title is not None else 'Test Page'
	        if self.page.number is not None:
	            page_title = '{}, S. {}'.format(page_title, self.page.number)
	        if stage_version != '':
	            page_title = page_title + ', Schreibstufe: ' + stage_version
	        if self.page.svg_image is not None:
	            width = self.page.svg_image.width
	            height = self.page.svg_image.height
	            svg_file = self.page.svg_image.file_name
	            if self.page.svg_image.text_field is not None:
	                self.text_field = self.page.svg_image.text_field
	                print('Textfield found ->adjusting data')
	        elif self.page.svg_file is not None:
	            svg_file = self.page.svg_file
	            transkription_field = TranskriptionField(svg_file)
	            width = transkription_field.getWidth()
	            height = transkription_field.getHeight()
	        else:
	            # without a svg there is no background image and no size for the page
	            raise Exception('ERROR: page does neither have a svg_file nor a svg_image!')
	        style_content = ' position: relative; width: {}px; height: {}px; background-image: url("{}"); background-size: {}px {}px '\
	            .format(width, height, path.abspath(svg_file), width, height)
	        style = E.STYLE('#transkription {' + style_content + '}', HTMLConverter.CSS)
	        head = E.HEAD(E.TITLE(page_title), E.META(charset='UTF-8'), style)
	        transkription = E.DIV(id="transkription")
	        counter = 0
	        for word in self.page.words:
	            highlight_class = 'deleted' if word.deleted else 'highlight' + str(counter)
	            if highlighted_words is not None and word in highlighted_words:
	                highlight_class = 'highlight2'
	            earlier_text = '' if word.earlier_version is None else word.earlier_version.text
	            if earlier_text == '' and len(word.word_parts) > 0:
	                parts_with_earlier_version = [ part for part in word.word_parts if part.earlier_version is not None ]
	                earlier_text = parts_with_earlier_version[0].text if len(parts_with_earlier_version) > 0 else ''
	            if earlier_text != '':
	                word_title = 'id: {}/line: {}\n0: {}\n1: {}'.format(str(word.id), str(word.line_number), earlier_text, word.text)
	            else:
	                word_title = 'id: {}/line: {}\n{}'.format(str(word.id), str(word.line_number), word.text)
	            if word.edited_text is not None:
	                word_title += f'\n>{word.edited_text}'
	            for transkription_position in self._get_transkription_positions(word.transkription_positions, stage_version=stage_version):
	                self._append2transkription(transkription, highlight_class, word_title, transkription_position)
	            if word.overwrites_word is not None:
	                overwritten_title = f'{word.text} overwrites {word.overwrites_word.text}'
	                for overwritten_position in word.overwrites_word.transkription_positions:
	                    self._append2transkription(transkription, 'overwritten', overwritten_title, overwritten_position)
	            for part_word in word.word_parts:
	                highlight_class = 'deleted' if part_word.deleted else 'highlight' + str(counter)
	                for part_position in self._get_transkription_positions(part_word.transkription_positions, stage_version=stage_version):
	                    self._append2transkription(transkription, highlight_class, word_title, part_position)
	                if part_word.overwrites_word is not None:
	                    overwritten_title = f'{word.text} overwrites {part_word.overwrites_word.text}'
	                    for overwritten_position in part_word.overwrites_word.transkription_positions:
	                        self._append2transkription(transkription, 'overwritten', overwritten_title, overwritten_position)
	            # alternate between the classes highlight0 and highlight1
	            counter = (counter + 1) % 2
	        for mark_foreign_hands in self.page.mark_foreign_hands:
	            foreign_title = 'id: {}/line: {}\n{} <i>{}</i>'.format(str(mark_foreign_hands.id), str(mark_foreign_hands.line_number),
	                                                                    mark_foreign_hands.foreign_hands_text, mark_foreign_hands.pen)
	            for transkription_position in mark_foreign_hands.transkription_positions:
	                self._append2transkription(transkription, 'foreign', foreign_title, transkription_position)
	        if self.show_word_insertion_mark:
	            word_insertion_mark_class = 'word-insertion-mark'
	            for word_insertion_mark in self.page.word_insertion_marks:
	                wim_title = 'id: {}/line: {}\nword insertion mark'.format(str(word_insertion_mark.id), str(word_insertion_mark.line_number))
	                wim_style = 'position:absolute; top:{0}px; left:{1}px; width:{2}px; height:{3}px;'.format(
	                    word_insertion_mark.top, word_insertion_mark.left, word_insertion_mark.width, word_insertion_mark.height)
	                transkription.append(E.A(' ', E.CLASS(word_insertion_mark_class), title=wim_title, style=wim_style))
	        html = E.HTML(head, E.BODY(transkription))
	        if bool(self.non_testing):
	            open_in_browser(html)
	        if output_file is not None:
	            with open(output_file, 'wb') as html_file:
	                html_file.write(lxml.html.tostring(html, pretty_print=True, include_meta_content_type=True, encoding='utf-8'))
	        return 0

	    def _append2transkription(self, transkription, highlight_class, title, transkription_position):
	        """Append content to transkription-div.

	        :param transkription: the div that receives the new link element.
	        :param highlight_class: css class of the link.
	        :param title: title attribute of the link.
	        :param transkription_position: position that provides top, left, width,
	            height and an optional transform; top and left are reduced by the
	            top and left of self.text_field.
	        """
	        style_content = 'position:absolute; top:{0}px; left:{1}px; width:{2}px; height:{3}px;'.format(
	            transkription_position.top - self.text_field.top, transkription_position.left - self.text_field.left,
	            transkription_position.width, transkription_position.height)
	        if transkription_position.transform is not None:
	            style_content = style_content + ' transform: {}; '.format(transkription_position.transform.toCSSTransformString())
	            # the x origin is never positive
	            origin_x = (transkription_position.left - round(transkription_position.transform.getX(), 1)) * -1
	            transform_origin_x = origin_x if origin_x < 0 else 0
	            style_content = style_content + ' transform-origin: {}px {}px; '.format(transform_origin_x, transkription_position.height)
	        transkription.append(E.A(' ', E.CLASS(highlight_class), title=title, style=style_content))

	def create_pdf_with_highlighted_words(xml_source_file=None, page=None, highlighted_words=None, pdf_file_name='output.pdf', bg_color=SVGConverter.BG_COLOR):
	    """Creates a pdf file highlighting some words.

	    A svg file with the same name (suffix '.svg') is created as an intermediate
	    step and removed after the conversion.

	    :param xml_source_file: file from which the page is created if page is None.
	    :param page: the page to convert.
	    :param highlighted_words: the words that will be highlighted.
	    :param pdf_file_name: name of the pdf file; '.pdf' is appended if missing.
	    :param bg_color: fill color passed on to the svg conversion.
	    """
	    pdf_suffix = '.pdf'
	    if not pdf_file_name.endswith(pdf_suffix):
	        pdf_file_name = pdf_file_name + pdf_suffix
	    # Swap only the trailing suffix: str.replace would also rewrite a '.pdf'
	    # that occurs earlier in the path, e.g. in a directory name.
	    tmp_svg_file = pdf_file_name[:-len(pdf_suffix)] + '.svg'
	    create_svg_with_highlighted_words(xml_source_file=xml_source_file, page=page, highlighted_words=highlighted_words,
	                                      svg_file_name=tmp_svg_file, bg_color=bg_color)
	    if isfile(tmp_svg_file):
	        try:
	            cairosvg.svg2pdf(url=tmp_svg_file, write_to=pdf_file_name)
	        finally:
	            # do not leave the temporary svg behind if the conversion fails
	            remove(tmp_svg_file)

	def create_svg_with_highlighted_words(xml_source_file=None, page=None, highlighted_words=None, svg_file_name='output.svg', bg_color=SVGConverter.BG_COLOR):
	    """Creates a svg file highlighting some words.

	    :param xml_source_file: file from which the page is created if page is None.
	    :param page: the page to convert.
	    :param highlighted_words: the words that will be highlighted.
	    :param svg_file_name: name of the svg file; '.svg' is appended if missing.
	    :param bg_color: fill color of the highlighted words.
	    :raises ValueError: if neither page nor xml_source_file is given.
	    """
	    if page is None:
	        if xml_source_file is None:
	            raise ValueError('Either xml_source_file or page must be specified!')
	        page = Page(xml_source_file)
	    converter = SVGConverter(page, bg_color=bg_color)
	    if not svg_file_name.endswith('.svg'):
	        svg_file_name = svg_file_name + '.svg'
	    converter.convert(output_file=svg_file_name, highlighted_words=highlighted_words)

	def usage():
	    """Print the help text of this script, i.e. the docstring of main.
	    """
	    help_text = main.__doc__
	    print(help_text)

	def main(argv):
	    """This program can be used to convert the word positions to HTML, SVG or TEXT for testing purposes.

	    svgscripts/convert_wordPositions.py OPTIONS <file>

	    OPTIONS:

	        -h|--help:                      show help
	        -H|--HTML                       [default] convert to HTML test file
	        -k|--key=key                    option for json converter:
	                                        only convert object == page.__dict__[key]
	        -o|--output=outputFile          save output to file outputFile
	        -P|--PDF                        convert to PDF test file
	        -S|--SVG                        convert to SVG test file
	        -s|--svg=svgFile:               svg web file
	        -T|--TEXT                       convert to TEXT output
	        -t|--text=text                  highlight word
	        -w|--word-insertion-mark        show word insertion mark on HTML
	        -v|--version=VERSION            show words that belong to writing process VERSION: { 0, 1, 2, 0-1, 0+, etc. }
	        -x|--testing                    execute in test mode, do not write to file or open browser

	    :return: exit code (int)
	    """
	    # NOTE: the docstring above is printed by usage(), i.e. it is user-facing text.
	    convert_to_type = None
	    key = ''
	    non_testing = True
	    output_file = None
	    page = None
	    show_word_insertion_mark = False
	    stage_version = ''
	    svg_file = None
	    text = None

	    try:
	        opts, args = getopt.getopt(argv, "hk:t:HPSTws:o:v:x", ["help", "key=", "text=", "HTML", "PDF", "SVG", "TEXT", "word-insertion-mark", "svg=", "output=", "version=", "testing"])
	    except getopt.GetoptError:
	        usage()
	        return 2

	    for opt, arg in opts:
	        if opt in ('-h', '--help') or not args:
	            usage()
	            return 0
	        elif opt in ('-v', '--version'):
	            # accepted forms: a single digit, '<digit>+' or '<digit>-<digit>'
	            if re.match(r'^(\d|\d\+|\d\-\d)$', arg):
	                stage_version = arg
	            else:
	                raise ValueError('OPTION -v|--version=VERSION does not work with "{}" as value for VERSION!'.format(arg))
	        elif opt in ('-w', '--word-insertion-mark'):
	            show_word_insertion_mark = True
	        elif opt in ('-P', '--PDF'):
	            convert_to_type = 'PDF'
	        elif opt in ('-S', '--SVG'):
	            convert_to_type = 'SVG'
	        elif opt in ('-T', '--TEXT'):
	            convert_to_type = 'TEXT'
	        elif opt in ('-H', '--HTML'):
	            convert_to_type = 'HTML'
	        elif opt in ('-x', '--testing'):
	            non_testing = False
	        elif opt in ('-s', '--svg'):
	            svg_file = arg
	        elif opt in ('-o', '--output'):
	            output_file = arg
	        elif opt in ('-k', '--key'):
	            key = arg
	        elif opt in ('-t', '--text'):
	            text = arg
	            print(arg)

	    if len(args) < 1:
	        usage()
	        return 2
	    if convert_to_type is None:
	        # derive the type from the extension of the output file, default to HTML
	        if output_file is not None and '.' in output_file:
	            convert_to_type = output_file.rsplit('.', 1)[-1].upper()
	        else:
	            convert_to_type = 'HTML'
	    exit_code = 0
	    for word_position_file in args:
	        if not isfile(word_position_file):
	            print("'{}' does not exist!".format(word_position_file))
	            return 2
	        if convert_to_type == 'PDF':
	            if output_file is None:
	                output_file = 'output.pdf'
	            highlighted_words = None
	            page = None
	            if text is not None:
	                page = Page(word_position_file)
	                highlighted_words = [ word for word in page.words if word.text == text ]
	            # Pass the page on, so that the highlighted words belong to the very
	            # page that is rendered instead of to a second instance of it.
	            create_pdf_with_highlighted_words(word_position_file, page=page, pdf_file_name=output_file, highlighted_words=highlighted_words)
	        else:
	            if svg_file is not None:
	                if not isfile(svg_file):
	                    # it is the svg file that is missing here, not the word position file
	                    print("'{}' does not exist!".format(svg_file))
	                    return 2
	                page = PageCreator(word_position_file, svg_file=svg_file)
	            else:
	                page = Page(word_position_file)
	                if page.svg_file is None:
	                    print('Please specify a svg file!')
	                    usage()
	                    return 2
	            highlighted_words = None
	            if text is not None:
	                highlighted_words = [ word for word in page.words if word.text == text ]
	                print([ (word.id, word.text) for word in highlighted_words ])
	            converter = Converter.CREATE_CONVERTER(page, non_testing=non_testing, converter_type=convert_to_type,
	                                                   show_word_insertion_mark=show_word_insertion_mark, key=key)
	            exit_code = converter.convert(output_file=output_file, stage_version=stage_version, highlighted_words=highlighted_words)
	    return exit_code

	if __name__ == "__main__":
	    # Run the script and hand the exit code of main to the interpreter.
	    exit_status = main(sys.argv[1:])
	    sys.exit(exit_status)

convert_wordPositions.pyNo OneTemporaryActions

File Metadata

convert_wordPositions.pyView Options

Event Timeline

convert_wordPositions.py
No OneTemporary
Actions

convert_wordPositions.py
View Options