Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91235631
fix_old_data.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Nov 9, 05:53
Size
28 KB
Mime Type
text/x-python
Expires
Mon, Nov 11, 05:53 (2 d)
Engine
blob
Format
Raw Data
Handle
22166010
Attached To
rNIETZSCHEPYTHON nietzsche-python
fix_old_data.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to fix old data.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
from
deprecated
import
deprecated
from
functools
import
cmp_to_key
import
getopt
import
inspect
import
lxml.etree
as
ET
import
re
import
shutil
import
string
from
svgpathtools
import
svg2paths2
,
svg_to_paths
from
svgpathtools.path
import
Path
as
SVGPath
from
svgpathtools.path
import
Line
import
sys
import
tempfile
from
operator
import
attrgetter
import
os
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
warnings
sys
.
path
.
append
(
'svgscripts'
)
from
convert_wordPositions
import
HTMLConverter
from
datatypes.box
import
Box
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.imprint
import
Imprint
from
datatypes.archival_manuscript
import
ArchivalManuscriptUnity
from
datatypes.mark_foreign_hands
import
MarkForeignHands
from
datatypes.matrix
import
Matrix
from
datatypes.page
import
Page
,
STATUS_MERGED_OK
,
STATUS_POSTMERGED_OK
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
datatypes.positional_word_part
import
PositionalWordPart
from
datatypes.path
import
Path
from
datatypes.word
import
Word
from
datatypes.text_connection_mark
import
TextConnectionMark
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.transkription_position
import
TranskriptionPosition
from
datatypes.word
import
Word
,
update_transkription_position_ids
from
join_faksimileAndTranskription
import
sort_words
from
util
import
back_up
,
back_up_svg_file
,
copy_faksimile_svg_file
,
reset_tp_with_matrix
from
process_files
import
update_svgposfile_status
,
get_extended_text_field
from
process_footnotes
import
save_imprints
from
process_words_post_merging
import
update_faksimile_line_positions
,
MERGED_DIR
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
main_util
import
create_function_dictionary
,
get_manuscript_files
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# Set to True by the test suite so that the fixer functions skip writing
# files and printing progress messages.
UNITTESTING = False
# Upper bound (in svg user units) for the search-window half-width used by
# _get_nodes_with_symbol_id when locating glyph nodes by their x/y position.
MAX_SVG_XY_THRESHOLD = 10

#TODO: fix all svg graphical files: change xlink:href to href!!!!
def convert_old_matrix(tp, xmin, ymin) -> (Matrix, float, float):
    """Return a new transformation matrix plus x/y offsets for an old transkription_position.

    The clone of tp.transform gets xmin/ymin added to its translation
    entries; x is tp.left re-expressed relative to the shifted matrix
    (0 when tp.left is not positive), and y is the negated height offset.
    """
    new_matrix = tp.transform.clone_transformation_matrix()
    # shift the translation entries of the cloned matrix by xmin/ymin
    for index, offset in ((Matrix.XINDEX, xmin), (Matrix.YINDEX, ymin)):
        new_matrix.matrix[index] = round(tp.transform.matrix[index] + offset, 3)
    new_x = 0
    if tp.left > 0:
        new_x = round(tp.left - tp.transform.matrix[Matrix.XINDEX], 3)
    new_y = round(-(tp.height - 1.5), 3)
    return new_matrix, new_x, new_y
def extend_text_field(page, redo=False) -> bool:
    """Extend the text_field of page.svg_image and shrink its svg file to it.

    [:return:] True when the page was updated, False when it had already
    been changed by this script and redo is not set.
    """
    if not redo and page_already_changed(page):
        return False
    extended_field = get_extended_text_field(page.svg_image.file_name,
            multipage_index=page.multipage_index)
    # mirror the extended field onto the svg image and re-attach it to the tree
    page.svg_image.width = extended_field.width
    page.svg_image.height = extended_field.height
    page.svg_image.text_field = extended_field
    page.svg_image.attach_object_to_tree(page.page_tree)
    # update the transkription field geometry and crop the svg file to it
    field = TranskriptionField(page.svg_image.file_name, multipage_index=page.multipage_index)
    field.xmin, field.ymin = extended_field.left, extended_field.top
    field.width, field.height = extended_field.width, extended_field.height
    field.shrink_svg_to_transkription_field(redo=True)
    if not UNITTESTING:
        save_page(page)
    return True
def save_page(page, attach_first=False, backup=False, script_name=None):
    """Write the page back to its xml file.

    backup       -- back up page.xml_file before writing
    attach_first -- attach the words to the tree before writing
    script_name  -- recorded as the modifying script; defaults to
                    "<this file>:<name of the calling function>"
    """
    if backup:
        back_up(page, page.xml_file)
    if attach_first:
        page.update_and_attach_words2tree()
    if script_name is None:
        # identify the function that requested the save (one frame up)
        caller_name = inspect.currentframe().f_back.f_code.co_name
        script_name = f'{__file__}:{caller_name}'
    write_pretty(xml_element_tree=page.page_tree,
                 file_name=page.page_tree.docinfo.URL,
                 script_name=script_name,
                 file_type=FILE_TYPE_SVG_WORD_POSITION)
def page_already_changed(page) -> bool:
    """Return True if the calling function has already modified this page.

    Looks for a //metadata/modifiedBy entry whose @script is
    "<this file>:<name of the calling function>".
    """
    script_id = f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}'
    matches = page.page_tree.xpath(f'//metadata/modifiedBy[@script="{script_id}"]')
    return len(matches) > 0
def fix_faksimile_line_position(page, redo=False) -> bool:
    """Create faksimile line positions for the page.

    [:return:] True when the line positions were (re)created, False when
    the page was already changed by this script and redo is not set.
    """
    if not redo and page_already_changed(page):
        # fixed: dropped the stray trailing semicolon after this return
        return False
    update_faksimile_line_positions(page)
    if not UNITTESTING:
        save_page(page)
    return True
def check_faksimile_positions(page, redo=False) -> bool:
    """Check the faksimile word positions of the page against its svg data source.

    Reads the svg file referenced by //data-source/@file, and for each word
    position that differs from the corresponding rect in the faksimile svg,
    copies left/top (and a recomputed bottom) from the svg rect back onto
    the word position.

    [:return:] True when at least one position was changed.
        NOTE(review): returns None implicitly when the page has no
        //data-source/@file entry.
    """
    if len(page.page_tree.xpath('//data-source/@file')) > 0:
        svg_file = page.page_tree.xpath('//data-source/@file')[0]
        svg_tree = ET.parse(svg_file)
        positions_are_equal_counter = 0
        page_changed = False
        for faksimile_page in FaksimilePage.GET_FAKSIMILEPAGES(svg_tree):
            # only the faksimile page matching this page's title and number
            if page.title == faksimile_page.title\
               and page.number == faksimile_page.page_number:
                #print([fp.id for fp in faksimile_page.word_positions ])
                for word in page.words:
                    for fp in word.faksimile_positions:
                        # rects in the faksimile svg sharing this position's id
                        rect_fps = [ rfp for rfp in faksimile_page.word_positions if rfp.id == fp.id ]
                        if len(rect_fps) > 0:
                            rfp = rect_fps[0]
                            if fp.left != rfp.left or fp.top != rfp.top:
                                #print(f'{fp.id}: {fp.left}/{rfp.left} {fp.top}/{rfp.top}')
                                fp.left = rfp.left
                                fp.top = rfp.top
                                # bottom is derived from the new top plus the rect height
                                fp.bottom = fp.top + rfp.height
                                word.attach_word_to_tree(page.page_tree)
                                page_changed = True
                            else:
                                positions_are_equal_counter += 1
                print(f'{positions_are_equal_counter}/{len(page.words)} are equal')
        if page_changed and not UNITTESTING:
            save_page(page)
        return page_changed
def fix_faksimile_positions(page, redo=False) -> bool:
    """Convert the faksimile positions of all words to absolute values.

    Adds the page text field's xmin/ymin to every faksimile position.
    [:return:] fixed
    """
    if not redo:
        modified = page.page_tree.xpath(f'//metadata/modifiedBy[@script="{__file__}"]')
        if len(modified) > 0:
            return False
    offset_x = page.text_field.xmin
    offset_y = page.text_field.ymin
    for word in page.words:
        for fp in word.faksimile_positions:
            fp.left += offset_x
            fp.top += offset_y
            fp.bottom += offset_y
        word.attach_word_to_tree(page.page_tree)
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        write_pretty(xml_element_tree=page.page_tree, file_name=page.page_tree.docinfo.URL,
                     script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
    return True
def _fix_tp_of_word(page, word, text_field):
    """Shift the transkription positions of *word* from text_field-relative
    coordinates to absolute coordinates, then recurse into its sub-words.
    """
    for tp in word.transkription_positions:
        tp.left += text_field.left
        tp.top += text_field.top
    reset_tp_with_matrix(word.transkription_positions)
    if type(word) == Word:
        # BUG FIX: the original iterated word.__dict__.items(), whose
        # (key, value) tuples can never be of type Word, so Word-typed
        # attributes were silently skipped; iterate the values instead.
        words_in_word = word.word_parts\
                + [ attr for attr in word.__dict__.values() if type(attr) == Word ]
        for wp in words_in_word:
            _fix_tp_of_word(page, wp, text_field)
def fix_tp_with_matrix(page, redo=False) -> bool:
    """Fix transkription positions that carry a rotation matrix
    ->set left to 0 and top to -5.

    [:return:] fixed
    """
    if page.svg_image is None or page.svg_image.text_field is None:
        x_offset = y_offset = 0
    else:
        x_offset = page.svg_image.text_field.left
        y_offset = page.svg_image.text_field.top
    for word in page.words:
        reset_tp_with_matrix(word.transkription_positions, tr_xmin=x_offset, tr_ymin=y_offset)
        for part in word.word_parts:
            reset_tp_with_matrix(part.transkription_positions, tr_xmin=x_offset, tr_ymin=y_offset)
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        save_page(page, attach_first=True)
    return True
def _fix_old_transkription_positions(page, redo=False) -> bool:
    """Fix transkription positions ->set relative to 0,0 instead of
    text_field.left,text_field.top.

    Ensures page.svg_image has a text_field, then shifts line numbers and
    the transkription positions of all words, marks of foreign hands and
    text connection marks by the text field's left/top offsets.
    [:return:] fixed
    """
    if page.svg_image is not None\
       and page.svg_image.text_field is None:
        # NOTE(review): this inner branch is dead code — the enclosing
        # condition already guarantees page.svg_image is not None. It also
        # references undefined names (tf, svg_file) and SVGImage, which is
        # not imported; it would raise NameError if it ever ran.
        if page.svg_image is None:
            if page.svg_file is not None:
                transkription_field = TranskriptionField(page.svg_file)
                width = round(tf.documentWidth, 3)
                height = round(tf.documentHeight, 3)
                page.svg_image = SVGImage(file_name=svg_file, width=width,\
                        height=height, text_field=transkription_field.convert_to_text_field())
                page.svg_image.attach_object_to_tree(page.page_tree)
            else:
                raise Exception(f'ERROR page {page.page_tree.docinfo.URL} does not have a svg_file!')
        elif page.svg_image.text_field is None:
            # derive the missing text field from the svg file itself
            page.svg_image.text_field = TranskriptionField(page.svg_image.file_name).convert_to_text_field()
            page.svg_image.attach_object_to_tree(page.page_tree)
        # shift line numbers by the text field's vertical offset
        for line_number in page.line_numbers:
            line_number.top += page.svg_image.text_field.top
            line_number.bottom += page.svg_image.text_field.top
            line_number.attach_object_to_tree(page.page_tree)
        # shift the transkription positions of every word-like object
        for word in page.words:
            _fix_tp_of_word(page, word, page.svg_image.text_field)
        for mark in page.mark_foreign_hands:
            _fix_tp_of_word(page, mark, page.svg_image.text_field)
        for tcm in page.text_connection_marks:
            _fix_tp_of_word(page, tcm, page.svg_image.text_field)
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, attach_first=True)
        return True
    return False
def _fix_old_pwps(page, old_tps):
    """Shift the positional_word_parts below each given transkription
    position node by the svg image's text field offsets (left for 'left',
    top for both 'top' and 'bottom')."""
    x_offset = page.svg_image.text_field.left
    y_offset = page.svg_image.text_field.top
    for tp in old_tps:
        for pwp in tp.xpath(f'./{PositionalWordPart.XML_TAG}'):
            for attribute, offset in (('left', x_offset), ('top', y_offset), ('bottom', y_offset)):
                pwp.set(attribute, str(float(pwp.get(attribute)) + offset))
def _fix_quotation_mark_tps(page, old_tps):
    """Enlarge the height of transkription positions containing quotation marks.

    For each position node, the height is increased by the vertical distance
    between its tallest positional word part and its topmost one.
    """
    for tp in old_tps:
        parts = tp.xpath(f'./{PositionalWordPart.XML_TAG}')
        tallest = max(parts, key=lambda pwp: float(pwp.get('height')))
        topmost = min(parts, key=lambda pwp: float(pwp.get('top')))
        delta = abs(float(tallest.get('top')) - float(topmost.get('top')))
        tp.set('height', str(float(tp.get('height')) + delta))
def fix_transkription_positions(page, redo=False) -> bool:
    """Fix transkription positions ->set relative to 0,0 instead of
    text_field.left,text_field.top.

    First migrates old pages (svg_image without text_field) via
    _fix_old_transkription_positions, then repairs positional word parts
    that drifted more than THRESHOLD from their parent, and finally fixes
    the heights of positions whose tallest part is not the topmost one
    (quotation marks).
    [:return:] fixed
    """
    # maximum allowed horizontal drift between a positional word part
    # and its parent transkription position
    THRESHOLD = 10
    if page.svg_image is not None\
       and page.svg_image.text_field is None:
        if not _fix_old_transkription_positions(page):
            return False
    # parents of first (@id="0") positional word parts that drifted too far left/right
    _fix_old_pwps(page, [ pwp.getparent() for pwp in page.page_tree.xpath(f'//{PositionalWordPart.XML_TAG}[@id="0"]')\
            if abs(float(pwp.get('left')) - float(pwp.getparent().get('left'))) > THRESHOLD ])
    # transkription positions whose tallest part is not also the topmost part
    _fix_quotation_mark_tps(page, [ tp for tp in page.page_tree.xpath(f'//{TranskriptionPosition.XML_TAG}')\
            if len(tp.xpath(f'./{PositionalWordPart.XML_TAG}')) > 0\
            and sorted(tp.xpath(f'./{PositionalWordPart.XML_TAG}'), key=lambda pwp: float(pwp.get('height')), reverse=True)[0]\
            != sorted(tp.xpath(f'./{PositionalWordPart.XML_TAG}'), key=lambda pwp: float(pwp.get('top')))[0] ])
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        save_page(page)
    return True
def fix_styles(page, redo=False):
    """Remove all but the first //style node from the page tree."""
    style_nodes = page.page_tree.xpath('//style')
    if len(style_nodes) > 1:
        for extra_node in style_nodes[1:]:
            extra_node.getparent().remove(extra_node)
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        save_page(page)
    return True
def fix_imprints(page, redo=False):
    """Create imprints for the page if its tree does not contain any yet.

    NOTE(review): the original docstring ("Remove unused styles from
    tree.") was copy-pasted from fix_styles and did not match this code.
    """
    if len(page.page_tree.xpath('//' + Imprint.XML_TAG)) == 0:
        save_imprints(page)
    return True
def merge_transkription_positions(page, redo=False) -> bool:
    """Fix transkription positions of merged words.

    Loads the pre-merge version of the page from the MERGED_DIR sibling
    directory, syncs its words line by line with the current page's words,
    and adopts the synced transkription positions. When a word's text does
    not match the text of its synced positions, the user is asked
    interactively whether to re-sync that word's line with force_sync.
    [:return:] fixed (False when no merged copy of the page exists)
    """
    if not isdir(dirname(page.page_tree.docinfo.URL) + sep + MERGED_DIR)\
       or not isfile(dirname(page.page_tree.docinfo.URL) + sep + MERGED_DIR + sep + basename(page.page_tree.docinfo.URL)):
        return False
    merged_page = Page(dirname(page.page_tree.docinfo.URL) + sep + MERGED_DIR + sep + basename(page.page_tree.docinfo.URL))
    sync_dictionary = sync_words_linewise(merged_page.words, page.words, merged_page.line_numbers)
    words = []
    for source_word in merged_page.words:
        words.append(source_word)
        if bool(sync_dictionary.get(source_word)):
            _sync_transkriptions_with_words(source_word, sync_dictionary)
        if source_word.text != ''.join([ t.get_text() for t in source_word.transkription_positions ]):
            # the synced positions do not spell the word's text -> ask the user
            text = ''.join([ t.get_text() for t in source_word.transkription_positions ])
            print(f'{source_word.line_number}: {source_word.text} has transkription_positions with text "{text}".')
            response = input('Change? [Y/n]>')
            if not response.startswith('n'):
                # re-sync only this word's line, forcing a match for source_word
                new_sync_dictionary = sync_words_linewise(merged_page.words, page.words,\
                        [ line for line in merged_page.line_numbers if line.id == source_word.line_number ],\
                        force_sync_on_word=source_word)
                if bool(new_sync_dictionary.get(source_word)):
                    _sync_transkriptions_with_words(source_word, new_sync_dictionary)
                else:
                    raise Exception(f'Could not find sourc_word {source_word.text} in {new_sync_dictionary}!')
    page.words = words
    page.update_and_attach_words2tree()
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        save_page(page)
    return True
def fix_graphical_svg_file(page, redo=False) -> bool:
    """Fix the glyphs of words for which there is a /changed-word or
    /deleted-word entry in page.page_tree.

    Deleted words get their svg use-nodes hidden; changed words get their
    x attribute shifted by the difference between the current and the
    recorded position. The svg file is backed up first and then rewritten.
    [:return:] fixed
    """
    svg_tree = ET.parse(page.svg_file)
    transkription_field = TranskriptionField(page.source)
    # lxml maps the default namespace to None; rename it 'ns' for xpath use
    namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
    back_up_svg_file(svg_tree, namespaces=namespaces)
    # old pages (no text_field on the svg image) still store coordinates
    # relative to the transkription field, so add its offsets back
    tr_xmin = transkription_field.xmin if (page.svg_image is None or page.svg_image.text_field is None) else 0
    tr_ymin = transkription_field.ymin if (page.svg_image is None or page.svg_image.text_field is None) else 0
    for deleted_word_node in page.page_tree.xpath('//deleted-word'):
        deleted_word = Word.create_cls(deleted_word_node)
        _run_function_on_nodes_for_word(svg_tree, namespaces, deleted_word, tr_xmin, tr_ymin,\
                _set_node_attribute_to, 'visibility', 'hidden')
    for changed_word_node in page.page_tree.xpath('//changed-word'):
        changed_word = Word.create_cls(changed_word_node)
        try:
            word = [ word for word in page.words if word.id == changed_word.id and word.text == changed_word.text ][0]
            left_difference = word.transkription_positions[0].left - changed_word.transkription_positions[0].left
            _run_function_on_nodes_for_word(svg_tree, namespaces, word, tr_xmin, tr_ymin,\
                    _add_value2attribute, 'x', left_difference)
        except IndexError:
            warnings.warn(f'There is no word for changed_word {changed_word.id}: "{changed_word.text}" in {page.page_tree.docinfo.URL}!')
    copy_faksimile_svg_file(target_file=page.svg_file, faksimile_tree=svg_tree, namespaces=namespaces)
    # fixed: the function is annotated -> bool but used to fall off the end
    # returning None, so main() never counted pages processed by it
    return True
def
_add_value2attribute
(
node
,
attribute
,
value
):
"""Add left_difference to x of node.
"""
node
.
set
(
attribute
,
str
(
float
(
node
.
get
(
attribute
))
+
value
))
node
.
set
(
'changed'
,
'true'
)
def _get_nodes_with_symbol_id(svg_tree, namespaces, symbol_id, svg_x, svg_y, threshold=0.1) -> list:
    """Return un-'changed' //ns:use nodes referencing #symbol_id near (svg_x, svg_y).

    The search window is widened by 1 unit at a time (while threshold stays
    below MAX_SVG_XY_THRESHOLD) until at least one node is found.
    """
    # iterative version of the original's tail recursion
    while True:
        candidates = svg_tree.xpath(\
                f'//ns:use[@xlink:href="#{symbol_id}" and @x > {svg_x-threshold} and @x < {svg_x+threshold} and @y > {svg_y-threshold} and @y < {svg_y+threshold} ]',\
                namespaces=namespaces)
        nodes = [ node for node in candidates if not bool(node.get('changed')) ]
        if len(nodes) > 0 or threshold >= MAX_SVG_XY_THRESHOLD:
            return nodes
        threshold += 1
def
_run_function_on_nodes_for_word
(
svg_tree
,
namespaces
,
word
,
tr_xmin
,
tr_ymin
,
function_on_node
,
attribute
,
value
):
"""Run function on nodes for words.
"""
for
tp
in
word
.
transkription_positions
:
for
pwp
in
tp
.
positional_word_parts
:
symbol_id
=
pwp
.
symbol_id
svg_x
=
pwp
.
left
+
tr_xmin
svg_y
=
pwp
.
bottom
+
tr_ymin
nodes
=
_get_nodes_with_symbol_id
(
svg_tree
,
namespaces
,
symbol_id
,
svg_x
,
svg_y
)
if
len
(
nodes
)
>
0
:
node
=
nodes
[
0
]
function_on_node
(
node
,
attribute
,
value
)
def
_set_node_attribute_to
(
node
,
attribute
,
value
):
"""Set attribute of node to value.
"""
node
.
set
(
attribute
,
str
(
value
))
node
.
set
(
'changed'
,
'true'
)
def sync_words_linewise(source_words, target_words, lines, force_sync_on_word=None) -> dict:
    """Sync words line by line and return a dictionary mapping each source
    word to the list of corresponding target words.

    Every word first gets a processed=False flag; per line, words are
    sorted by the left edge of their first transkription position and
    matched by _sync_same_length or _sync_more_target_words.
    """
    result_dict = {}
    for word in target_words + source_words:
        word.processed = False
    left_edge = lambda word: word.transkription_positions[0].left
    for line in lines:
        on_line_src = sorted((w for w in source_words if w.line_number == line.id), key=left_edge)
        on_line_tgt = sorted((w for w in target_words if w.line_number == line.id), key=left_edge)
        if len(on_line_tgt) == len(on_line_src):
            _sync_same_length(result_dict, on_line_src, on_line_tgt, force_sync_on_word=force_sync_on_word)
        elif len(on_line_src) < len(on_line_tgt):
            _sync_more_target_words(result_dict, on_line_src, on_line_tgt, force_sync_on_word=force_sync_on_word)
        else:
            print('okey dokey')
    return result_dict
def _force_sync_on_word(force_sync_on_word, target_words_on_line, result_dict):
    """Interactively force a sync between *force_sync_on_word* and a
    user-chosen subset of the still unprocessed target words on the line.

    The user may answer with a range "a-b", space-separated indices, or
    nothing (= all unprocessed words). Raises Exception when there are no
    unprocessed target words left on the line.
    """
    unprocessed_target_words = [t_word for t_word in target_words_on_line if not t_word.processed]
    if len(unprocessed_target_words) > 0:
        print([ (i, t_word.text) for i, t_word in enumerate(unprocessed_target_words) ])
        response = input(f'Please specify indices of words to sync {force_sync_on_word.text} with: [default:0-{len(unprocessed_target_words)-1}]>')
        # default: use every unprocessed target word
        indices = [ i for i in range(0, len(unprocessed_target_words)) ]
        if re.match(r'\d+-\d+', response):
            # inclusive range "a-b"
            index_strings = response.split('-')
            indices = [ i for i in range(int(index_strings[0]), int(index_strings[1])+1) ]
        elif response != '':
            # space-separated list of indices
            indices = [ int(i) for i in response.split(' ') ]
        target_words = []
        for i in indices:
            target_words.append(unprocessed_target_words[i])
        result_dict.update({ force_sync_on_word: target_words })
    else:
        raise Exception(f'There are no unprocessed target_words for {force_sync_on_word.text} on line {force_sync_on_word.line_number}!')
def
_sync_transkriptions_with_words
(
word
,
sync_dictionary
):
"""Sync transkription_positions of word with syncronized words.
"""
word
.
transkription_positions
=
[]
for
target_word
in
sync_dictionary
[
word
]:
word
.
transkription_positions
+=
target_word
.
transkription_positions
def _sync_more_target_words(result_dict, source_words_on_line, target_words_on_line, force_sync_on_word=None):
    """Sync a line on which there are more target words than source words.

    Walks the target words left to right; a source word whose text equals
    a target word's text is matched 1:1, a source word whose text starts
    with a target word's text becomes the "current" word and collects
    subsequent target words until their concatenated text equals its text.
    Unmatched target words trigger a warning; force_sync_on_word is
    resolved interactively at the end.
    """
    current_source_word = None
    for target_word in target_words_on_line:
        if current_source_word is not None\
           and current_source_word.text.startswith(''.join([ w.text for w in result_dict[current_source_word] ]) + target_word.text):
            # target_word continues the text of the currently collected source word
            result_dict[current_source_word].append(target_word)
            target_word.processed = True
            if current_source_word.text == ''.join([ w.text for w in result_dict[current_source_word] ]):
                # collection complete
                current_source_word = None
        elif len([ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text == target_word.text ]) > 0:
            # exact 1:1 match with an unprocessed source word
            source_word = [ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text == target_word.text ][0]
            target_word.processed = True
            source_word.processed = True
            result_dict.update({ source_word: [ target_word ] })
        elif len([ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text.startswith(target_word.text) ]) > 0:
            # target_word is a prefix of an unprocessed source word -> start collecting
            current_source_word = [ s_word for s_word in source_words_on_line if not s_word.processed and s_word.text.startswith(target_word.text) ][0]
            current_source_word.processed = True
            target_word.processed = True
            result_dict.update({ current_source_word: [ target_word ] })
        else:
            msg = f'On line {target_word.line_number}: target_word "{target_word.text}" does not have a sibling in {[ s.text for s in source_words_on_line if not s.processed ]}'
            warnings.warn(msg)
    if force_sync_on_word is not None:
        _force_sync_on_word(force_sync_on_word, target_words_on_line, result_dict)
def _sync_same_length(result_dict, source_words_on_line, target_words_on_line, force_sync_on_word=None):
    """Sync a line on which source and target have the same number of words.

    Matches each source word with the target word at the same index when
    their texts agree, otherwise with the first unprocessed target word of
    equal text. Unmatched source words trigger a warning;
    force_sync_on_word is resolved interactively at the end.
    """
    for i, word in enumerate(source_words_on_line):
        if word.text == target_words_on_line[i].text:
            # positional match
            word.processed = True
            target_words_on_line[i].processed = True
            result_dict.update({ word: [ target_words_on_line[i] ] })
        elif len([ t_word for t_word in target_words_on_line if not t_word.processed and t_word.text == word.text ]) > 0:
            # same text elsewhere on the line
            target_word = [ t_word for t_word in target_words_on_line if not t_word.processed and t_word.text == word.text ][0]
            word.processed = True
            target_word.processed = True
            result_dict.update({ word: [ target_word ] })
        else:
            msg = f'On line {word.line_number}: source_word "{word.text}" does not have a sibling in {[ s.text for s in target_words_on_line]}'
            warnings.warn(msg)
    if force_sync_on_word is not None:
        _force_sync_on_word(force_sync_on_word, target_words_on_line, result_dict)
def usage():
    """Print information on how to use this script (main's docstring)."""
    print(main.__doc__)
def main(argv):
    """This program can be used to fix old data.
    svgscripts/fix_old_data.py [OPTIONS] <xmlManuscriptFile|svg_pos_file>

        <xmlManuscriptFile> a xml file about a manuscript, containing information about its pages.
        <svg_pos_file> a xml file about a page, containing information about svg word positions.

        OPTIONS:
        -h|--help                          show help
        -c|--check-faksimile-positions     check whether faksimile positions have been updated
        -e|--update-extended-textfield     update extended textfield to svg_image
        -i|--fix-imprints                  add imprints to page
        -l|--faksimile-line-position       create faksimile line positions
        -p|--faksimile-positions           fix old faksimile positions
        -r|--redo                          rerun
        -s|--fix-graphical-svg             fix use position of glyphs for words changed by 'changed-word' and 'deleted-word' in xml file.
        -S|--fix-styles                    fix use position of glyphs for words changed by 'changed-word' and 'deleted-word' in xml file.
        -t|--transkription-positions      fix old transkription positions
        -M|--matrix                        fix old transkription positions with transform matrix

        :return: exit code (int)
    """
    # map command line options to fixer functions; 'default' is used when
    # no function option is given
    function_list = []
    function_dict = create_function_dictionary(['-c', '--check-faksimile-positions'], check_faksimile_positions)
    function_dict = create_function_dictionary(['-l', '--faksimile-line-position'], fix_faksimile_line_position, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-p', '--faksimile-positions'], fix_faksimile_positions, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-m', '--merge-positions'], merge_transkription_positions, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-s', '--fix-graphical-svg'], fix_graphical_svg_file, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-M', '--matrix'], fix_tp_with_matrix, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-t', '--transkription-positions'], fix_transkription_positions, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-S', '--fix-styles'], fix_styles, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['-i', '--fix-imprints'], fix_imprints, function_dictionary=function_dict)
    function_dict = create_function_dictionary(['default', '-e', '--update-extended-textfield'], extend_text_field, function_dictionary=function_dict)
    redo = False;
    try:
        opts, args = getopt.getopt(argv, "hcplrmsStMie", ["help", "check-faksimile-positions", "faksimile-positions", "faksimile-line-position",\
                "redo", "merge-positions", "fix-graphical-svg", "fix-styles", "transkription-positions", 'matrix', 'fix-imprints', 'update-extended-textfield'])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-r', '--redo'):
            redo = True;
        elif opt in function_dict.keys():
            function_list.append(function_dict[opt])
    if len(function_list) == 0:
        function_list.append(function_dict['default'])
    if len(args) < 1:
        usage()
        return 2
    exit_status = 0
    for xml_file in get_manuscript_files(args):
        if isfile(xml_file):
            # count, per function, how many pages were actually changed
            counters = { f.__name__: 0 for f in function_list }
            for current_function in function_list:
                # select pages by status: faksimile fixers need merged pages,
                # extend_text_field needs 'blank' pages, everything else 'OK'
                status_contains = STATUS_MERGED_OK if 'faksimile' in current_function.__name__ else 'OK'
                if 'extend_text_field' in current_function.__name__:
                    status_contains = 'blank'
                for page in Page.get_pages_from_xml_file(xml_file, status_contains=status_contains):
                    if not UNITTESTING:
                        print(Fore.CYAN + f'Processing {page.title}, {page.number} with function {current_function.__name__} ...' + Style.RESET_ALL)
                        back_up(page, page.xml_file)
                    # a fixer returns a truthy value when it changed the page
                    counters[current_function.__name__] += 1 if current_function(page, redo=redo) else 0
            if not UNITTESTING:
                for function_name, counter in counters.items():
                    print(Style.RESET_ALL + f'[{counter} pages changed by {function_name}]')
        else:
            raise FileNotFoundError('File {} does not exist!'.format(xml_file))
    return exit_status
if __name__ == "__main__":
    # run the command line interface and propagate its exit code to the shell
    sys.exit(main(sys.argv[1:]))
Event Timeline
Log In to Comment