Page MenuHomec4science

interactive_editor.py
No OneTemporary

File Metadata

Created
Wed, May 1, 19:42

interactive_editor.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to process words after they have been merged with faksimile data.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
from datetime import datetime
from deprecated import deprecated
from functools import cmp_to_key
import getopt
import inspect
import lxml.etree as ET
import re
import shutil
import string
from svgpathtools import svg2paths2, svg_to_paths
from svgpathtools.path import Path as SVGPath
from svgpathtools.path import Line
import sys
import tempfile
from operator import attrgetter
import os
from os import listdir, sep, path, setpgrp, devnull
from os.path import exists, isfile, isdir, dirname, basename
from progress.bar import Bar
import warnings
from fix_old_data import save_page
from fix_boxes import attach_box, split_into_parts_and_attach_box
sys.path.append('svgscripts')
from convert_wordPositions import HTMLConverter, JSONConverter
from datatypes.box import Box
from datatypes.faksimile import FaksimilePage
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page, STATUS_MERGED_OK, STATUS_POSTMERGED_OK
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.word import Word, update_transkription_position_ids
from join_faksimileAndTranskription import sort_words
from util import back_up, back_up_svg_file, copy_faksimile_svg_file
from process_files import update_svgposfile_status
from process_words_post_merging import update_faksimile_line_positions, MERGED_DIR
sys.path.append('shared_util')
from myxmlwriter import write_pretty, xml_has_type, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
from main_util import create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# When True, the handlers in this file skip the "writing to ..." message and
# the save_page call, and main skips its progress output and the back up.
UNITTESTING = False
# NOTE(review): not referenced anywhere in the visible part of this file.
MAX_SVG_XY_THRESHOLD = 10
class ResponseHandler:
    """Base class of all actions that can be run on a page.

    A handler can be triggered in two ways: from a json dictionary
    (handle_response) or from a line typed into the interactive shell
    (handle_interactive_response). Both end up in run_change, which
    subclasses override in order to do the actual work.
    """

    def __init__(self, response_starts_with=None, dialog_string=None, action_name=None, description=None):
        self.response_starts_with = response_starts_with
        self.dialog_string = dialog_string
        self.action_name = action_name
        self.description = description

    def create_requirement_list(self) -> list:
        """Return the list of requirement dictionaries of this handler.

        The base implementation has no requirements.
        """
        return []

    def create_json_dict(self) -> dict:
        """Return a json dictionary describing this handler.

        The key 'requirements' is only present if there are requirements.
        """
        json_dict = {'action_name': self.action_name, 'description': self.description}
        requirements = self.create_requirement_list()
        if len(requirements):
            json_dict['requirements'] = requirements
        return json_dict

    def get_transkription_words(self, json_dict: dict) -> list:
        """Return those json words that have a (non empty) 'tp_id'.
        """
        json_words = json_dict.get('words') or []
        return [json_word for json_word in json_words if json_word.get('tp_id')]

    def get_requirement(self, json_dict: dict, index=0) -> tuple:
        """Return the requirement tuple (name, input) at position index.

        (None, None) is returned if the requirement does not exist or if
        it lacks one of the keys 'name' and 'input'.
        """
        if not dict_contains_keys(json_dict, ['response_handler', 'requirements']):
            return None, None
        requirements = json_dict['response_handler']['requirements']
        if index >= len(requirements):
            return None, None
        entry = requirements[index]
        if not (dict_contains_keys(entry, ['name']) and dict_contains_keys(entry, ['input'])):
            return None, None
        return entry['name'], entry['input']

    def match(self, response: str) -> bool:
        """Return whether response is meant for this handler.

        A handler without a start string matches every response.
        """
        if self.response_starts_with is None:
            return True
        return response.startswith(self.response_starts_with)

    def print_dialog(self):
        """Print the dialog string in square brackets, if there is one.
        """
        if self.dialog_string is None:
            return
        print(f'[{self.dialog_string}]')

    def handle_response(self, page: Page, json_dict: dict) -> int:
        """Handle a json response and return the exit code of run_change.

        The action dictionary contains the page words whose id is listed
        among the json words with a 'tp_id', plus one entry per requirement.
        """
        selected_ids = [json_word.get('id') for json_word in self.get_transkription_words(json_dict)]
        action_dictionary = {'words': [word for word in page.words if word.id in selected_ids]}
        for index, _ in enumerate(self.create_requirement_list()):
            name, requirement = self.get_requirement(json_dict, index=index)
            action_dictionary[name] = requirement
        return self.run_change(page, action_dictionary)

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle a shell response and return the exit code of run_change.
        """
        return self.run_change(page, {})

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        The base implementation changes nothing and returns 0.
        """
        return 0
class JoinWords(ResponseHandler):
    """Join several words of a page into one word.
    """

    @staticmethod
    def _find_top_level_word(page, word):
        """Return the entry of page.words that is word or has word as a word part, else None.
        """
        if word in page.words:
            return word
        for candidate in page.words:
            if word in candidate.word_parts:
                return candidate
        return None

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle response interactively and return exit code.

        A non-digit prefix followed by whitespace (e.g. "w ") is stripped
        before the ids are parsed; its presence requests a white space
        between the joined words.

        :return: the exit code of the next shell round on success, else 2
        """
        prefix_pattern = re.compile(r'^\D+\s')
        action_dictionary = {
            'words': shell._get_words_from_response(prefix_pattern.sub('', response), page.words),
            # pass a real boolean on, not the match object
            'add_white_space_between_words': prefix_pattern.match(response) is not None,
        }
        if self.run_change(page, action_dictionary) == 0:
            return shell.run_interactive_editor(page)
        return 2

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        :param action_dictionary: 'words' (list of words to join) and
            optionally 'add_white_space_between_words'
        :return: 0 on success, 2 if there are no words to join
        """
        add_white_space_between_words = bool(action_dictionary.get('add_white_space_between_words'))
        words = action_dictionary.get('words') or []
        if len(words) == 0:
            return 2
        same_line = len({word.line_number for word in words}) == 1
        same_deletion_status = len({word.deleted for word in words}) == 1
        if same_line and same_deletion_status:
            # Homogeneous words are merged into the first one in place.
            new_word = words[0]
            for word2join in words[1:]:
                page.words.remove(word2join)
                new_word.join(word2join, add_white_space_between_words=add_white_space_between_words)
        else:
            new_word = Word.join_words(words, add_white_space_between_words=add_white_space_between_words)
            # The new word takes the place of the first joined word; the
            # position is determined before anything is removed.
            first_owner = self._find_top_level_word(page, words[0])
            index = page.words.index(first_owner) if first_owner is not None else len(page.words)
            for word2join in words:
                owner = self._find_top_level_word(page, word2join)
                if owner is not None:
                    page.words.remove(owner)
            page.words.insert(index, new_word)
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return 0
class SimpleJoinWords(JoinWords):
    """Join words without white space; the response consists of the word ids only.
    """

    def match(self, response: str) -> bool:
        """Return whether response starts with a digit, i.e. with a word id.
        """
        # Return a real boolean as annotated, not the match object.
        return re.match(r'\d+', response) is not None
class SaveChanges(ResponseHandler):
    """Save changes to the deletion status/line number of words and
    move faksimile positions from one word to another.
    """
    RELEVANT_PROPERTIES = [ ('deleted','deleted'), ('line_number','line') ] # 0 = word, 1 = word_dict

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Save page and return the exit code of the next shell round.
        """
        self.run_change(page, {})
        return shell.run_interactive_editor(page)

    def _update_transkription_word(self, word, word_dict) -> int:
        """Update properties of word according to word_dict,
        return exit_code

        If word has word parts, the word part addressed by the middle
        section of 'tp_id' is updated instead of the word itself.

        :return: 0 on success, 2 if 'tp_id' does not address an existing word part
        """
        target = word
        if len(word.word_parts) > 0:
            # tp_id is parsed once, it does not depend on the property
            tp_id_parts = word_dict['tp_id'].split(':')
            if len(tp_id_parts) != 3:
                return 2
            wp_index = int(tp_id_parts[1].replace('w', ''))
            if wp_index >= len(word.word_parts):
                return 2
            target = word.word_parts[wp_index]
        for attribute_name, json_key in self.RELEVANT_PROPERTIES:
            # NOTE(review): writes to __dict__ directly, as before; this bypasses any property setter.
            target.__dict__[attribute_name] = word_dict[json_key]
        return 0

    @staticmethod
    def _detach_faksimile_position(old_word, fp_id):
        """Remove the faksimile position with id fp_id from old_word or its word parts and return it, else None.
        """
        for owner in [old_word] + list(old_word.word_parts):
            for faksimile_position in owner.faksimile_positions:
                if faksimile_position.id == fp_id:
                    owner.faksimile_positions.remove(faksimile_position)
                    return faksimile_position
        return None

    def _update_faksimile_word(self, word, word_dict, words) -> int:
        """Update properties of word according to word_dict,
        return exit_code

        If word_dict has an 'old_id', the faksimile position 'fp_id' is
        moved from the word with that id to word.

        :return: 0 on success, 2 if the faksimile position was not found,
            3 if there is no word with id 'old_id'
        """
        if word_dict.get('old_id') is None:
            return 0
        fp_id = word_dict['fp_id']
        old_id = int(word_dict['old_id'])
        old_word = next((w for w in words if w.id == old_id), None)
        if old_word is None:
            return 3
        faksimile_position = self._detach_faksimile_position(old_word, fp_id)
        if faksimile_position is None:
            return 2
        word.faksimile_positions.append(faksimile_position)
        return 0

    def _update_word(self, word, word_dict, words) -> int:
        """Update properties of word according to word_dict,
        return exit_code

        :return: 0 on success, 2 if word_dict has neither 'tp_id' nor 'fp_id',
            else the exit code of the failing update
        """
        if bool(word_dict.get('tp_id')):
            return self._update_transkription_word(word, word_dict)
        if bool(word_dict.get('fp_id')):
            exit_code = self._update_faksimile_word(word, word_dict, words)
            if exit_code > 0:
                print(exit_code)
            return exit_code
        return 2

    def handle_response(self, page: Page, json_dict: dict) -> int:
        """Handle response and return exit code.

        Each page word whose id occurs in json_dict['words'] is updated
        with the first json word of that id; then the page is saved.

        :return: 2 as soon as one update fails, else the exit code of run_change
        """
        json_words = json_dict.get('words') or []
        json_word_ids = [ int(jw.get('id')) for jw in json_words ]
        word_dicts_by_id = {}
        for word_id, jw in zip(json_word_ids, json_words):
            word_dicts_by_id.setdefault(word_id, jw)
        # an empty page must not break the log output
        first_word_id = page.words[0].id if len(page.words) > 0 else None
        print('updating word', json_dict, json_word_ids, first_word_id)
        for word in page.words:
            if word.id in word_dicts_by_id:
                print('updating word', word.id, word.text)
                if self._update_word(word, word_dicts_by_id[word.id], page.words) > 0:
                    return 2
        return self.run_change(page, {})

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Save page (unless unittesting) and return exit code 0.
        """
        exit_code = 0
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return exit_code
class SavePositions(SaveChanges):
    """Save new coordinates of transkription and faksimile positions.
    """

    @staticmethod
    def _move_position(position, word_dict):
        """Set left and top of position from word_dict and derive bottom from top and height.
        """
        position.left = float(word_dict['left'])
        position.top = float(word_dict['top'])
        position.bottom = position.top + position.height

    def _update_word(self, word, word_dict_list) -> int:
        """Update the positions of word according to each dictionary of word_dict_list,
        return exit_code

        Dictionaries with neither 'tp_id' nor 'fp_id' are skipped; the
        first failing update ends the loop.
        """
        exit_code = 0
        for word_dict in word_dict_list:
            if bool(word_dict.get('tp_id')):
                exit_code = self._update_transkription_position(word, word_dict)
            elif bool(word_dict.get('fp_id')):
                exit_code = self._update_faksimile_position(word, word_dict)
            else:
                continue
            if exit_code > 0:
                return exit_code
        return exit_code

    def _update_transkription_position(self, word, word_dict) -> int:
        """Update the transkription position addressed by 'tp_id' according to word_dict,
        return exit_code

        A 'tp_id' with three sections addresses a position of a word part,
        one with two sections a position of the word itself.

        :return: 0 on success, 2 if 'tp_id' does not address an existing position
        """
        id_sections = word_dict['tp_id'].split(':')
        if len(id_sections) == 3 and len(word.word_parts) > 0:
            wp_index = int(id_sections[1].replace('w', ''))
            tp_index = int(id_sections[2].replace('tp', ''))
            if wp_index >= len(word.word_parts):
                return 2
            positions = word.word_parts[wp_index].transkription_positions
        elif len(id_sections) == 2:
            tp_index = int(id_sections[1].replace('tp', ''))
            positions = word.transkription_positions
        else:
            return 2
        if tp_index >= len(positions):
            return 2
        self._move_position(positions[tp_index], word_dict)
        return 0

    def _update_faksimile_position(self, word, word_dict) -> int:
        """Update the faksimile position with id 'fp_id' according to word_dict,
        return exit_code

        A match among the word parts takes precedence over a match on
        the word itself.

        :return: 0 on success, 2 if there is no such faksimile position
        """
        fp_id = word_dict['fp_id']
        faksimile_position = None
        own_matches = [ fp for fp in word.faksimile_positions if fp.id == fp_id ]
        if len(own_matches) > 0:
            faksimile_position = own_matches[0]
        part_matches = [ fp for w in word.word_parts for fp in w.faksimile_positions if fp.id == fp_id ]
        if len(part_matches) > 0:
            faksimile_position = part_matches[0]
        if faksimile_position is None:
            return 2
        self._move_position(faksimile_position, word_dict)
        return 0

    def handle_response(self, page: Page, json_dict: dict) -> int:
        """Handle response and return exit code.

        Each page word is updated with all json words of the same id;
        then the page is saved.

        :return: 2 as soon as one update fails, else the exit code of run_change
        """
        json_word_ids = [ jw.get('id') for jw in json_dict['words'] ]
        for word in page.words:
            if word.id not in json_word_ids:
                continue
            word_dict_list = [ jw for jw in json_dict['words'] if jw.get('id') == word.id ]
            if self._update_word(word, word_dict_list) > 0:
                return 2
        return self.run_change(page, {})
class Reload(ResponseHandler):
    """Reload the page from its xml file.
    """

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Create a new page from the file of page and continue the shell with it.
        """
        reloaded_page = Page(page.page_tree.docinfo.URL)
        return shell.run_interactive_editor(reloaded_page)
class RestoreBackup(ResponseHandler):
    """Continue with the back up file of the page.
    """

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Continue the shell with a page created from page.bak_file.

        :return: the exit code of the next shell round, 2 if page has no back up file
        """
        if page.bak_file is None:
            print('Could not restore backup file, please restore manually!')
            return 2
        return shell.run_interactive_editor(Page(page.bak_file))
class ChangeLine2Value(ResponseHandler):
    """Set the line number of words to a given value.
    """

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle response and return exit code.

        Accepted forms are "l:<line> <ids>", "l:<line>" (ids are asked
        for) and anything else starting with "l" (line and ids are asked for).

        :return: the exit code of the next shell round on success, else 2
        """
        words = []
        line_number = -1
        complete_match = re.match(r'l:(\d+)\s\d+', response)
        if complete_match:
            line_number = int(complete_match.group(1))
            words = shell._get_words_from_response(re.sub(r'l:\d+\s', '', response), page.words)
        else:
            if re.match(r'l:\d+$', response):
                line_number = int(response.replace('l:', ''))
            else:
                new_response_line = input('Specify new line number>')
                if re.match(r'^\d+$', new_response_line):
                    line_number = int(new_response_line)
            new_response = input(f'Specify ids of words for which line number should be changed to {line_number}>')
            if re.match(r'\d+', new_response):
                # this used to call the undefined name shell_get_words_from_response
                words = shell._get_words_from_response(new_response, page.words)
        action_dictionary = { 'words': words, 'line_number' : line_number }
        if self.run_change(page, action_dictionary) == 0:
            return shell.run_interactive_editor(page)
        return 2

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        :param action_dictionary: 'line_number' (int, -1 or missing means
            "not specified") and 'words' (list of words to change)
        :return: 0 on success, 2 if no line number was specified
        """
        line_number = action_dictionary.get('line_number')
        if line_number is None:
            # only a missing value counts as "not specified", 0 is a value
            line_number = -1
        words = action_dictionary.get('words') or []
        if line_number == -1:
            return 2
        for word in words:
            word.line_number = line_number
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return 0
class CreateCorrectionHistory(ResponseHandler):
    """Create a correction history for words.
    """

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle response and return exit code.

        The ids are taken from response ("c <ids>") or asked for.

        :return: the exit code of the next shell round on success, else 2
        """
        # stays empty if no ids are supplied, run_change then returns 2
        words = []
        if re.match(r'c\w*\s\d+', response):
            words = shell._get_words_from_response(re.sub(r'c\w*\s', '', response), page.words)
        else:
            new_response = input(f'Specify ids of words to create a correction history. >')
            if re.match(r'\d+', new_response):
                words = shell._get_words_from_response(new_response, page.words)
        action_dictionary = { 'words': words }
        if self.run_change(page, action_dictionary) == 0:
            return shell.run_interactive_editor(page)
        return 2

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        :param action_dictionary: 'words' (list of words)
        :return: 0 on success, 2 if there are no words
        """
        words = action_dictionary.get('words') or []
        if len(words) == 0:
            return 2
        for word in words:
            word.create_correction_history()
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return 0
class DeleteCorrectionHistory(ResponseHandler):
    """Delete the correction history of words.
    """

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle response interactively and return exit code.

        The ids are taken from response ("D <ids>") or asked for.

        :return: the exit code of the next shell round on success, else 2
        """
        # stays empty if no ids are supplied, run_change then returns 2
        words = []
        if re.match(r'D\w*\s\d+', response):
            words = shell._get_words_from_response(re.sub(r'D\w*\s', '', response), page.words)
        else:
            new_response = input(f'Specify ids of words to delete their correction history. >')
            if re.match(r'\d+', new_response):
                words = shell._get_words_from_response(new_response, page.words)
        action_dictionary = { 'words' : words }
        if self.run_change(page, action_dictionary) == 0:
            return shell.run_interactive_editor(page)
        return 2

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        The text of each word is printed, then its earlier version and
        its corrections are reset.

        :param action_dictionary: 'words' (list of words)
        :return: 0 on success, 2 if there are no words
        """
        words = action_dictionary.get('words') or []
        if len(words) == 0:
            return 2
        for word in words:
            print(word.text)
            word.earlier_version = None
            word.corrections = []
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return 0
class ChangeDeletionStatus(ResponseHandler):
    """Mark words as deleted or as not deleted.
    """

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle response and return exit code.

        A response starting with "d" deletes, any other response
        undeletes. The ids are taken from response or asked for.

        :return: the exit code of the next shell round on success, else 2
        """
        # stays empty if no ids are supplied, run_change then returns 2
        words = []
        if re.match(r'[du]\w*\s\d+', response):
            words = shell._get_words_from_response(re.sub(r'[du]\w*\s', '', response), page.words)
        else:
            deletion_target = 'delete' if response.startswith('d') else 'undelete'
            new_response = input(f'Specify ids of words to {deletion_target}. >')
            if re.match(r'\d+', new_response):
                words = shell._get_words_from_response(new_response, page.words)
        action_dictionary = { 'words': words, 'deleted': response.startswith('d') }
        if self.run_change(page, action_dictionary) == 0:
            return shell.run_interactive_editor(page)
        return 2

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        :param action_dictionary: 'words' (list of words) and 'deleted'
            (the new deletion status, a missing value counts as False)
        :return: 0 on success, 2 if there are no words
        """
        words = action_dictionary.get('words') or []
        word_should_be_deleted = bool(action_dictionary.get('deleted'))
        if len(words) == 0:
            return 2
        for word in words:
            word.deleted = word_should_be_deleted
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return 0
class SplitWords(ResponseHandler):
    """Split words according to a split text.
    """

    def _split_word(self, page, word, split_text):
        """Split word and put the second and third item of the split result in its place.
        """
        index = page.words.index(word)
        _, left, right = word.split(split_text)
        page.words[index] = left
        page.words.insert(index+1, right)

    def create_requirement_list(self) -> list:
        """Return the requirement list: a string named 'split_text'.
        """
        return [{ 'name': 'split_text', 'type': 'string', 'input': None }]

    def handle_interactive_response(self, page: Page, response: str, shell) -> int:
        """Handle response and return exit code.

        Split text and ids are taken from response ("s <split text> <ids>")
        or asked for.

        :return: the exit code of the next shell round on success, else 2
        """
        # stays empty if no ids are supplied, run_change then returns 2
        words = []
        if re.match(r's\s\w+\s\d+', response):
            words = shell._get_words_from_response(re.sub(r's\s\w+\s', '', response), page.words)
            split_text = response.split(' ')[1]
        else:
            split_text = input('Input split text>')
            new_response = input(f'Specify ids of words to split. >')
            if re.match(r'\d+', new_response):
                words = shell._get_words_from_response(new_response, page.words)
        action_dictionary = { 'words': words, 'split_text': split_text }
        if self.run_change(page, action_dictionary) == 0:
            return shell.run_interactive_editor(page)
        return 2

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        :param action_dictionary: 'words' (list of words) and 'split_text'
        :return: 0 on success, 2 if words or split text are missing
        """
        words = action_dictionary.get('words') or []
        split_text = action_dictionary.get('split_text') or ''
        if len(words) == 0 or split_text == '':
            return 2
        for word in words:
            self._split_word(page, word, split_text)
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            # NOTE(review): this only rebinds the local name, the caller keeps its page object.
            page = Page(page.page_tree.docinfo.URL)
        return 0
class AddBox(ResponseHandler):
    """Attach a box with overwritten text to words.
    """

    def create_requirement_list(self) -> list:
        """Return the requirement list: 'box_text', 'overwritten_by' and 'is_earlier_version'.
        """
        box_text = { 'name': 'box_text', 'type': 'string', 'input': None }
        overwritten_by = { 'name': 'overwritten_by', 'type': 'string', 'input': None }
        is_earlier_version = { 'name': 'is_earlier_version', 'type': 'boolean', 'input': False }
        return [ box_text, overwritten_by, is_earlier_version ]

    def run_change(self, page: Page, action_dictionary: dict) -> int:
        """Run changes on page and return exit code.

        For each word a box is attached, a correction history is created
        and, if the word has corrections afterwards, overwrites_word of
        all its word parts is reset.

        :param action_dictionary: 'words', 'box_text' and optionally
            'overwritten_by' and 'is_earlier_version'
        :return: 0 on success, 2 if words or box text are missing
        """
        words = action_dictionary.get('words') or []
        missing_text = action_dictionary.get('box_text')
        is_earlier_version = action_dictionary.get('is_earlier_version')
        overwritten_by = action_dictionary.get('overwritten_by')
        if len(words) == 0 or missing_text is None:
            return 2
        for word in words:
            if overwritten_by is None:
                attach_box(word, 0, missing_text, False)
            else:
                split_into_parts_and_attach_box(word, 0, missing_text, is_earlier_version, overwritten_by)
            word.create_correction_history()
            if len(word.corrections) > 0:
                for word_part in word.word_parts:
                    word_part.overwrites_word = None
        if not UNITTESTING:
            print(f'writing to {page.page_tree.docinfo.URL}')
            save_page(page, backup=True, attach_first=True, script_name=f'{__file__}:{inspect.currentframe().f_back.f_code.co_name}')
            page = Page(page.page_tree.docinfo.URL)
        return 0
class ResponseOrganizer:
    """Dispatch json responses to the response handler named in them
    and create the json data that is sent back.
    """
    RESULT = 'result'

    def __init__(self):
        self.response_handler_dictionary = {}
        self._add_response_handler(JoinWords(action_name='join words', description='join words'))
        self._add_response_handler(SplitWords(action_name='split words', description='split word according to split text'))
        self._add_response_handler(CreateCorrectionHistory(action_name='create correction history', description='creates a correction history for selected words'))
        self._add_response_handler(DeleteCorrectionHistory(action_name='delete correction history', description='deletes the correction history of selected words'))
        self._add_response_handler(AddBox(action_name='add box', description='add box with overwritten text'))
        self._add_response_handler(SaveChanges(action_name='save changes', description='save change to line number/deletion status for word(s)' ))
        self._add_response_handler(SavePositions(action_name='save positions', description='save new transkription position(s)' ))

    def _add_response_handler(self, response_handler: ResponseHandler):
        """Add response_handler to response_handler_dictionary, with its action name as key.
        """
        self.response_handler_dictionary[response_handler.action_name] = response_handler

    def create_json_dict(self, xml_file: str, last_operation_result=None) -> dict:
        """Return a json dict of page with information about action.

        The key 'actions' contains the target file, its modification
        time, the json dictionaries of all response handlers and, if
        given, the result of the last operation.
        """
        page = Page(xml_file)
        replace_ligatures(page)
        converter = JSONConverter(page)
        json_dict = converter.create_json_dict()
        action_dict = { 'target_file': xml_file,\
                'date_stamp': os.path.getmtime(xml_file) }
        if last_operation_result is not None:
            action_dict[self.RESULT] = last_operation_result
        action_dict['response_handlers'] = [ response_handler.create_json_dict()\
                for response_handler in self.response_handler_dictionary.values() ]
        json_dict['actions'] = action_dict
        return json_dict

    def _create_error_dict(self, message: str) -> dict:
        """Return a minimal json dict that only contains the result message.
        """
        return { 'actions': { self.RESULT: message }}

    def handle_response(self, json_dict: dict) -> dict:
        """Handle response in json_dict and return new data json_dict.

        The operation is only run if the target file has not been
        modified since the 'date_stamp' of json_dict. Problems with
        json_dict are reported in the result message of the returned
        dictionary.
        """
        if not bool(json_dict.get('target_file')):
            return self._create_error_dict('ERROR: there was no key "target_file" in json!')
        target_file = json_dict['target_file']
        if not os.path.isfile(target_file):
            # the file name comes from the client, do not fail on it
            return self._create_error_dict(f'ERROR: target file {target_file} does not exist!')
        if not bool(json_dict.get('date_stamp')):
            return self.create_json_dict(target_file,\
                    last_operation_result='ERROR: there was no key "date_stamp" in json')
        if os.path.getmtime(target_file) > json_dict['date_stamp']:
            return self.create_json_dict(target_file,\
                    last_operation_result=f'FAIL: file {target_file} was changed between operations!')
        exit_code = 2
        operation = 'unknown'
        handler_info = json_dict.get('response_handler')
        action_name = handler_info.get('action_name') if isinstance(handler_info, dict) else None
        response_handler = self.response_handler_dictionary.get(action_name) if action_name is not None else None
        if response_handler is not None:
            operation = action_name
            exit_code = response_handler.handle_response(Page(target_file), json_dict)
        message = f'Operation "{operation}" succeeded!' if exit_code == 0 else f'Operation "{operation}" failed'
        return self.create_json_dict(target_file, last_operation_result=message)
class InteractiveShell:
    """A simple interactive shell for editing the words of a page.

    The first response handler that matches a response handles it, so
    the order of the handlers matters.
    """

    def __init__(self):
        self.response_handlers = []
        self.response_handlers.append(SimpleJoinWords(dialog_string='specify ids of words to join [default]'))
        self.response_handlers.append(RestoreBackup(response_starts_with='b', dialog_string='b=restore backup'))
        self.response_handlers.append(CreateCorrectionHistory(response_starts_with='c', dialog_string='c=create correction history [+ ids]'))
        self.response_handlers.append(DeleteCorrectionHistory(response_starts_with='D', dialog_string='D=delete correction history [+ ids]'))
        self.response_handlers.append(ChangeDeletionStatus(response_starts_with='d', dialog_string='d=mark deleted [+ ids]'))
        self.response_handlers.append(SaveChanges(response_starts_with='i', dialog_string='i=fix ids' ))
        self.response_handlers.append(ChangeLine2Value(response_starts_with='l', dialog_string='l[:value]=change line to value for ids' ))
        self.response_handlers.append(Reload(response_starts_with='r', dialog_string='r=reload xml file'))
        self.response_handlers.append(SplitWords(response_starts_with='s', dialog_string='s=split and join word ("s splittext id")'))
        self.response_handlers.append(ChangeDeletionStatus(response_starts_with='u', dialog_string='u=undelete [+ ids]'))
        self.response_handlers.append(JoinWords(response_starts_with='w', dialog_string='w=join words with whitespace between them [+ ids]'))
        # matches every response and changes nothing, i.e. ends the session
        self.response_handlers.append(ResponseHandler())

    def _get_words_from_response(self, response, words) -> list:
        """Return a list of words that correspond to the ids in response.

        Accepted forms:
            "3 9 4"   the ids 3, 9 and 4
            "7+"      the ids 7 and 8
            "12-15"   the ids 12 to 15
            "15-12"   the ids 15 down to 12
            "123-5"   short for "123-125": if the second boundary has less
                      digits, the leading digits of the first one are prepended

        Ids without a corresponding word are ignored.
        """
        response = response.strip()
        if re.match(r'\d+-\d+', response)\
        or re.match(r'\d+\+', response):
            if response[-1] == '+':
                start = int(response[:response.index('+')])
                stop = start + 1
            else:
                boundaries = response.split('-')
                first, second = boundaries[0], boundaries[1]
                start = int(first)
                missing_digits = len(first) - len(second)
                if missing_digits > 0:
                    # prepend the whole missing prefix, not just one digit
                    second = first[:missing_digits] + second
                stop = int(second)
            if start > stop:
                indices = list(range(start, stop - 1, -1))
            else:
                indices = list(range(start, stop + 1))
        else:
            # split() copes with repeated white space
            indices = [ int(i) for i in response.split() ]
        result_words = []
        for index in indices:
            result_words += [ word for word in words if word.id == index ]
        return result_words

    def run_interactive_editor(self, page) -> int:
        """Run interactive shell.

        The page is converted to html, the dialog is printed and one
        response is read and passed to the first matching handler. Most
        handlers call this method again after a successful change.

        :return: the exit code of the handler
        """
        replace_ligatures(page)
        HTMLConverter(page).convert()
        for response_handler in self.response_handlers:
            response_handler.print_dialog()
        response = input('>')
        for response_handler in self.response_handlers:
            if response_handler.match(response):
                return response_handler.handle_interactive_response(page, response, self)
        # only reached if the list of handlers lacks a catch-all handler
        return 2
def replace_ligatures(page):
    """Replace the ligatures U+FB01 and U+FB02 in the text of the words of page by "fi" and "fl".

    The ligatures are written as escape sequences, so that they can not
    get lost if this file is normalized. Words without text and words
    without ligatures are left untouched.
    """
    ligatures = { '\ufb01': 'fi', '\ufb02': 'fl' }
    for word in page.words:
        text = word.text
        if not text or not any(ligature in text for ligature in ligatures):
            continue
        for ligature, replacement in ligatures.items():
            text = text.replace(ligature, replacement)
        word.text = text
def dict_contains_keys(a_dict, key_list) -> bool:
    """Return whether dict a_dict contains key path given by key_list.

    The first key is looked up in a_dict, the second key in the value of
    the first key, and so on. An empty key_list is contained in anything.
    False is returned if a value on the path is not a dictionary.
    """
    current = a_dict
    for key in key_list:
        if not isinstance(current, dict) or key not in current:
            return False
        current = current[key]
    return True
def usage():
    """Print the docstring of main, which serves as help text.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to process words interactively after they have been merged with faksimile data.

    fixes/interactive_editor.py [OPTIONS] <xmlManuscriptFile|svg_pos_file>

        <xmlManuscriptFile>     a xml file about a manuscript, containing information about its pages.
        <svg_pos_file>          a xml file about a page, containing information about svg word positions.

        OPTIONS:
        -h|--help       show help

        :return: exit code (int)
    """
    # NOTE: this docstring is the help text printed by usage().
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
    if len(args) < 1:
        usage()
        return 2
    exit_status = 0
    xml_file = args[0]
    if not isfile(xml_file):
        raise FileNotFoundError('File {} does not exist!'.format(xml_file))
    counter = 0
    shell = InteractiveShell()
    for page in Page.get_pages_from_xml_file(xml_file, status_contains=STATUS_MERGED_OK):
        if not UNITTESTING:
            print(Fore.CYAN + f'Processing {page.title}, {page.number} with interactive editor ...' + Style.RESET_ALL)
            back_up(page, page.xml_file)
        # a page counts as changed if its shell session ends with exit code 0
        if shell.run_interactive_editor(page) == 0:
            counter += 1
    if not UNITTESTING:
        print(Style.RESET_ALL + f'[{counter} pages changed by interactive shell]')
    return exit_status
if __name__ == "__main__":
    # Use the return value of main as exit status of the process.
    raise SystemExit(main(sys.argv[1:]))

Event Timeline