page.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, Jul 4, 21:53

page.py
View Options

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	""" This class can be used to represent a page.
	"""
	# Copyright (C) University of Basel 2019 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	# Module metadata: authorship, contact, development status, license and version.
	__author__ = "Christian Steiner"
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	from lxml import etree as ET
	from os.path import isfile
	from progress.bar import Bar
	from svgpathtools import svg2paths2, svg_to_paths
	from svgpathtools.parser import parse_path

	from .class_spec import SemanticClass
	from .image import Image, SVGImage
	from .faksimile_image import FaksimileImage
	from .lineNumber import LineNumber
	from .mark_foreign_hands import MarkForeignHands
	from .matrix import Matrix
	from .path import Path
	from .positional_word_part import PositionalWordPart
	from .text_connection_mark import TextConnectionMark
	from .transkriptionField import TranskriptionField
	from .writing_process import WritingProcess
	from .word import Word
	from .word_insertion_mark import WordInsertionMark

	class Page(SemanticClass):
	    """
	    This class represents a page.

	    A page is either instantiated from an existing xml file (xml_source_file)
	    or prepared for being written to an xml file (xml_target_file). If neither
	    file is given, only the plain attributes are initialized and no page_tree
	    is created.

	    Args:
	        xml_source_file (str): name of the xml file to be instantiated.
	        xml_target_file (str): name of the xml file to which page info will be written.
	        title (str): title of the page (written to the root attribute 'title').
	        page_number: number of the page (stored as -1 if None).
	        faksimile_image: faksimile image object that will be attached to the page tree.
	        faksimile_svgFile (str): name of the faksimile svg file.
	        pdfFile (str): name of the pdf file.
	        svg_file (str): name of the svg file.
	        orientation (str): orientation of the page (root attribute 'orientation').
	        page_type (str): Page.PAGE_RECTO or Page.PAGE_VERSO.
	        extract_transkription_field_only (bool): written to the root attribute
	            'transkription-field-only' of a newly created page tree.

	    Raises:
	        FileNotFoundError: if xml_source_file is given but does not exist.
	    """
	    UNITTESTING = False
	    WARNING_MISSING_USE_NODE4PWP = PositionalWordPart.WARN_NO_USE_NODE_FOUND
	    WARNING_MISSING_GLYPH_ID4WIM = WordInsertionMark.WARN_NO_GLYPH_ID
	    PAGE_RECTO = 'recto'
	    PAGE_VERSO = 'verso'

	    def __init__(self, xml_source_file=None, xml_target_file=None, title=None, page_number=None,
	                 faksimile_image=None, faksimile_svgFile=None, pdfFile=None, svg_file=None,
	                 orientation='North', page_type=PAGE_VERSO, extract_transkription_field_only=False):
	        self.title = title
	        self.mark_foreign_hands = []
	        self.text_connection_marks = []
	        self.line_numbers = []
	        self.style_dict = {}
	        self.sonderzeichen_list = []
	        self.svg_file = None
	        self.svg_image = None
	        self.pdfFile = None
	        self.faksimile_svgFile = None
	        self.source = None
	        self.number = page_number if page_number is not None else -1
	        self.orientation = orientation
	        self.page_type = page_type
	        self.word_deletion_paths = []
	        self.faksimile_image = faksimile_image
	        # Defaults that guarantee these attributes exist on every construction path
	        # (e.g. a new target file without svg_file used to leave width/height undefined).
	        self.width = 0.0
	        self.height = 0.0
	        self.words = []
	        self.word_insertion_marks = []
	        self.writing_processes = []
	        self.letterspacing_list = []
	        self.fontsizekey2stage_mapping = {}
	        if xml_source_file is not None:
	            if not isfile(xml_source_file):
	                # FileNotFoundError is an Exception, so callers catching Exception still work.
	                raise FileNotFoundError('File "{}" does not exist!'.format(xml_source_file))
	            self._init_from_source_file(xml_source_file, pdfFile, faksimile_svgFile, faksimile_image, svg_file)
	        elif xml_target_file is not None:
	            self.svg_file = svg_file
	            self.pdfFile = pdfFile
	            self.faksimile_svgFile = faksimile_svgFile
	            if isfile(xml_target_file):
	                self._init_from_existing_target_file(xml_target_file, title, orientation)
	            else:
	                self._init_new_page_tree(title, orientation, page_number, extract_transkription_field_only)
	        # NOTE(review): the original nesting of this check was lost in the source listing;
	        # it is applied after all branches (it is a no-op when neither file is given).
	        if self.svg_image is None and self.svg_file is not None:
	            self.svg_image = SVGImage(file_name=self.svg_file, width=self.width, height=self.height)
	            self.svg_image.attach_object_to_tree(self.page_tree)

	    def _first_xpath_result(self, xpath, default=None):
	        """Returns the first result of xpath on page_tree or default if there is no result.
	        """
	        results = self.page_tree.xpath(xpath)
	        return results[0] if len(results) > 0 else default

	    def _read_svg_dimensions_from_tree(self):
	        """Sets width and height from the svg node of page_tree (0.0 where missing).
	        """
	        self.width = float(self._first_xpath_result('.//svg/@width', default=0.0))
	        self.height = float(self._first_xpath_result('.//svg/@height', default=0.0))

	    def _create_svg_image(self):
	        """Creates svg_image for svg_file with the document size of svg_file and attaches it to page_tree.
	        """
	        tf = TranskriptionField(self.svg_file)
	        self.width = round(tf.documentWidth, 3)
	        self.height = round(tf.documentHeight, 3)
	        self.svg_image = SVGImage(file_name=self.svg_file, width=self.width, height=self.height)
	        self.svg_image.attach_object_to_tree(self.page_tree)

	    def _init_from_source_file(self, xml_source_file, pdfFile, faksimile_svgFile, faksimile_image, svg_file):
	        """Instantiates the page from an existing xml file; arguments only fill in what the file lacks.
	        """
	        parser = ET.XMLParser(remove_blank_text=True)
	        self.page_tree = ET.parse(xml_source_file, parser)
	        root = self.page_tree.getroot()
	        self.title = root.get('title')
	        self.number = root.get('number')
	        self.source = root.get('source')
	        self.orientation = root.get('orientation')
	        self.page_type = root.get('pageType')
	        self.init_words()
	        self.add_style(style_node=root.find('.//style'))
	        self.pdfFile = self._first_xpath_result('.//pdf/@file')
	        self.faksimile_svgFile = self._first_xpath_result('.//faksimile-svg/@file')
	        # Compare nodes with "is not None": lxml elements without children are falsy.
	        svg_image_node = self._first_xpath_result('.//' + SVGImage.XML_TAG)
	        self.svg_image = SVGImage(node=svg_image_node) if svg_image_node is not None else None
	        faksimile_image_node = self._first_xpath_result('.//' + FaksimileImage.XML_TAG)
	        self.faksimile_image = FaksimileImage(node=faksimile_image_node)\
	                if faksimile_image_node is not None else None
	        self.svg_file = self._first_xpath_result('.//svg/@file')
	        self._read_svg_dimensions_from_tree()
	        if pdfFile is not None and self.pdfFile is None:
	            self.pdfFile = pdfFile
	            ET.SubElement(root, 'pdf', attrib={'file': self.pdfFile})
	        if faksimile_svgFile is not None and self.faksimile_svgFile is None:
	            self.faksimile_svgFile = faksimile_svgFile
	            ET.SubElement(root, 'faksimile-svg', attrib={'file': self.faksimile_svgFile})
	        if faksimile_image is not None:
	            # an explicitly passed faksimile image wins over the one found in the file
	            self.faksimile_image = faksimile_image
	            self.faksimile_image.attach_object_to_tree(self.page_tree)
	        if svg_file is not None and self.svg_file is None:
	            self.svg_file = svg_file
	            self._create_svg_image()
	        if self.svg_image is not None:
	            if self.svg_file is None:
	                self.svg_file = self.svg_image.file_name
	            if self.width == 0.0:
	                self.width = self.svg_image.width
	            if self.height == 0.0:
	                self.height = self.svg_image.height

	    def _init_from_existing_target_file(self, xml_target_file, title, orientation):
	        """Reuses an existing target file: reads its meta data and removes all nodes that will be rewritten.
	        """
	        parser = ET.XMLParser(remove_blank_text=True)
	        self.page_tree = ET.parse(xml_target_file, parser)
	        root = self.page_tree.getroot()
	        self.source = root.get('source')
	        if bool(root.get('orientation')):
	            self.orientation = root.get('orientation')
	        elif orientation is not None:
	            root.set('orientation', orientation)
	        if bool(root.get('title')):
	            self.title = root.get('title')
	        elif title is not None:
	            root.set('title', title)
	        if self.svg_file is None:
	            self.svg_file = self._first_xpath_result('.//svg/@file')
	            self._read_svg_dimensions_from_tree()
	        elif len(self.page_tree.xpath('.//svg/@file')) == 0:
	            self._create_svg_image()
	        else:
	            self._read_svg_dimensions_from_tree()
	        if self.pdfFile is None:
	            self.pdfFile = self._first_xpath_result('.//pdf/@file')
	        elif len(self.page_tree.xpath('.//pdf/@file')) == 0:
	            ET.SubElement(root, 'pdf', attrib={'file': self.pdfFile})
	        tags2remove = [ 'word', 'style', 'freehand', LineNumber.XML_TAG, WordInsertionMark.XML_TAG,
	                        WritingProcess.XML_TAG, Path.WORD_DELETION_PATH_TAG ]
	        for xpath2remove in tags2remove:
	            for node in self.page_tree.xpath('//' + xpath2remove):
	                node.getparent().remove(node)

	    def _init_new_page_tree(self, title, orientation, page_number, extract_transkription_field_only):
	        """Creates a new page tree and writes the known meta data to it.
	        """
	        self.page_tree = ET.ElementTree(ET.Element('page'))
	        root = self.page_tree.getroot()
	        if title is not None:
	            root.set('title', title)
	        if orientation is not None:
	            root.set('orientation', orientation)
	        root.set('transkription-field-only', str(extract_transkription_field_only).lower())
	        if page_number is not None:
	            root.set('number', str(page_number))
	        if self.pdfFile is not None:
	            ET.SubElement(root, 'pdf', attrib={'file': self.pdfFile})
	        if self.svg_file is not None:
	            self._create_svg_image()

	    def add_style(self, sonderzeichen_list=None, letterspacing_list=None, style_dict=None, style_node=None):
	        """Adds a list of classes that are sonderzeichen and a style dictionary to page.

	        If style_node is given, the style information is read from it; otherwise a
	        non-empty style_dict is written to a new 'style' node of page_tree.
	        Finally a mapping between font size classes and writing stages is created
	        (self.fontsizekey2stage_mapping).
	        """
	        # None instead of mutable defaults: each call gets its own containers.
	        self.sonderzeichen_list = sonderzeichen_list if sonderzeichen_list is not None else []
	        self.letterspacing_list = letterspacing_list if letterspacing_list is not None else []
	        self.style_dict = style_dict if style_dict is not None else {}
	        if style_node is not None:
	            class_nodes = style_node.findall('.//class')
	            self.style_dict = { item.get('name'): { key: value for key, value in item.attrib.items() if key != 'name' }
	                                for item in class_nodes }
	            self.sonderzeichen_list = [ item.get('name') for item in class_nodes
	                                        if bool(item.get('font-family')) and 'Sonderzeichen' in item.get('font-family') ]
	            # NOTE(review): 'letterspacing-list' is written as an attribute of the style node below,
	            # but read here from the class nodes -- confirm that this is intended.
	            self.letterspacing_list = [ item.get('name') for item in class_nodes
	                                        if bool(item.get('letterspacing-list')) ]
	        elif bool(self.style_dict):
	            style_node = ET.SubElement(self.page_tree.getroot(), 'style')
	            if len(self.sonderzeichen_list) > 0:
	                style_node.set('Sonderzeichen', ' '.join(self.sonderzeichen_list))
	            if len(self.letterspacing_list) > 0:
	                style_node.set('letterspacing-list', ' '.join(self.letterspacing_list))
	            for key, attributes in self.style_dict.items():
	                # Build the xml attributes on a copy: the caller's dictionary is not modified.
	                ET.SubElement(style_node, 'class', attrib=dict(attributes, name=key))
	        fontsize_dict = { key: float(value.get('font-size').replace('px','')) for key, value in self.style_dict.items()
	                          if 'font-size' in value }
	        # create a mapping between fontsizes and word stages
	        self.fontsizekey2stage_mapping = {}
	        if len(fontsize_dict) > 0:
	            biggest_fontsize = max(fontsize_dict.values())
	            smallest_fontsize = min(fontsize_dict.values())
	            for fontsize_key, value in fontsize_dict.items():
	                if value >= biggest_fontsize-1:
	                    self.fontsizekey2stage_mapping.update({ fontsize_key: WritingProcess.FIRST_VERSION })
	                elif value <= smallest_fontsize+1:
	                    self.fontsizekey2stage_mapping.update({ fontsize_key: WritingProcess.LATER_INSERTION_AND_ADDITION })
	                else:
	                    self.fontsizekey2stage_mapping.update({ fontsize_key: WritingProcess.INSERTION_AND_ADDITION })

	    def add_source(self, source):
	        """Adds a source to page and attaches it to page_tree.
	        """
	        self.source = source
	        self.page_tree.getroot().set('source', self.source)

	    def categorize_paths(self, transkription_field=None):
	        """Categorize all paths that are part of the transkription field.

	        Paths are only collected if a transkription_field is given. Up to now only
	        the deletion/underline paths are processed further: words intersecting
	        with them are marked as deleted.

	        Raises:
	            FileNotFoundError: if page has no existing source file (unless Page.UNITTESTING).
	        """
	        if self.source is None or not isfile(self.source):
	            if not Page.UNITTESTING:
	                error_msg = 'Svg source file {} does not exist!'.format(self.source)\
	                        if self.source is not None else 'Page does not contain a source file!'
	                raise FileNotFoundError(error_msg)
	            return
	        MAX_HEIGHT_LINES = 1
	        # Heights of the even numbered lines; fall back to 17 if there is none
	        # (previously a page with only odd line numbers raised an IndexError).
	        even_line_heights = [ line_number.bottom-line_number.top for line_number in self.line_numbers
	                              if line_number.id % 2 == 0 ]
	        max_line = max(even_line_heights) + 2 if len(even_line_heights) > 0 else 17
	        tr_xmin = transkription_field.xmin if transkription_field is not None else 0.0
	        tr_ymin = transkription_field.ymin if transkription_field is not None else 0.0
	        paths, attributes = svg_to_paths.svg2paths(self.source)
	        allpaths_on_tf = []
	        if transkription_field is not None:
	            for index, (path, attribute) in enumerate(zip(paths, attributes)):
	                if len(path) > 0\
	                and path != transkription_field.path\
	                and path.start.real > tr_xmin\
	                and path.end.real < transkription_field.xmax:
	                    allpaths_on_tf.append(Path(id=index, path=path, style_class=attribute.get('class')))
	        # NOTE(review): the nesting of the following part was lost in the source listing; it is
	        # assumed to run independently of transkription_field (allpaths_on_tf is then empty).
	        text_area_deletion_paths = []
	        deletion_or_underline_paths = []
	        box_paths = []
	        dots_paths = []
	        word_connector_paths = []
	        uncategorized_paths = []
	        for mypath in allpaths_on_tf:
	            xmin, xmax, ymin, ymax = mypath.path.bbox()
	            width = abs(xmax-xmin)
	            height = abs(ymax-ymin)
	            start_line_number = self.get_line_number(mypath.path.start.imag-tr_ymin)
	            if width < 1 and height < 1:
	                dots_paths.append(mypath)
	            elif height > MAX_HEIGHT_LINES and height < max_line and mypath.path.iscontinuous() and not mypath.path.isclosed():
	                deletion_or_underline_paths.append(mypath)
	            elif height > MAX_HEIGHT_LINES and height < max_line and mypath.path.iscontinuous() and mypath.path.isclosed():
	                box_paths.append(mypath)
	            elif height > MAX_HEIGHT_LINES and height > max_line and mypath.path.iscontinuous() and not mypath.path.isclosed():
	                word_connector_paths.append(mypath)
	            elif height < MAX_HEIGHT_LINES:
	                deletion_or_underline_paths.append(mypath)
	            elif start_line_number != -1 and start_line_number != self.get_line_number(mypath.path.end.imag-tr_ymin):
	                text_area_deletion_paths.append(mypath)
	            else:
	                uncategorized_paths.append(mypath)
	        self.mark_words_intersecting_with_paths_as_deleted(deletion_or_underline_paths, tr_xmin, tr_ymin)

	    def create_writing_processes_and_attach2tree(self):
	        """Creates three stages of Nietzsche's process of writing.

	        Each transkription position gets the writing process id that corresponds
	        to the font size class of its first positional word part.
	        """
	        self.writing_processes = [ WritingProcess(version=WritingProcess.FIRST_VERSION),
	                                   WritingProcess(version=WritingProcess.INSERTION_AND_ADDITION),
	                                   WritingProcess(version=WritingProcess.LATER_INSERTION_AND_ADDITION) ]
	        for writing_process in self.writing_processes:
	            writing_process.attach_object_to_tree(self.page_tree)
	        for word in self.words:
	            for transkription_position in word.transkription_positions:
	                for font_key in transkription_position.positional_word_parts[0].style_class.split(' '):
	                    if font_key in self.fontsizekey2stage_mapping:
	                        transkription_position.writing_process_id = self.fontsizekey2stage_mapping.get(font_key)

	    def find_special_words(self, transkription_field=None):
	        """Find special words, remove them from words, process their content.

	        Raises:
	            FileNotFoundError: if page has no existing source file.
	        """
	        if self.source is None or not isfile(self.source):
	            raise FileNotFoundError('Page does not have a source!')
	        if transkription_field is None:
	            transkription_field = TranskriptionField(self.source)
	        # Copy before extending: "+=" would modify the list returned by
	        # MarkForeignHands.get_special_char_list() in place.
	        special_char_list = list(MarkForeignHands.get_special_char_list())
	        special_char_list += TextConnectionMark.get_special_char_list()
	        single_char_words = [ word for word in self.words if len(word.text) == 1 and word.text in special_char_list ]
	        for word in single_char_words:
	            if word.text == MarkForeignHands.CLASS_MARK:
	                mark_id = len(self.mark_foreign_hands)
	                self.mark_foreign_hands.append(MarkForeignHands.create_cls_from_word(word, id=mark_id))
	                self.words.remove(word)
	            elif word.text in TextConnectionMark.SPECIAL_CHAR_LIST[0]\
	            or (word.text in TextConnectionMark.SPECIAL_CHAR_LIST\
	                and any(style in self.sonderzeichen_list for style\
	                        in word.transkription_positions[0].positional_word_parts[0].style_class.split(' '))):
	                mark_id = len(self.text_connection_marks)
	                self.text_connection_marks.append(TextConnectionMark.create_cls_from_word(word, id=mark_id))
	                self.words.remove(word)
	        svg_tree = ET.parse(self.source)
	        self.update_page_type(transkription_field=transkription_field)
	        self.update_line_number_area(transkription_field, svg_tree=svg_tree)
	        italic_classes = [ key for key in self.style_dict
	                           if bool(self.style_dict[key].get('font-family'))
	                           and self.style_dict[key]['font-family'].endswith('Italic') ]
	        if len(self.mark_foreign_hands) > 0:
	            MarkForeignHands.find_content(self.mark_foreign_hands, transkription_field, svg_tree,
	                                          italic_classes=italic_classes, SonderzeichenList=self.sonderzeichen_list)
	        if len(self.text_connection_marks) > 0:
	            TextConnectionMark.find_content_in_footnotes(self.text_connection_marks, transkription_field, svg_tree,
	                                                         title=self.title, page_number=self.number)

	    def get_biggest_fontSize4styles(self, style_set=None):
	        """Returns biggest font size from style_dict for a set of style class names.

	        Style class names that are unknown to style_dict or that have no
	        font size are ignored.

	        [:returns:] (float) biggest font size OR 1 if style_dict is empty
	        """
	        if not bool(self.style_dict) or not style_set:
	            return 1
	        font_sizes = [ float(self.style_dict[key]['font-size'].replace('px',''))
	                       for key in style_set
	                       if bool(self.style_dict.get(key, {}).get('font-size')) ]
	        return max(font_sizes) if len(font_sizes) > 0 else 1

	    def get_line_number(self, y):
	        """Returns line number id for element at y.

	        [:return:] (int) line number id or -1
	        """
	        for line_number in self.line_numbers:
	            if y >= line_number.top and y <= line_number.bottom:
	                return line_number.id
	        return -1

	    @classmethod
	    def get_semantic_dictionary(cls):
	        """ Creates a semantic dictionary as specified by SemanticClass.
	        """
	        # "|" is the xpath union operator; it must not be escaped with a backslash.
	        page_id_xpath = '/page/@number|/page/@title'
	        properties = { 'title': (str, 1, '/page/@title'), 'number': (str, 1, '/page/@number'),
	                       'image': { 'class': Image, 'cardinality': 1, 'xpath': '/page/{}'.format(FaksimileImage.XML_TAG)},
	                       'line_numbers': (LineNumber, SemanticClass.LIST, page_id_xpath),
	                       'orientation': { 'class': str, 'cardinality': 1, 'xpath': '/page/@orientation'},
	                       'words': (Word, SemanticClass.LIST, page_id_xpath),
	                       'svg_image': { 'class': SVGImage, 'cardinality': 1, 'xpath': '/page/{}'.format(SVGImage.XML_TAG)},
	                       'writing_processes': (WritingProcess, SemanticClass.LIST, page_id_xpath),
	                       'word_deletion_paths': (Path, SemanticClass.LIST, page_id_xpath),
	                       'word_insertion_marks': (WordInsertionMark, SemanticClass.LIST, page_id_xpath)}
	        return { 'class': cls.get_class_dictionary(), 'properties': properties }

	    def init_line_numbers(self, line_numbers, document_bottom):
	        """Init line numbers.

	        The given line_numbers are complemented by line numbers that fill the
	        space above, between and below them; all of them are attached to page_tree.
	        """
	        MINABOVE = 1
	        self.line_numbers = []
	        if len(line_numbers) > 0:
	            # space above the first given line number
	            self.line_numbers.append(LineNumber(id=1, top=0, bottom=line_numbers[0].top - MINABOVE))
	            self.line_numbers.append(line_numbers[0])
	            for index in range(1, len(line_numbers)):
	                # space between the previous and the current given line number
	                self.line_numbers.append(LineNumber(id=line_numbers[index].id-1,
	                                                    top=line_numbers[index-1].bottom+MINABOVE,
	                                                    bottom=line_numbers[index].top-MINABOVE))
	                self.line_numbers.append(line_numbers[index])
	            # space between the last given line number and the bottom of the document
	            self.line_numbers.append(LineNumber(id=line_numbers[-1].id+1,
	                                                top=line_numbers[-1].bottom+MINABOVE,
	                                                bottom=document_bottom))
	        for line_number in self.line_numbers:
	            line_number.attach_object_to_tree(self.page_tree)

	    def init_words(self):
	        """Instantiates words, marks, line numbers, writing processes and deletion paths from page_tree.
	        """
	        root = self.page_tree.getroot()
	        self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node)
	                                      for wim_node in root.xpath('//' + WordInsertionMark.XML_TAG) ]
	        self.words = [ Word.create_cls(word_node) for word_node in root.xpath('//word') ]
	        self.mark_foreign_hands = [ MarkForeignHands.create_cls(node)
	                                    for node in root.xpath('//' + MarkForeignHands.XML_TAG) ]
	        self.text_connection_marks = [ TextConnectionMark.create_cls(node)
	                                       for node in root.xpath('//' + TextConnectionMark.XML_TAG) ]
	        self.line_numbers = [ LineNumber(xml_text_node=line_number_node)
	                              for line_number_node in root.xpath('//' + LineNumber.XML_TAG) ]
	        # writing processes refer to the words, hence they are created after them
	        self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words)
	                                   for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ]
	        self.word_deletion_paths = [ Path(node=node)
	                                     for node in self.page_tree.xpath('//' + Path.WORD_DELETION_PATH_TAG) ]

	    def mark_words_intersecting_with_paths_as_deleted(self, deletion_paths, tr_xmin=0.0, tr_ymin=0.0):
	        """Marks all words that intersect with deletion paths as deleted
	        and adds these paths to word_deletion_paths.

	        [:return:] list of .path.Path that might be word_underline_paths
	        """
	        # no progress bar while unittesting
	        bar = Bar('mark words that intersect with deletion paths', max=len(self.words))\
	                if not Page.UNITTESTING else None
	        for word in self.words:
	            if bar is not None:
	                bar.next()
	            for transkription_position in word.transkription_positions:
	                word_parts = transkription_position.positional_word_parts
	                if len(word_parts) == 0:
	                    # a position without word parts has no area that could intersect with a path
	                    continue
	                first_pwp = word_parts[0]
	                last_pwp = word_parts[-1]
	                xmin = tr_xmin + first_pwp.left
	                xmax = tr_xmin + last_pwp.left + last_pwp.width
	                ymin = tr_ymin + min(pwp.top for pwp in word_parts)
	                ymax = tr_ymin + max(pwp.bottom for pwp in word_parts)
	                # rectangle around the transkription position
	                word_path = parse_path('M {}, {} L {}, {} L {}, {} L {}, {} z'.format(xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax))
	                intersecting_paths = [ deletion_path for deletion_path in deletion_paths
	                                       if do_paths_intersect_saveMode(deletion_path.path, word_path) ]
	                if len(intersecting_paths) > 0:
	                    word.deleted = True
	                    for deletion_path in intersecting_paths:
	                        if deletion_path not in self.word_deletion_paths:
	                            deletion_path.tag = Path.WORD_DELETION_PATH_TAG
	                            deletion_path.attach_object_to_tree(self.page_tree)
	                            self.word_deletion_paths.append(deletion_path)
	        if bar is not None:
	            bar.finish()
	        # return those paths in deletion_paths that are not in self.word_deletion_paths
	        # (in the order of deletion_paths, so that the result is deterministic)
	        return [ word_underline_path for word_underline_path in deletion_paths
	                 if word_underline_path not in self.word_deletion_paths ]

	    def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=()):
	        """Update word ids and attach them to page.page_tree.

	        update_function_on_word is applied to each word and to those special words
	        (MarkForeignHands, TextConnectionMark) whose class is contained in
	        include_special_words_of_type.
	        """
	        # "|" is the xpath union operator; it must not be escaped with a backslash.
	        old_nodes_xpath = '//word|//' + MarkForeignHands.XML_TAG + '|//' + TextConnectionMark.XML_TAG
	        for node in self.page_tree.xpath(old_nodes_xpath):
	            node.getparent().remove(node)
	        for index, word in enumerate(self.words):
	            word.id = index
	            if update_function_on_word is not None:
	                update_function_on_word(word)
	            word.attach_word_to_tree(self.page_tree)
	        for index, mark_foreign_hands in enumerate(self.mark_foreign_hands):
	            mark_foreign_hands.id = index
	            if update_function_on_word is not None\
	            and MarkForeignHands in include_special_words_of_type:
	                update_function_on_word(mark_foreign_hands)
	            mark_foreign_hands.attach_word_to_tree(self.page_tree)
	        for index, text_connection_mark in enumerate(self.text_connection_marks):
	            text_connection_mark.id = index
	            if update_function_on_word is not None\
	            and TextConnectionMark in include_special_words_of_type:
	                update_function_on_word(text_connection_mark)
	            text_connection_mark.attach_word_to_tree(self.page_tree)

	    def update_line_number_area(self, transkription_field, svg_tree=None):
	        """Determines the width of the area where the line numbers are written in the page.source file.
	        """
	        THRESHOLD = 0.4
	        if svg_tree is None:
	            svg_tree = ET.parse(self.source)
	        if len(self.line_numbers) <= 1:
	            return
	        # Index 9 exists only if there are at least ten line numbers
	        # (the former check "> 8" raised an IndexError for exactly nine).
	        if transkription_field.is_page_verso() and len(self.line_numbers) > 9:
	            line_number = self.line_numbers[9]
	        else:
	            line_number = self.line_numbers[1]
	        ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)
	                     if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)
	                     and LineNumber.IS_A_LINE_NUMBER(item)
	                     and LineNumber(raw_text_node=item, transkription_field=transkription_field).id == line_number.id ]
	        if len(ln_nodes) == 0:
	            return
	        matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform'))
	        if transkription_field.is_page_verso():
	            transkription_field.add_line_number_area_width(matrix.getX())
	        elif self.svg_file is not None and isfile(self.svg_file):
	            svg_path_tree = ET.parse(self.svg_file)
	            # xpath does not accept None as prefix, hence the default namespace is called 'ns'
	            namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
	            svg_x = matrix.getX()
	            svg_y = self.line_numbers[1].bottom + transkription_field.ymin
	            use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
	                    .format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces)
	            if len(use_nodes) > 0:
	                symbol_id = use_nodes[0].get('{%s}href' % namespaces['xlink']).replace('#', '')
	                d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
	                if len(d_strings) > 0 and d_strings[0] != '':
	                    path = parse_path(d_strings[0])
	                    xmin, xmax, ymin, ymax = path.bbox()
	                    width = xmax - xmin
	                    transkription_field.add_line_number_area_width(matrix.getX() + width)

	    def update_page_type(self, transkription_field=None):
	        """Sets the page type (recto or verso) and writes it to the attribute 'pageType' of page_tree.

	        If no transkription_field is given, it is created from page's source.

	        Raises:
	            FileNotFoundError: if transkription_field is None and page has no existing source file.
	        """
	        if transkription_field is None:
	            if self.source is None or not isfile(self.source):
	                raise FileNotFoundError('Page does not have a source!')
	            transkription_field = TranskriptionField(self.source)
	        self.page_type = Page.PAGE_VERSO\
	                if transkription_field.is_page_verso()\
	                else Page.PAGE_RECTO
	        self.page_tree.getroot().set('pageType', self.page_type)

	def do_paths_intersect_saveMode(path1, path2):
	    """Checks whether path1 intersects with path2 without letting an AssertionError escape.

	    [:return:] the result of path1.intersect(path2, justonemode=True),
	        or False if that call raised an AssertionError
	    """
	    try:
	        intersection = path1.intersect(path2, justonemode=True)
	    except AssertionError:
	        return False
	    else:
	        return intersection

page.pyNo OneTemporaryActions

File Metadata

page.pyView Options

Event Timeline

page.py
No OneTemporary
Actions

page.py
View Options