page.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Wed, May 1, 03:59

page.py
View Options

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	""" This class can be used to represent a page.
	"""
	# Copyright (C) University of Basel 2019 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	__author__ = "Christian Steiner"
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	from lxml import etree as ET
	from os.path import isfile
	from progress.bar import Bar
	from svgpathtools import svg2paths2, svg_to_paths
	from svgpathtools.parser import parse_path
	import sys
	import warnings

	from .box import Box
	from .color import Color
	from .image import Image, SVGImage
	from .faksimile_image import FaksimileImage
	from .faksimile_position import FaksimilePosition
	from .lineNumber import LineNumber
	from .line import Line
	from .mark_foreign_hands import MarkForeignHands
	from .matrix import Matrix
	from .path import Path
	from .positional_word_part import PositionalWordPart
	from .super_page import SuperPage
	from .style import Style
	from .text_connection_mark import TextConnectionMark
	from .text_field import TextField
	from .transkriptionField import TranskriptionField
	from .writing_process import WritingProcess
	from .word import Word
	from .word_deletion_path import WordDeletionPath
	from .word_insertion_mark import WordInsertionMark

	sys.path.append('py2ttl')
	from class_spec import SemanticClass

	# Module-level aliases of the file type and status constants defined on SuperPage.
	# The FILE_TYPE_* aliases are compared against the text of 'metadata/type'
	# in Page.get_pages_from_xml_file.
	FILE_TYPE_SVG_WORD_POSITION = SuperPage.FILE_TYPE_SVG_WORD_POSITION
	FILE_TYPE_XML_MANUSCRIPT = SuperPage.FILE_TYPE_XML_MANUSCRIPT
	STATUS_MERGED_OK = SuperPage.STATUS_MERGED_OK
	STATUS_POSTMERGED_OK = SuperPage.STATUS_POSTMERGED_OK

	class Page(SemanticClass,SuperPage):
	"""
	This class represents a page.

	Args:
	xml_source_file (str): name of the xml file to be instantiated.
	faksimile_image: optional value registered under the property 'faksimile_image' (default: None).
	faksimile_svgFile: optional value registered under the property 'faksimile_svgFile' (default: None).
	add_deletion_paths_to_words (bool): if true, add_deletion_paths_to_words() is called
	at the end of the constructor (default: True).

	"""
	# Class-level flag, False by default; it is not read anywhere in the visible part of this file.
	UNITTESTING = False

	def __init__(self, xml_source_file, faksimile_image=None, faksimile_svgFile=None, add_deletion_paths_to_words=True):
		"""Set up the page from its xml source file.

		Args:
			xml_source_file: handed on to the superclass constructor.
			faksimile_image: value registered under the property 'faksimile_image'.
			faksimile_svgFile: value registered under the property 'faksimile_svgFile'.
			add_deletion_paths_to_words: if true, self.add_deletion_paths_to_words()
				is called once all node objects have been initialized.
		"""
		super().__init__(xml_source_file)
		initial_properties = (
			('faksimile_image', faksimile_image),
			('faksimile_svgFile', faksimile_svgFile),
		)
		for property_name, property_value in initial_properties:
			self.update_property_dictionary(property_name, property_value)
		self.init_all_properties()
		root = self.page_tree.getroot()
		self.add_style(style_node=root.find('.//style'))
		self.init_node_objects()
		if not add_deletion_paths_to_words:
			return
		self.add_deletion_paths_to_words()

	def add_deletion_paths_to_words(self):
		"""Add deletion paths to all deleted words of this page.

		A word is treated as deleted if ``word.deleted`` is true or if any of
		its ``word_parts`` is deleted.  The offsets of the transkription field
		(``xmin``/``ymin``) are read from the first of ``self.svg_file`` and
		``self.source`` that exists on disk; if neither exists, nothing is done.
		"""
		# Pick the file that actually exists.  Previously self.svg_file was
		# always preferred when it was set, even if only self.source was present
		# on disk, so a missing file could be handed to TranskriptionField.
		svg_file = next((candidate for candidate in (self.svg_file, self.source)
				if candidate is not None and isfile(candidate)), None)
		if svg_file is None:
			return
		transkription_field = TranskriptionField(svg_file)
		deleted_words = [ word for word in self.words
				if word.deleted or any(part.deleted for part in word.word_parts) ]
		for word in deleted_words:
			word.add_deletion_paths(self.word_deletion_paths,
					tr_xmin=transkription_field.xmin, tr_ymin=transkription_field.ymin)

	@classmethod
	def get_pages_from_xml_file(cls, xml_file, status_contains='', status_not_contain='', word_selection_function=None):
		"""Return a list of pages instantiated from xml_file.

		If xml_file is of type FILE_TYPE_SVG_WORD_POSITION, the list contains the
		page instantiated from xml_file itself.  If it is of type
		FILE_TYPE_XML_MANUSCRIPT, the list contains the pages of all existing
		files referenced by the ``output`` attribute of its ``page`` nodes.

		Args:
			xml_file: name of the xml file to be parsed.
			status_contains: if not empty, only ``page`` nodes of a manuscript file
				whose ``status`` attribute contains this string are used.
			status_not_contain: if not empty, ``page`` nodes of a manuscript file
				whose ``status`` attribute contains this string are skipped.
			word_selection_function: optional function that is called with
				``page.words``; a page is only returned if the result is not empty.

		Returns:
			a list of pages; empty if the file type is missing or unknown.

		Note:
			The status filters are applied to the ``page`` nodes of the manuscript
			file only; they are not handed on to the recursive calls.
		"""
		source_tree = ET.parse(xml_file)
		# A file without 'metadata/type' is treated like a file of unknown type
		# instead of raising an AttributeError on None.
		type_node = source_tree.getroot().find('metadata/type')
		file_type = type_node.text if type_node is not None else None
		if file_type == FILE_TYPE_SVG_WORD_POSITION:
			page = cls(xml_file)
			if word_selection_function is None or len(word_selection_function(page.words)) > 0:
				return [ page ]
			return []
		if file_type != FILE_TYPE_XML_MANUSCRIPT:
			return []
		# The status values are passed as XPath variables rather than being
		# formatted into the expression, so quotes in them cannot break it.
		predicates = []
		xpath_variables = {}
		if status_contains != '':
			predicates.append('contains(@status, $status_contains)')
			xpath_variables['status_contains'] = status_contains
		if status_not_contain != '':
			predicates.append('not(contains(@status, $status_not_contain))')
			xpath_variables['status_not_contain'] = status_not_contain
		xpath = '//page'
		if predicates:
			xpath += '[' + ' and '.join(predicates) + ']'
		xpath += '/@output'
		pages = []
		for xml_source_file in source_tree.xpath(xpath, **xpath_variables):
			if isfile(xml_source_file):
				pages += cls.get_pages_from_xml_file(xml_source_file, word_selection_function=word_selection_function)
		return pages

	@classmethod
	def get_semantic_dictionary(cls):
		"""Build the semantic dictionary of this class as specified by SemanticClass.

		The class dictionary is stored under cls.CLASS_KEY and the property
		descriptions under cls.PROPERTIES_KEY; the result is then passed through
		cls.return_dictionary_after_updating_super_classes.
		"""
		class_dict = cls.get_class_dictionary()
		properties = {
			'number': { 'class': str, 'cardinality': 1},
			'faksimile_image': { 'class': FaksimileImage, 'cardinality': 1},
			'orientation': { 'class': str, 'cardinality': 1},
			'svg_image': { 'class': SVGImage, 'cardinality': 1},
		}
		text_field_property = cls.create_semantic_property_dictionary(
			'text_field', TextField, cardinality=1, name='pageIsOnTextField',
			label='page is on text field',
			comment='Relates a page to the text field on a faksimile image.')
		properties.update(text_field_property)
		for list_property in ('lines', 'words', 'word_deletion_paths', 'word_insertion_marks'):
			properties.update(cls.create_semantic_property_dictionary(list_property, list))
		semantic_dictionary = {cls.CLASS_KEY: class_dict, cls.PROPERTIES_KEY: properties}
		return cls.return_dictionary_after_updating_super_classes(semantic_dictionary)

	def init_node_objects(self):
		"""Initialize all node objects from self.page_tree.

		Sets word_insertion_marks, words, mark_foreign_hands, text_connection_marks,
		line_numbers, lines, writing_processes and word_deletion_paths.  If both
		faksimile_image and text_field are set, init_word(self) is called on all
		words, marks of foreign hands and text connection marks.  Finally each
		word insertion mark with a line_number > -1 gets the first line whose id
		equals that number; if there is no such line, its ``line`` is left as is.
		"""
		root = self.page_tree.getroot()
		self.word_insertion_marks = [ WordInsertionMark(wim_node=wim_node) for wim_node in root.xpath('//' + WordInsertionMark.XML_TAG) ]
		# Only the words directly below the root node are instantiated here.
		self.words = [ Word.create_cls(word_node) for word_node in root.xpath('./word') ]
		self.mark_foreign_hands = [ MarkForeignHands.create_cls(node) for node in root.xpath('//' + MarkForeignHands.XML_TAG) ]
		self.text_connection_marks = [ TextConnectionMark.create_cls(node) for node in root.xpath('//' + TextConnectionMark.XML_TAG) ]
		# Line numbers and lines are both created from the same line number nodes.
		line_number_nodes = root.xpath('//' + LineNumber.XML_TAG)
		self.line_numbers = [ LineNumber(xml_text_node=line_number_node) for line_number_node in line_number_nodes ]
		self.lines = [ Line.create_cls_from_node(node=line_number_node) for line_number_node in line_number_nodes ]
		self.writing_processes = [ WritingProcess.create_writing_process_from_xml(node, self.words) for node in self.page_tree.xpath('//' + WritingProcess.XML_TAG) ]
		self.word_deletion_paths = [ WordDeletionPath.create_cls(node, self) for node in self.page_tree.xpath('//' + WordDeletionPath.XML_TAG) ]
		if self.faksimile_image is not None and self.text_field is not None:
			for simple_word in self.words + self.mark_foreign_hands + self.text_connection_marks:
				simple_word.init_word(self)
		for wim in self.word_insertion_marks:
			if wim.line_number > -1:
				# A line number without a matching line used to raise an
				# IndexError; such marks are now skipped.
				matching_line = next((line for line in self.lines if line.id == wim.line_number), None)
				if matching_line is not None:
					wim.line = matching_line

	def update_and_attach_words2tree(self, update_function_on_word=None, include_special_words_of_type=()):
		"""Update word ids and attach them to page.page_tree.

		All word, mark-foreign-hands and text-connection-mark nodes are removed
		from the tree; then the words, marks of foreign hands and text connection
		marks of this page are renumbered (starting at 0 per group) and attached
		again.  If the page is locked, only 'locked' is printed.

		Args:
			update_function_on_word: a function or a list of functions called with
				each word before it is attached; entries that are not callable
				(e.g. None) are ignored.
			include_special_words_of_type: collection of classes (MarkForeignHands
				and/or TextConnectionMark) to whose instances the update functions
				are applied as well.
		"""
		if self.is_locked():
			print('locked')
			return
		candidate_functions = update_function_on_word\
				if isinstance(update_function_on_word, list)\
				else [ update_function_on_word ]
		update_functions = [ func for func in candidate_functions if callable(func) ]
		# XPath union operator is a bare '|': a backslash in front of it would be
		# passed on literally and make the expression invalid.
		obsolete_nodes_xpath = './/word|.//' + MarkForeignHands.XML_TAG + '|.//' + TextConnectionMark.XML_TAG
		for node in self.page_tree.xpath(obsolete_nodes_xpath):
			node.getparent().remove(node)
		for index, word in enumerate(self.words):
			word.id = index
			for func in update_functions:
				func(word)
			word.attach_word_to_tree(self.page_tree)
		special_word_groups = (
			(MarkForeignHands, self.mark_foreign_hands),
			(TextConnectionMark, self.text_connection_marks),
		)
		for special_type, special_words in special_word_groups:
			apply_updates = special_type in include_special_words_of_type
			for index, special_word in enumerate(special_words):
				special_word.id = index
				if apply_updates:
					# The callable check used to test the list of functions
					# instead of the single function, so nothing was ever applied.
					for func in update_functions:
						func(special_word)
				special_word.attach_word_to_tree(self.page_tree)

	def update_data_source(self, faksimile_svgFile=None, xml_correction_file=None):
		"""Update the data source of page.

		The 'data-source' node of the page tree is reused if there is one and
		created below the root node otherwise.

		Args:
			faksimile_svgFile: if not None, replaces self.faksimile_svgFile.  The
				resulting self.faksimile_svgFile is written to the attribute 'file'.
			xml_correction_file: if not None, written to the attribute
				'xml-corrected-words'.
		"""
		if faksimile_svgFile is not None:
			self.faksimile_svgFile = faksimile_svgFile
		if self.faksimile_svgFile is None and xml_correction_file is None:
			# Nothing to record; do not create an empty data-source node.
			return
		data_nodes = self.page_tree.xpath('.//data-source')
		data_node = data_nodes[0]\
				if len(data_nodes) > 0\
				else ET.SubElement(self.page_tree.getroot(), 'data-source')
		# Attribute values must be strings, so an unknown file is not written.
		if self.faksimile_svgFile is not None:
			data_node.set('file', self.faksimile_svgFile)
		if xml_correction_file is not None:
			data_node.set('xml-corrected-words', xml_correction_file)

	def update_line_number_area(self, transkription_field, svg_tree=None):
		"""Determines the width of the area where the line numbers are written in the page.source file.

		A reference line number is looked up among the text nodes of the svg
		tree.  For a verso page the x position of that node is added as line
		number area width.  Otherwise the width of the glyph found at that
		position in self.svg_file is added to the x position.  Nothing is done
		if the page has fewer than two line numbers, if the reference node is
		not found, or if the glyph cannot be resolved.

		Args:
			transkription_field: the transkription field that is updated via
				add_line_number_area_width.
			svg_tree: parsed svg tree; if None, self.source is parsed (only when
				there are at least two line numbers).
		"""
		THRESHOLD = 0.4
		if len(self.line_numbers) <= 1:
			return
		if svg_tree is None:
			svg_tree = ET.parse(self.source)
		is_verso = transkription_field.is_page_verso()
		# self.line_numbers[9] needs at least ten entries; the former check
		# "> 8" raised an IndexError for pages with exactly nine line numbers.
		line_number = self.line_numbers[9]\
				if is_verso and len(self.line_numbers) > 9\
				else self.line_numbers[1]
		ln_nodes = [ item for item in svg_tree.iterfind('//text', svg_tree.getroot().nsmap)
				if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)
				and LineNumber.IS_A_LINE_NUMBER(item)
				and LineNumber(raw_text_node=item, transkription_field=transkription_field).id == line_number.id ]
		if len(ln_nodes) == 0:
			return
		matrix = Matrix(transform_matrix_string=ln_nodes[0].get('transform'))
		if is_verso:
			transkription_field.add_line_number_area_width(matrix.getX())
			return
		if self.svg_file is None or not isfile(self.svg_file):
			return
		svg_path_tree = ET.parse(self.svg_file)
		# XPath cannot use the default (None) prefix, hence it is mapped to 'ns'.
		namespaces = { k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items() }
		svg_x = matrix.getX()
		svg_y = self.line_numbers[1].bottom + transkription_field.ymin
		use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'\
				.format(svg_x-THRESHOLD, svg_x+THRESHOLD,svg_y-THRESHOLD, svg_y+THRESHOLD), namespaces=namespaces)
		if len(use_nodes) == 0:
			return
		# Without an xlink namespace or href attribute there is no symbol to look up.
		xlink_namespace = namespaces.get('xlink')
		href = use_nodes[0].get('{%s}href' % xlink_namespace) if xlink_namespace is not None else None
		if href is None:
			return
		symbol_id = href.replace('#', '')
		d_strings = use_nodes[0].xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
		if len(d_strings) > 0 and d_strings[0] != '':
			path = parse_path(d_strings[0])
			xmin, xmax, _, _ = path.bbox()
			width = xmax - xmin
			transkription_field.add_line_number_area_width(svg_x + width)

	def update_page_type(self, transkription_field=None):
		"""Set the page type (verso or recto) and write it to page_tree.

		Args:
			transkription_field: the field whose is_page_verso() decides the page
				type; if None, it is created from self.source.

		Raises:
			FileNotFoundError: if no transkription_field is given and self.source
				is None or not an existing file.
		"""
		if transkription_field is None:
			source_exists = self.source is not None and isfile(self.source)
			if not source_exists:
				raise FileNotFoundError('Page does not have a source!')
			transkription_field = TranskriptionField(self.source)
		if transkription_field.is_page_verso():
			self.page_type = Page.PAGE_VERSO
		else:
			self.page_type = Page.PAGE_RECTO
		root = self.page_tree.getroot()
		root.set('pageType', self.page_type)

	def update_styles(self, words=None, manuscript=None, add_to_parents=False, partition_according_to_styles=False, create_css=False):
	"""Update styles of words and add them to their transkription_positions.

	Styles are created with Style.create_cls and kept in a dictionary that is local to
	this call, so transkription positions with the same key share one style object.
	Words that have word_parts are processed by a recursive call with the same options.

	Args:
	words: the words to process; defaults to self.words.
	manuscript: passed on to Style.create_cls; if it is not None and add_to_parents is set,
	manuscript.update_styles is called with all styles collected by this call.
	add_to_parents: Add styles also to word (and if not None to manuscript).
	partition_according_to_styles: Partition word if its transkription_positions have different styles.
	create_css: passed on to Style.remove_irrelevant_style_keys (as extended_styles) and to
	Style.create_cls; if set, styles are additionally keyed by word.deleted and are created
	with the color of the word's first deletion path (None if the word has none).

	NOTE(review): the indentation of this method was lost in this copy of the file;
	confirm the nesting of the statements below against the repository version.
	"""
	style_dictionary = {}
	if words is None:
	words = self.words
	for word in words:
	# word parts get their styles first, by recursion with the same options
	if len(word.word_parts) > 0:
	self.update_styles(words=word.word_parts, manuscript=manuscript, create_css=create_css,\
	add_to_parents=add_to_parents, partition_according_to_styles=partition_according_to_styles)
	for transkription_position in word.transkription_positions:
	if len(transkription_position.positional_word_parts) > 0:
	# the style class of the first positional word part stands for the whole position
	style_class = transkription_position.positional_word_parts[0].style_class
	# -1 is kept if no key of the style class is found in fontsizekey2stage_mapping;
	# if several keys are found, the last one determines the writing process id
	writing_process_id = -1
	for font_key in [ font_key for font_key in style_class.split(' ') if font_key in self.fontsizekey2stage_mapping.keys() ]:
	writing_process_id = self.fontsizekey2stage_mapping.get(font_key)
	# key: (style class as returned by Style.remove_irrelevant_style_keys, writing process id)
	style_class_key = (Style.remove_irrelevant_style_keys(style_class, self, extended_styles=create_css), writing_process_id)
	if create_css:
	# with create_css the deletion state of the word is part of the dictionary key
	if style_dictionary.get((style_class_key, word.deleted)) is None:
	color = word.deletion_paths[0].style.color\
	if len(word.deletion_paths) > 0 else None
	style_dictionary[(style_class_key, word.deleted)] = Style.create_cls(self, style_class_key[0], manuscript=manuscript,\
	create_css=create_css, deletion_color=color, writing_process_id=style_class_key[1] )
	transkription_position.style = style_dictionary[(style_class_key, word.deleted)]
	#print(style_dictionary[(style_class_key, word.deleted)])
	else:
	if style_dictionary.get(style_class_key) is None:
	style_dictionary[style_class_key] = Style.create_cls(self, style_class_key[0], manuscript=manuscript, create_css=create_css)
	# here the writing process id is set on the style after its creation
	style_dictionary[style_class_key].writing_process_id = style_class_key[1]
	transkription_position.style = style_dictionary[style_class_key]
	if add_to_parents and transkription_position.style not in word.styles:
	word.styles.append(transkription_position.style)
	if partition_according_to_styles:
	word.split_according_to_status('style', splits_are_parts=True)
	# only the styles collected by this call are handed to the manuscript
	if manuscript is not None\
	and add_to_parents:
	manuscript.update_styles(*style_dictionary.values())

page.pyNo OneTemporaryActions

File Metadata

page.pyView Options

Event Timeline

page.py
No OneTemporary
Actions

page.py
View Options