Page MenuHomec4science

manuscript.py
No OneTemporary

File Metadata

Created
Mon, May 6, 23:34

manuscript.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent an archival unity of manuscript pages, i.e. workbooks, notebooks, folders of handwritten pages.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
from .color import Color
from .description import Description
from .earlier_description import EarlierDescription
from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION
sys.path.append('py2ttl')
from class_spec import SemanticClass
sys.path.append('shared_util')
from myxmlwriter import parse_xml_of_type, write_pretty, xml_has_type
class ArchivalManuscriptUnity(SemanticClass):
    """
    This class represents an archival unity of manuscript pages (workbooks, notebooks and portfolios of handwritten pages).
    @label archival unity of manuscript pages
    Args:
        title title of archival unity
        manuscript_type type of manuscript: 'Arbeitsheft', 'Notizheft', 'Mappe'
        manuscript_tree lxml.ElementTree
    """
    # NOTE(review): the '@label' line in the class docstring looks like it is
    # machine-read by the semantic tooling -- its wording is kept verbatim; confirm
    # before rephrasing it.

    # Tag name passed on as this class' xml tag.
    XML_TAG = 'manuscript'
    # Tag of the node that collects the color nodes of a manuscript tree.
    XML_COLORS_TAG = 'colors'
    # Maps the first word of a title to the corresponding manuscript type.
    TYPE_DICTIONARY = { 'Mp': 'Mappe', 'N': 'Notizheft', 'W': 'Arbeitsheft' }
    # If True, update_colors does not write the manuscript tree back to its file.
    UNITTESTING = False

    def __init__(self, title='', manuscript_type='', manuscript_tree=None):
        """Initialize the archival unity with empty colors, pages, styles and descriptions.

        If no manuscript_type is given, it is derived from the first
        blank-separated word of the title by means of TYPE_DICTIONARY
        (e.g. a title starting with 'Mp ' yields the type 'Mappe').

        :param title: title of the archival unity
        :param manuscript_type: type of manuscript: 'Arbeitsheft', 'Notizheft', 'Mappe'
        :param manuscript_tree: lxml.ElementTree of the manuscript file or None
        """
        self.colors = []
        self.earlier_descriptions = []
        self.description = None
        self.manuscript_tree = manuscript_tree
        self.manuscript_type = manuscript_type
        self.pages = []
        self.styles = []
        self.title = title
        if self.manuscript_type == '' and self.title:
            # Split only once; an unknown prefix leaves the (empty) type untouched.
            title_prefix = self.title.split(' ')[0]
            self.manuscript_type = self.TYPE_DICTIONARY.get(title_prefix, self.manuscript_type)

    def get_name_and_id(self):
        """Return an identification for object as 2-tuple.

        :return: ('', title with all blanks replaced by underscores)
        """
        return '', self.title.replace(' ', '_')

    @staticmethod
    def _is_word_position_file(page_source):
        """Return True if page_source is an existing file of type FILE_TYPE_SVG_WORD_POSITION.
        """
        return isfile(page_source) and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source)

    @classmethod
    def create_cls(cls, xml_manuscript_file, page_status_list=None, page_xpath='', update_page_styles=False):
        """Create an instance of ArchivalManuscriptUnity from a xml file of type FILE_TYPE_XML_MANUSCRIPT.

        Every page output file of the manuscript that exists and has type
        FILE_TYPE_SVG_WORD_POSITION is turned into a Page. Pages that are not
        selected by page_xpath are created with create_dummy_page=True.

        :param xml_manuscript_file: manuscript file, parsed with parse_xml_of_type
        :param page_status_list: list of strings; if page_xpath is empty, only pages
            whose @status contains each of these strings are selected
        :param page_xpath: xpath selecting the output files of the pages that should
            not be dummy pages; if empty, it is built from page_status_list
            (without a status list all pages are selected)
        :param update_page_styles: if True, call update_styles on every page that
            has an instance attribute 'xml_file'
        :return: ArchivalManuscriptUnity
        """
        manuscript_tree = parse_xml_of_type(xml_manuscript_file, FILE_TYPE_XML_MANUSCRIPT)
        root = manuscript_tree.getroot()
        # Missing or empty attributes fall back to the empty string.
        title = root.get('title') or ''
        manuscript_type = root.get('type') or ''
        manuscript = cls(title=title, manuscript_type=manuscript_type, manuscript_tree=manuscript_tree)
        manuscript.colors = [ Color.create_cls(node=color_node)
                              for color_node in manuscript_tree.xpath('.//' + cls.XML_COLORS_TAG + '/' + Color.XML_TAG) ]
        if page_xpath == '':
            page_status = ''
            if isinstance(page_status_list, list) and len(page_status_list) > 0:
                page_status = '[' + ' and '.join([ f'contains(@status, "{status}")' for status in page_status_list ]) + ']'
            page_xpath = f'//pages/page{page_status}/@output'
        # A set gives constant-time membership tests in the page loop below.
        included_pages = { page_source
                           for page_source in manuscript_tree.xpath(page_xpath)
                           if cls._is_word_position_file(page_source) }
        manuscript.pages = [ Page.create_cls(page_source, create_dummy_page=(page_source not in included_pages))
                             for page_source in manuscript_tree.xpath('//pages/page/@output')
                             if cls._is_word_position_file(page_source) ]
        if update_page_styles:
            for page in manuscript.pages:
                # Only pages carrying an instance attribute 'xml_file' are updated
                # (presumably dummy pages lack it -- confirm against Page.create_cls).
                if 'xml_file' in page.__dict__:
                    page.update_styles(manuscript=manuscript, add_to_parents=True, create_css=True)
        description_nodes = manuscript_tree.xpath(Description.XML_TAG)
        if len(description_nodes) > 0:
            description_node = description_nodes[0]
            description_roots = description_node.xpath(Description.ROOT_TAG)
            manuscript.description = Description.create_cls_from_node(description_roots[0])\
                    if len(description_roots) > 0\
                    else None
            for earlier_description_node in description_node.xpath(EarlierDescription.ROOT_TAG):
                earlier_description = EarlierDescription.create_cls_from_node(earlier_description_node)
                if earlier_description is not None:
                    manuscript.earlier_descriptions.append(earlier_description)
        return manuscript

    def get_color(self, hex_color) -> Color:
        """Return color if it exists or None.

        :param hex_color: value that is compared with the hex_color of each manuscript color
        :return: the first color of self.colors with an equal hex_color, or None
        """
        return next((color for color in self.colors if color.hex_color == hex_color), None)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        The properties title, manuscript_type, styles, pages, description and
        earlier_descriptions are registered; colors is not part of the dictionary.

        :return: result of cls.return_dictionary_after_updating_super_classes
        """
        class_dict = cls.get_class_dictionary()
        properties = {}
        properties.update(cls.create_semantic_property_dictionary('title', str, 1))
        properties.update(cls.create_semantic_property_dictionary('manuscript_type', str, 1))
        properties.update(cls.create_semantic_property_dictionary('styles', list))
        properties.update(cls.create_semantic_property_dictionary('pages', list))
        properties.update(cls.create_semantic_property_dictionary('description', Description))
        properties.update(cls.create_semantic_property_dictionary('earlier_descriptions', EarlierDescription))
        dictionary = { cls.CLASS_KEY: class_dict, cls.PROPERTIES_KEY: properties }
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    def update_colors(self, color):
        """Update manuscript colors if color is not contained.

        If the manuscript has a tree, the first existing colors node is replaced
        by a new one that contains all manuscript colors, and -- unless
        UNITTESTING is set -- the tree is written back to its source file by
        write_pretty (called with backup=True).

        :param color: color to add; it is identified by its hex_color
        """
        if self.get_color(color.hex_color) is not None:
            return
        self.colors.append(color)
        if self.manuscript_tree is None:
            return
        # Evaluate the xpath once and drop the old colors node before rebuilding it.
        old_colors_nodes = self.manuscript_tree.xpath('.//' + self.XML_COLORS_TAG)
        if len(old_colors_nodes) > 0:
            old_colors_nodes[0].getparent().remove(old_colors_nodes[0])
        colors_node = ET.SubElement(self.manuscript_tree.getroot(), self.XML_COLORS_TAG)
        # A distinct loop name keeps the 'color' parameter from being rebound.
        for manuscript_color in self.colors:
            manuscript_color.attach_object_to_tree(colors_node)
        if not self.UNITTESTING:
            write_pretty(xml_element_tree=self.manuscript_tree, file_name=self.manuscript_tree.docinfo.URL,\
                    script_name=__file__, backup=True,\
                    file_type=FILE_TYPE_XML_MANUSCRIPT)

    def update_styles(self, *styles):
        """Update manuscript styles.

        :param styles: styles to add; a style that is already contained in
            self.styles is skipped
        """
        for style in styles:
            if style not in self.styles:
                self.styles.append(style)

Event Timeline