Page MenuHomec4science

manuscript.py
No OneTemporary

File Metadata

Created
Wed, May 8, 19:20

manuscript.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This module provides a class that represents an archival unity of manuscript pages, i.e. workbooks, notebooks, or folders of handwritten pages.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact and release information.
# The maintainer is the same person as the author.
__author__ = __maintainer__ = "Christian Steiner"
__copyright__ = "University of Basel"
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from lxml import etree as ET
from os.path import isfile
import sys
from .page import Page, FILE_TYPE_XML_MANUSCRIPT, FILE_TYPE_SVG_WORD_POSITION
sys.path.append('py2ttl')
from class_spec import SemanticClass
sys.path.append('shared_util')
from myxmlwriter import parse_xml_of_type, xml_has_type
class ArchivalManuscriptUnity(SemanticClass):
    """
    This class represents an archival unity of manuscript pages, i.e. workbooks, notebooks, folders of handwritten pages.
    @label archival unity of manuscript pages
    Args:
    title title of archival unity
    manuscript_type type of manuscript: 'Arbeitsheft', 'Notizheft', 'Mappe'
    """
    # NOTE(review): the docstring text above (in particular the '@label' line)
    # is kept verbatim because it looks like it is consumed by the
    # SemanticClass tooling -- confirm before rewording it.

    # Name of the XML element that the property paths in
    # get_semantic_dictionary() are rooted at.
    XML_TAG = 'manuscript'
    # URI of the ontology class this class is declared a subclass of.
    RDFS_SUBCLASSOF = 'http://www.knora.org/ontology/0068/nietzsche#Manuscript'

    def __init__(self, title='', manuscript_type=''):
        """Initialise the unity with its title and type and an empty page list.

        :param title: title of the archival unity
        :param manuscript_type: type of manuscript, e.g. 'Arbeitsheft',
            'Notizheft' or 'Mappe'
        """
        self.title = title
        self.manuscript_type = manuscript_type
        # Filled by create_cls(); stays empty for directly constructed objects.
        self.pages = []

    def get_name_and_id(self):
        """Return an identification for object as 2-tuple.

        :return: ('', title) where every space in the title has been
            replaced by an underscore
        """
        return '', self.title.replace(' ', '_')

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        :return: dict with the keys 'class' (the result of
            cls.get_class_dictionary()) and 'properties' (a mapping from
            attribute name to a 3-tuple of type, cardinality and a path
            rooted at XML_TAG)
        """
        class_dict = cls.get_class_dictionary()
        properties = {
            'title': (str, 1, '{}/@title'.format(cls.XML_TAG)),
            'manuscript_type': (str, 1, '{}/@type'.format(cls.XML_TAG)),
            'pages': (Page, SemanticClass.LIST, '{}/pages/page'.format(cls.XML_TAG)),
        }
        return {'class': class_dict, 'properties': properties}

    @classmethod
    def create_cls(cls, xml_manuscript_file, page_status_list=None, page_xpath=''):
        """Create an instance of ArchivalManuscriptUnity from a xml file of type FILE_TYPE_XML_MANUSCRIPT.

        :param xml_manuscript_file: manuscript file, handed to
            parse_xml_of_type() together with FILE_TYPE_XML_MANUSCRIPT
        :param page_status_list: optional list or tuple of status strings;
            only pages whose 'status' attribute contains every one of them
            are selected. Ignored when empty, None or when page_xpath is given.
        :param page_xpath: optional XPath expression that selects the page
            source files; when non-empty it is used as-is instead of the
            default '//pages/page/@output' selection.
        :return: ArchivalManuscriptUnity
        """
        manuscript_tree = parse_xml_of_type(xml_manuscript_file, FILE_TYPE_XML_MANUSCRIPT)
        root = manuscript_tree.getroot()
        # Missing or empty attributes fall back to the empty string.
        title = root.get('title') or ''
        manuscript_type = root.get('type') or ''
        manuscript = cls(title=title, manuscript_type=manuscript_type)

        if page_xpath == '':
            page_status = ''
            # isinstance() instead of an exact type check, so that tuples and
            # list subclasses are honoured rather than silently ignored.
            if isinstance(page_status_list, (list, tuple)) and page_status_list:
                # NOTE: statuses are interpolated verbatim; a status that
                # contains a double quote yields an invalid XPath expression.
                conditions = [
                    'contains(@status, "{}")'.format(status)
                    for status in page_status_list
                ]
                page_status = '[' + ' and '.join(conditions) + ']'
            page_xpath = '//pages/page{0}/@output'.format(page_status)

        # Keep only sources that exist on disk and for which xml_has_type()
        # accepts FILE_TYPE_SVG_WORD_POSITION.
        manuscript.pages = [
            Page(xml_source_file=page_source)
            for page_source in manuscript_tree.xpath(page_xpath)
            if isfile(page_source)
            and xml_has_type(FILE_TYPE_SVG_WORD_POSITION, xml_source_file=page_source)
        ]
        return manuscript

Event Timeline