Page MenuHomec4science

py2ttl_data.py
No OneTemporary

File Metadata

Created
Sun, Apr 28, 09:09

py2ttl_data.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py objects to data in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
import getopt
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
import re
import sys
sys.path.append('svgscripts')
from datatypes.manuscript import ArchivalManuscriptUnity
from datatypes.super_page import SuperPage
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from class_spec import SemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from data_handler import RDFDataHandler
sys.path.append('shared_util')
from myxmlwriter import xml2dict
# Module metadata: authorship, contact, license and version of this script.
__author__ = "Christian Steiner"
# The maintainer is the same person as the author.
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLDataConverter:
    """This class can be used to convert py objects to rdf data in turtle format.

    The converter needs a mapping dictionary, which is either passed in
    directly or loaded from an xml file with xml2dict.
    """
    # When True, all progress output on stdout is suppressed.
    UNITTESTING = False

    def __init__(self, manuscript_file, xml_dictionary_file=None, mapping_dictionary=None):
        """Store the manuscript file and set up the mapping dictionary.

        :param manuscript_file: file that is handed to
            ArchivalManuscriptUnity.create_cls by convert().
        :param xml_dictionary_file: file from which the mapping dictionary is
            loaded (only used if mapping_dictionary is None).
        :param mapping_dictionary: ready-made mapping dictionary; takes
            precedence over xml_dictionary_file.
        :raises ValueError: if neither xml_dictionary_file nor
            mapping_dictionary is given.
        """
        verbose = not Py2TTLDataConverter.UNITTESTING
        if mapping_dictionary is not None:
            self.mapping_dictionary = mapping_dictionary
        elif xml_dictionary_file is not None:
            if verbose:
                print(Fore.CYAN + 'initializing mapping dictionary from file "{}" ...'.format(xml_dictionary_file))
            self.mapping_dictionary = xml2dict(xml_dictionary_file)
            if verbose:
                print(Fore.GREEN + '[{} classes added]'.format(len(self.mapping_dictionary['classes'])))
        else:
            # ValueError is a subclass of Exception, so callers that catch
            # Exception keep working.
            raise ValueError('Error: Py2TTLDataConverter init expects either a xml_dictionary_file or a mapping_dictionary!')
        self.manuscript_file = manuscript_file

    def convert(self, page_status_list=None):
        """Convert manuscript instantiated with manuscript_file to rdf data and write it to a file.

        The name of the output file is derived from the manuscript title:
        spaces are replaced by '_' and '_DATA.ttl' is appended.

        :param page_status_list: list of page status that should be included;
            if None or empty, ['OK', SuperPage.STATUS_MERGED_OK] is used.
        """
        if not page_status_list:
            page_status_list = ['OK', SuperPage.STATUS_MERGED_OK]
        verbose = not Py2TTLDataConverter.UNITTESTING
        try:
            if verbose:
                print(Fore.CYAN + 'initializing python objects with file "{}" ...'.format(self.manuscript_file))
            manuscript = ArchivalManuscriptUnity.create_cls(self.manuscript_file, page_status_list=page_status_list, update_page_styles=True)
            target_data_file = manuscript.title.replace(' ', '_') + '_DATA.ttl'
            data_handler = RDFDataHandler(target_data_file, self.mapping_dictionary)
            if verbose:
                # Count only the pages whose instance carries an 'xml_file' attribute.
                number_of_pages = sum(1 for page in manuscript.pages if 'xml_file' in vars(page))
                print(Fore.GREEN + '[{} pages added]'.format(number_of_pages))
                print(Fore.CYAN + 'adding triples to rdf graph ... ')
            data_handler.add_data(manuscript, '')
            if verbose:
                print(Fore.GREEN + '[{} statements added]'.format(len(data_handler.data_graph)))
                print(Fore.CYAN + 'writing graph to file "{}" ...'.format(target_data_file))
            data_handler.write()
            if verbose:
                print(Fore.GREEN + '[OK]')
        finally:
            # Reset the terminal style even if one of the steps above raised,
            # so that a failure does not leave the terminal colored.
            if verbose:
                print(Style.RESET_ALL)
def usage():
    """Print the help text of this script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py objects to rdf data in turtle format.
    py2ttl/py2ttl_data.py [OPTIONS] <manuscript.xml>
    <manuscript.xml> xml file of type shared_util.myxmlwriter.FILE_TYPE_XML_MANUSCRIPT.
    OPTIONS:
    -h|--help: show help
    -i|--include-status=STATUS include pages with status = STATUS. STATUS is a ':' seperated string of status, e.g. 'OK:faksimile merged'.
    -m|--mapping=mapping_dict.xml xml file generated by py2ttl/py2ttl.py containing mapping information for each property of a class.
    :return: exit code (int)
    """
    # NOTE: the docstring above is printed verbatim by usage(), so it is
    # user-facing help text and not only developer documentation.
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    # Default name of the mapping file; it can be overridden with -m|--mapping.
    xml_dictionary_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2' + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
    page_status_list = None
    try:
        opts, args = getopt.getopt(argv, "hi:m:", ["help", "include-status=", "mapping="])
    except getopt.GetoptError as err:
        # Tell the user which option was rejected before showing the help text.
        print('Error: {}'.format(err), file=sys.stderr)
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-i', '--include-status'):
            page_status_list = arg.split(':')
        elif opt in ('-m', '--mapping'):
            xml_dictionary_file = arg
    if not args:
        usage()
        return 2
    manuscript_file = args[0]
    # Both input files must exist; name the missing one(s) on stderr instead
    # of failing with nothing but the generic help text.
    missing_files = [file_name for file_name in (xml_dictionary_file, manuscript_file) if not isfile(file_name)]
    if missing_files:
        for file_name in missing_files:
            print('Error: file "{}" not found!'.format(file_name), file=sys.stderr)
        usage()
        return 2
    converter = Py2TTLDataConverter(manuscript_file, xml_dictionary_file=xml_dictionary_file)
    converter.convert(page_status_list=page_status_list)
    return 0
if __name__ == "__main__":
    # Run the converter with the command line arguments (without the script
    # name) and hand the return value of main to the shell as exit status.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline