Page MenuHomec4science

create_manuscript_description.py
No OneTemporary

File Metadata

Created
Sun, Oct 6, 16:18

create_manuscript_description.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to create a manuscript description in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
import csv
import getopt
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
import re
import sys
sys.path.append('svgscripts')
from datatypes.manuscript_description import ManuscriptDescription
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from class_spec import SemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from py2ttl_data import Py2TTLDataConverter
from py2ttl_ontology import Py2TTLOntologyConverter
from data_handler import RDFDataHandler
sys.path.append('shared_util')
from myxmlwriter import xml2dict
from main_util import get_manuscript_files_and_include_status
# Module metadata (authorship, licensing and release state of this script).
__author__ = "Christian Steiner"
__maintainer__ = __author__  # maintainer is the same person as the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
# NOTE(review): not referenced anywhere in the visible part of this file --
# presumably kept for parity with sibling scripts; confirm before removing.
FILE_TYPE_XML_PROJECT = "xmlProjectFile"
class Py2TTLCSVManuscriptDescriptionConverter:
    """This class can be used to convert manuscript descriptions to rdf data in turtle format.

    Class attributes:
        UNITTESTING: if True, no progress information is printed.
        NAME, DESCRIPTION_KEY, LABEL: names of the csv columns read by convert().
        ALIAS, URL: further csv column names; they are not read inside this class.
    """
    UNITTESTING = False
    NAME = 'folio-name'
    DESCRIPTION_KEY = 'description'
    ALIAS = 'alias'
    LABEL = 'label'
    URL = 'nietzschesource-url'

    def __init__(self, csv_file: str, faksimile_mapping: str, mapping_dictionary: str):
        """Store the input paths and read the faksimile mapping.

        :param csv_file: path of the csv file that contains the manuscript descriptions.
        :param faksimile_mapping: path of the csv file that is read by _init_mapping.
        :param mapping_dictionary: passed on unchanged to RDFDataHandler in convert().
            NOTE(review): annotated as str, but main() passes
            ontology_converter.uri_mapping4cls_and_properties -- presumably a dict; confirm.
        """
        self.mapping_dictionary = mapping_dictionary
        self.csv_file = csv_file
        self.faksimile_mapping_dict = self._init_mapping(faksimile_mapping)

    def _init_mapping(self, faksimile_mapping) -> dict:
        """Init faksimile_mapping dictionary.

        The value of the second csv column is used as key, the values of the
        first csv column are collected (in file order) in a list per key.

        :param faksimile_mapping: path of the mapping csv file (with header row).
        :return: dictionary that maps each second-column value to a list of first-column values.
        """
        faksimile_mapping_dict = {}
        with open(faksimile_mapping, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            fieldnames = reader.fieldnames
            for row in reader:
                # fieldnames is indexed inside the loop on purpose: a file
                # without data rows must not fail on a short or missing header.
                key = row[fieldnames[1]]
                faksimile_mapping_dict.setdefault(key, []).append(row[fieldnames[0]])
        return faksimile_mapping_dict

    @staticmethod
    def _add_leaf(data_handler, leaf_id, images, description):
        """Hand one collected leaf (id, images, raw description) over to data_handler."""
        identifier_uri = data_handler.add_external_class_item('Leaf', '', leaf_id)
        data_handler.add_external_property(identifier_uri, 'isDepictedBy', images)
        manuscript_description = ManuscriptDescription.create_cls_from_raw_text(description)
        data_handler.add_data_instance2parent_uri(identifier_uri, 'leafHasDescription', manuscript_description)

    def convert(self, target_data_file):
        """Convert manuscript descriptions from csv_file to rdf data and write to target_file.

        Consecutive csv rows with the same value in column NAME are treated as
        one leaf: the description is taken from the first row of the run, the
        images are looked up in faksimile_mapping_dict by the LABEL of every row.
        Rows whose LABEL contains '_thumb' are ignored.

        :param target_data_file: file name that is passed to RDFDataHandler.
        """
        verbose = not Py2TTLCSVManuscriptDescriptionConverter.UNITTESTING
        if verbose:
            print(Fore.CYAN + 'initializing python objects with file "{}" ...'.format(self.csv_file))
        data_handler = RDFDataHandler(target_data_file, self.mapping_dictionary)
        with open(self.csv_file, newline='') as csvfile:
            leaf_id = ''
            description = ''
            images = []
            counter = 0
            for row in csv.DictReader(csvfile):
                if '_thumb' in row[self.LABEL]:
                    continue
                if row[self.NAME] != leaf_id:
                    # A new leaf starts: flush the one collected so far.
                    if leaf_id != '':
                        counter += 1
                        self._add_leaf(data_handler, leaf_id, images, description)
                    leaf_id = row[self.NAME]
                    description = row[self.DESCRIPTION_KEY]
                    images = []
                images.extend(self.faksimile_mapping_dict.get(row[self.LABEL]) or [])
            # Flush the last leaf under the same condition as all the others,
            # so that it is added even if no image was mapped to it.
            if leaf_id != '':
                counter += 1
                self._add_leaf(data_handler, leaf_id, images, description)
            if verbose:
                print(Fore.GREEN + f'[{counter} leafs added]')
        if verbose:
            print(Fore.CYAN + 'adding triples to rdf graph ... ')
            print(Fore.GREEN + '[{} statements added]'.format(str(len(data_handler.data_graph))))
            print(Fore.CYAN + 'writing graph to file "{}" ...'.format(target_data_file))
        data_handler.write()
        if verbose:
            print(Fore.GREEN + '[OK]')
            print(Style.RESET_ALL)
def usage():
    """Print the docstring of main() as help text on stdout.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py objects to a owl:Ontology and rdf data in turtle format.

    py2ttl/create_manuscript_description.py [OPTIONS] <csv-file> <mapping-csv>

        <csv-file>      csv file that contains the description of the manuscript.
        <mapping-csv>   a image to faksimile iri mapping csv file

        OPTIONS:
        -h|--help:          show help
        -o|--output=FILE    specify output file

        :return: exit code (int)
    """
    # NOTE: the docstring above is printed verbatim by usage(), so it doubles
    # as the command line help text.
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    output_file = './include-ttl/descriptions.ttl'
    try:
        opts, args = getopt.getopt(argv, "ho:", ["help", "output="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-o', '--output'):
            # getopt reports the short option as '-o' (letter o, see "ho:").
            output_file = arg
    if len(args) < 2:
        usage()
        return 2
    csv_file, mapping_csv_file = args[0], args[1]
    ontology_converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    print(Fore.CYAN + 'Create ontology ...')
    # The ontology has to be created first: the converter's uri mapping is
    # handed to the data converter below.
    if ontology_converter.create_ontology(datatypes_dir, target_ontology_file) != 0:
        return 2
    print(Fore.GREEN + '[Ontology file {0} created]'.format(target_ontology_file))
    print(Fore.CYAN + f'Create data from "{csv_file}" with ...')
    data_converter = Py2TTLCSVManuscriptDescriptionConverter(csv_file, mapping_csv_file, mapping_dictionary=ontology_converter.uri_mapping4cls_and_properties)
    exit_code = data_converter.convert(output_file)
    # convert() has no return statement; report success as 0 so that the
    # documented int exit code is honoured.
    return 0 if exit_code is None else exit_code
if __name__ == "__main__":
    # Run main() with the command line arguments (program name stripped) and
    # use its return value as the exit status of the interpreter.
    raise SystemExit(main(sys.argv[1:]))

Event Timeline