create_manuscript_description.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Jul 13, 17:41

create_manuscript_description.py
View Options

	#!/usr/bin/env python3
	# -- coding: utf-8 --

	""" This program can be used to create a manuscript description in turtle format.
	"""
	# Copyright (C) University of Basel 2019 {{{1
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}

	from colorama import Fore, Style
	import csv
	import getopt
	import lxml.etree as ET
	from os import sep, path, listdir
	from os.path import isfile, isdir, dirname, basename
	from progress.bar import Bar
	import re
	import sys

	sys.path.append('svgscripts')
	from datatypes.manuscript_description import ManuscriptDescription

	if dirname(__file__) not in sys.path:
	sys.path.append(dirname(__file__))

	from class_spec import SemanticClass
	from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
	from py2ttl_data import Py2TTLDataConverter
	from py2ttl_ontology import Py2TTLOntologyConverter
	from data_handler import RDFDataHandler


	sys.path.append('shared_util')
	from myxmlwriter import xml2dict
	from main_util import get_manuscript_files_and_include_status

	# Module metadata: authorship, licensing and version information.
	__author__ = "Christian Steiner"
	# The maintainer is the same person as the author.
	__maintainer__ = __author__
	__copyright__ = 'University of Basel'
	__email__ = "christian.steiner@unibas.ch"
	__status__ = "Development"
	__license__ = "GPL v3"
	__version__ = "0.0.1"

	# NOTE(review): this constant is not referenced in the visible part of this file.
	FILE_TYPE_XML_PROJECT = "xmlProjectFile"

	class Py2TTLCSVManuscriptDescriptionConverter:
	    """Convert manuscript descriptions from a csv file to rdf data in turtle format.

	    The description csv file is read row by row; consecutive rows that share
	    the same value in the NAME column are treated as one leaf. For each leaf
	    the images found in the faksimile mapping and a ManuscriptDescription
	    built from the raw description text are handed to an RDFDataHandler.
	    """
	    # When True, all progress output on stdout is suppressed.
	    UNITTESTING = False
	    # Column of the description csv file that identifies a leaf.
	    NAME = 'folio-name'
	    # Column of the description csv file that holds the raw description text.
	    DESCRIPTION_KEY = 'description'
	    # NOTE(review): ALIAS and URL are not used by the methods of this class.
	    ALIAS = 'alias'
	    # Column used to skip '_thumb' rows and to look up images in the mapping.
	    LABEL = 'label'
	    URL = 'nietzschesource-url'

	    def __init__(self, csv_file: str, faksimile_mapping: str, mapping_dictionary: str):
	        """Store the input file and load the faksimile mapping.

	        :param csv_file: path of the csv file with the manuscript descriptions.
	        :param faksimile_mapping: path of the csv file mapping images to labels.
	        :param mapping_dictionary: passed unchanged to RDFDataHandler.
	            NOTE(review): annotated as str, but main() passes
	            ``uri_mapping4cls_and_properties`` -- presumably a dict; confirm.
	        """
	        self.mapping_dictionary = mapping_dictionary
	        self.csv_file = csv_file
	        self.faksimile_mapping_dict = self._init_mapping(faksimile_mapping)

	    def _init_mapping(self, faksimile_mapping) -> dict:
	        """Read the faksimile mapping csv file into a dictionary.

	        The value of the second column of each row is used as key, the value
	        of the first column is appended to the list stored under that key.

	        :param faksimile_mapping: path of the mapping csv file.
	        :return: dictionary mapping second-column values to lists of
	            first-column values, in file order.
	        """
	        faksimile_mapping_dict = {}
	        with open(faksimile_mapping, newline='') as csvfile:
	            reader = csv.DictReader(csvfile)
	            fieldnames = reader.fieldnames
	            for row in reader:
	                value, key = row[fieldnames[0]], row[fieldnames[1]]
	                faksimile_mapping_dict.setdefault(key, []).append(value)
	        return faksimile_mapping_dict

	    @staticmethod
	    def _add_leaf(data_handler, leaf_id, images, description):
	        """Add one leaf with its images and its description to data_handler."""
	        identifier_uri = data_handler.add_external_class_item('Leaf', '', leaf_id)
	        data_handler.add_external_property(identifier_uri, 'isDepictedBy', images)
	        manuscript_description = ManuscriptDescription.create_cls_from_raw_text(description)
	        data_handler.add_data_instance2parent_uri(identifier_uri, 'leafHasDescription', manuscript_description)

	    def convert(self, target_data_file):
	        """Convert manuscript descriptions from csv_file to rdf data and write to target_data_file.

	        Rows whose LABEL contains '_thumb' are ignored. Progress messages are
	        printed unless UNITTESTING is set.

	        :param target_data_file: file the RDFDataHandler writes the graph to.
	        """
	        verbose = not Py2TTLCSVManuscriptDescriptionConverter.UNITTESTING
	        if verbose:
	            print(Fore.CYAN + 'initializing python objects with file "{}" ...'.format(self.csv_file))
	        data_handler = RDFDataHandler(target_data_file, self.mapping_dictionary)
	        leaf_id = ''
	        description = ''
	        images = []
	        counter = 0
	        with open(self.csv_file, newline='') as csvfile:
	            # Stream the rows instead of building a filtered list first.
	            for row in csv.DictReader(csvfile):
	                if '_thumb' in row[self.LABEL]:
	                    continue
	                if row[self.NAME] != leaf_id:
	                    # A new leaf starts: flush the one collected so far.
	                    if leaf_id != '':
	                        counter += 1
	                        self._add_leaf(data_handler, leaf_id, images, description)
	                    leaf_id = row[self.NAME]
	                    description = row[self.DESCRIPTION_KEY]
	                    # Fresh list: the previous one has been handed to data_handler.
	                    images = []
	                images.extend(self.faksimile_mapping_dict.get(row[self.LABEL], []))
	        # Flush the last leaf.
	        # NOTE(review): the last leaf is only written when it has images, whereas
	        # earlier leafs are written even without images -- confirm which is intended.
	        if len(images) > 0:
	            counter += 1
	            self._add_leaf(data_handler, leaf_id, images, description)
	        if verbose:
	            print(Fore.GREEN + f'[{counter} leafs added]')
	            print(Fore.CYAN + 'adding triples to rdf graph ... ')
	            print(Fore.GREEN + '[{} statements added]'.format(str(len(data_handler.data_graph))))
	            print(Fore.CYAN + 'writing graph to file "{}" ...'.format(target_data_file))
	        data_handler.write()
	        if verbose:
	            print(Fore.GREEN + '[OK]')
	            print(Style.RESET_ALL)

	def usage():
	    """Print the command line help, i.e. the docstring of main."""
	    help_text = main.__doc__
	    print(help_text)
	def main(argv):
	    """This program can be used to convert py objects to a owl:Ontology and rdf data in turtle format.

	    py2ttl/create_manuscript_description.py [OPTIONS] <csv-file> <mapping-csv>

	        <csv-file>      csv file that contains the description of the manuscript.
	        <mapping-csv>   a image to faksimile iri mapping csv file

	        OPTIONS:
	        -h|--help:          show help
	        -o|--output=FILE    specify output file

	        :return: exit code (int)
	    """
	    # The docstring above doubles as the usage text printed by usage(), so
	    # implementation notes are kept in comments:
	    #   argv is the argument list without the program name;
	    #   0 is returned on success or after --help, 2 on a usage error or
	    #   when the ontology could not be created.
	    check_config_files_exist()
	    datatypes_dir = get_datatypes_dir()
	    source_ontology_file = PROJECT_ONTOLOGY_FILE
	    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
	    output_file = './include-ttl/descriptions.ttl'
	    try:
	        opts, args = getopt.getopt(argv, "ho:", ["help", "output="])
	    except getopt.GetoptError:
	        usage()
	        return 2
	    for opt, arg in opts:
	        if opt in ('-h', '--help'):
	            usage()
	            return 0
	        elif opt in ('-o', '--output'):
	            # getopt reports the short option as '-o' (letter o); the former
	            # comparison with '-0' (zero) never matched.
	            output_file = arg
	    if len(args) < 2:
	        usage()
	        return 2
	    csv_file, mapping_csv_file = args[0], args[1]
	    ontology_converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
	    print(Fore.CYAN + 'Create ontology ...')
	    if ontology_converter.create_ontology(datatypes_dir, target_ontology_file) != 0:
	        return 2
	    print(Fore.GREEN + '[Ontology file {0} created]'.format(target_ontology_file))
	    print(Fore.CYAN + f'Create data from "{csv_file}" with ...')
	    data_converter = Py2TTLCSVManuscriptDescriptionConverter(csv_file, mapping_csv_file, mapping_dictionary=ontology_converter.uri_mapping4cls_and_properties)
	    output = data_converter.convert(output_file)
	    # convert() has no return statement; report success as exit code 0.
	    return 0 if output is None else output


	if __name__ == "__main__":
	    # Run main with the command line arguments (program name stripped)
	    # and use its return value as the exit status of the process.
	    exit_code = main(sys.argv[1:])
	    sys.exit(exit_code)

create_manuscript_description.pyNo OneTemporaryActions

File Metadata

create_manuscript_description.pyView Options

Event Timeline

create_manuscript_description.py
No OneTemporary
Actions

create_manuscript_description.py
View Options