#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# NOTE: this text was recovered from a flattened SVN diff (revision 69 -> 70).
# The code below is the revision 70 side of the py2ttl/data_handler.py hunk.
# The diff headers that shared these lines are preserved here as comments:
#
# Index: tests_svgscripts/test_data/N_VII_1_page006.xml
# ===================================================================
# --- tests_svgscripts/test_data/N_VII_1_page006.xml (revision 69)
# +++ tests_svgscripts/test_data/N_VII_1_page006.xml (revision 70)
# @@ -1,1275 +1,1275 @@
#   svgWordPosition 2019-08-02 15:17:37 2019-08-02 15:17:37 2019-08-02 15:30:59 2019-08-02 15:30:59
# - 2019-11-08 10:36:22
# + 2019-11-08 17:42:55
#
# Index: py2ttl/data_handler.py
# ===================================================================
# --- py2ttl/data_handler.py (revision 69)
# +++ py2ttl/data_handler.py (revision 70)
# @@ -1,176 +1,189 @@
""" This class can be used to add data to a rdf graph.
"""
#    Copyright (C) University of Basel 2019  {{{1
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/> 1}}}

__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
from rdflib import RDF as ns_rdf
from os.path import isfile
import random
import warnings

from class_spec import SemanticClass
from config import DATA_URL


class RDFDataHandler:
    """
    This class can be used to add data to a rdf graph.

    Instances keep two graphs: ``ontology_graph`` (parsed from the ontology
    file named in the mapping dictionary, if that file exists) and
    ``data_graph`` (the triples produced by :meth:`add_data`).
    """
    UNITTESTING = False
    # Python builtin type -> RDF datatype used for literals of that type.
    SIMPLE_DATA_TYPE_MAPPING = { int: XSD.integer, float: XSD.float, str: XSD.string, bool: XSD.boolean, list: RDF.List }

    def __init__(self, target_file, mapping_dictionary):
        """Set up the graphs and bind the project and data namespaces.

        :param target_file: path that :meth:`write` serializes the data graph to
        :param mapping_dictionary: dict with an 'ontology' entry (keys
            'project_name', 'project_uri', optionally 'ontology_file') and a
            'classes' entry that is read by :meth:`add_data`
        :raises Exception: if mapping_dictionary has no (truthy) 'ontology' entry
        """
        self.target_file = target_file
        self.mapping_dictionary = mapping_dictionary
        self.ontology_graph = Graph()
        self.data_graph = Graph()
        # Maps already exported data instances to their subject URI.
        self.data_identifier_mapping = {}
        ontology_info = self.mapping_dictionary.get('ontology')
        if not ontology_info:
            raise Exception('Error: mapping_dictionary does not contain key "ontology"!')
        self.project_name = ontology_info.get('project_name')
        self.project_uri = URIRef(ontology_info.get('project_uri'))
        ontology_file = ontology_info.get('ontology_file')
        if bool(ontology_file) and isfile(ontology_file):
            self.ontology_graph.parse(ontology_file, format="turtle")
        # NOTE(review): the recovered source does not show whether this
        # assignment was nested under the isfile() check. Its value only
        # depends on data_graph, so it is set unconditionally here, which
        # keeps the attribute defined in every case.
        self.ns = { uriref: ns for ns, uriref in self.data_graph.namespace_manager.namespaces() }
        self.data_graph.bind(self.project_name, self.project_uri)
        self.data_graph.bind('data', DATA_URL + '#')

    def add_data(self, data_instance, identifier_prefix, parent_data_instance=None):
        """Add a data rdf instance of data_instance to the data_graph.

        For every property of the instance's semantic dictionary a triple is
        added: ordered lists go through :meth:`add_ordered_list`, instances of
        SemanticClass subclasses are added recursively, everything else is
        written as a typed literal.

        :param data_instance: object providing get_semantic_dictionary() and
            get_name_and_id()
        :param identifier_prefix: (str) prefix used to build the identifier uri
        :param parent_data_instance: instance that is asked for a child object
            (get_object_from_list_with_id) when a property only holds an id
        :raises Exception: if the mapping dictionary lacks the class or one of
            its properties, or if a referenced child cannot be found
        :return: (rdflib.URIRef) subject_uri of data instance
        """
        class_name = type(data_instance).__name__
        identifier_uri = self.create_identifier_uri(data_instance, identifier_prefix)
        class_mapping = self.mapping_dictionary['classes'].get(class_name)
        if not bool(class_mapping):
            msg = 'Mapping dictionary does not contain a entry for {}!'.format(class_name)
            raise Exception(msg)
        self.data_identifier_mapping[data_instance] = identifier_uri
        self.data_graph_add((identifier_uri, RDF.type, class_mapping['class_uri']))
        semantic_dict = data_instance.get_semantic_dictionary()
        # Children are prefixed with the fragment part of this instance's uri.
        new_identifier_prefix = identifier_uri[identifier_uri.index('#')+1:]
        for key, content in semantic_dict['properties'].items():
            predicate_uri = class_mapping['properties'].get(key)
            if not bool(predicate_uri):
                msg = 'Mapping dictionary for {0} does not contain a entry for {1}!'.format(class_name, key)
                raise Exception(msg)
            datatype = content.get('class')
            child_data_instance = data_instance.__dict__.get(key)
            # Unset values (None, or the int -1) produce no triple.
            if child_data_instance is None\
                    or (type(child_data_instance) is int and child_data_instance == -1):
                continue
            if datatype is list:
                self.add_ordered_list(child_data_instance, identifier_uri, predicate_uri,\
                        new_identifier_prefix, data_instance)
            elif issubclass(datatype, SemanticClass):
                if type(child_data_instance) is not list:
                    if type(child_data_instance) != datatype:
                        # The attribute holds an id: resolve it via the parent
                        # and remember the object on data_instance.
                        child_id = child_data_instance
                        child_data_instance = parent_data_instance.get_object_from_list_with_id(datatype,\
                                child_id)
                        if child_data_instance is None:
                            msg = 'No child_data_instance found for data_instance {0}: looking for {1} with id {2}'.format(\
                                    type(parent_data_instance), datatype, child_id)
                            raise Exception(msg)
                        new_list_name = 'list_of_' + datatype.__name__ + 's'
                        if new_list_name in data_instance.__dict__:
                            data_instance.__dict__[new_list_name].append(child_data_instance)
                        else:
                            data_instance.__dict__[new_list_name] = [ child_data_instance ]
                    children = [ child_data_instance ]
                else:
                    children = child_data_instance
                for child_item in children:
                    if child_item not in self.data_identifier_mapping:
                        child_identifier_uri = self.add_data(child_item, new_identifier_prefix,\
                                parent_data_instance=data_instance)
                    else:
                        child_identifier_uri = self.data_identifier_mapping[child_item]
                    self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
            else:
                literal_datatype = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING[datatype]
                # A range declared in the ontology overrides the default datatype.
                ontology_datatypes = list(self.ontology_graph.objects(subject=predicate_uri, predicate=RDFS.range))
                if len(ontology_datatypes) > 0:
                    literal_datatype = ontology_datatypes[0]
                object_literal = Literal(str(child_data_instance), datatype=literal_datatype)
                self.data_graph_add((identifier_uri, predicate_uri, object_literal))
        return identifier_uri

    def add_ordered_list(self, data_instance_list, identifier_uri, predicate_uri, identifier_prefix, data_instance):
        """Add the items of data_instance_list as a RDF collection.

        Items that are not yet part of the data graph are added with
        :meth:`add_data` (with data_instance as their parent). The head of the
        collection becomes the object of (identifier_uri, predicate_uri, head).
        Nothing is added for an empty list.
        """
        if len(data_instance_list) > 0:
            child_identifiers = []
            for item in data_instance_list:
                if item not in self.data_identifier_mapping:
                    child_identifiers.append(self.add_data(item, identifier_prefix, data_instance))
                else:
                    child_identifiers.append(self.data_identifier_mapping[item])
            list_node = self.generate_RDF_collection(child_identifiers)
            self.data_graph_add((identifier_uri, predicate_uri, list_node))

    def create_identifier_uri(self, data_instance, identifier_prefix):
        """Return a data identifier uri.

        The uri is DATA_URL + '#' + identifier_prefix + '_' + name + id. While
        that uri is already used as a subject in the data graph, the id part
        is replaced by random bits (one more bit on every retry).

        :return: (rdflib.URIRef) subject_uri of data instance
        """
        data_type, instance_id = data_instance.get_name_and_id()
        uri_stem = DATA_URL + '#' + identifier_prefix + '_' + data_type
        identifier_uri = URIRef(uri_stem + str(instance_id))
        randombit_length = 5
        while (identifier_uri, None, None) in self.data_graph:
            identifier_uri = URIRef(uri_stem + str(random.getrandbits(randombit_length)))
            randombit_length += 1
        return identifier_uri

    def data_graph_add(self, rdf_triple):
        """Add a triple to the graph.
        """
        #not RDFDataHandler.UNITTESTING and print(rdf_triple)
        self.data_graph.add(rdf_triple)

    def generate_RDF_collection(self, vals ) -> BNode:
        """
        Generate an RDF List from vals, returns the head of the list.

        Adapted from code by Ivan Herman (World Wide Web Consortium),
        published under the W3C® SOFTWARE NOTICE AND LICENSE.

        The rdf:first / rdf:rest triples are added to the data graph.

        :param vals: array of RDF Resources
        :return: head of the List (an RDF Resource); rdf:nil if vals is empty
        """
        heads = [ BNode() for _ in vals ] + [ ns_rdf["nil"] ]
        for head, next_head, val in zip(heads, heads[1:], vals):
            self.data_graph_add( (head, ns_rdf["first"], val) )
            self.data_graph_add( (head, ns_rdf["rest"], next_head) )
        return heads[0]

    def write(self, output_format="turtle"):
        """Write graph.

        Serializes the data graph in output_format to self.target_file.
        """
        serialized = self.data_graph.serialize(format=output_format)
        # serialize() returns bytes or str depending on the rdflib version;
        # the file is written in binary mode, so encode if necessary.
        if isinstance(serialized, str):
            serialized = serialized.encode('utf-8')
        with open(self.target_file, 'wb') as target:
            target.write(serialized)


# Index: py2ttl/py2ttl_ontology.py
# ===================================================================
# --- py2ttl/py2ttl_ontology.py (revision 69)
# +++ py2ttl/py2ttl_ontology.py (revision 70)
# @@ -1,349 +1,350 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py classes that are
    subclasses of class_spec.SemanticClass to
    a owl ontology in turtle format.
"""
#    Copyright (C) University of Basel 2019  {{{1
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.
#    If not, see <http://www.gnu.org/licenses/> 1}}}

import getopt
import importlib
import importlib.util
import inspect
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
import re
import sys
import warnings

if dirname(__file__) not in sys.path:
    sys.path.append(dirname(__file__))

from class_spec import SemanticClass, UnSemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from data_handler import RDFDataHandler

sys.path.append('shared_util')
from myxmlwriter import dict2xml


__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"


class Py2TTLOntologyConverter:
    """This class can be used convert semantic_dictionaries to a owl ontology in turtle format.
    """
    UNITTESTING = False

    def __init__(self, project_ontology_file=None):
        """Create the project graph, optionally seeded from an existing ontology.

        :param project_ontology_file: turtle file that is parsed into the
            project graph if it exists; base uri, namespaces and project name
            are then taken from that graph (if it is not empty).
        """
        #TODO add a version to ontology and data!
        self.class_uri_dict = {}
        self.uri_mapping4cls_and_properties = {}
        self.project_graph = Graph()
        self.base_uriref = URIRef(PROJECT_URL)
        self.project_name = PROJECT_NAME
        self.ns = { self.base_uriref + '#': self.project_name }
        if project_ontology_file is not None and isfile(project_ontology_file):
            self.project_graph.parse(project_ontology_file, format="turtle")
            if len(self.project_graph) > 0:
                self.base_uriref = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False)
                self.ns = { uriref: ns for ns, uriref in self.project_graph.namespace_manager.namespaces() }
                self.project_name = self.ns.get(self.base_uriref + '#')
        self.project_graph.bind(self.project_name, self.base_uriref + '#')
        self.uri_mapping4cls_and_properties.update({ 'ontology': { 'project_name': self.project_name, 'project_uri': self.base_uriref + '#' }})
        self.uri_mapping4cls_and_properties.update({ 'classes': {} })

    def addClass2Graph(self, cls, semantic_dict=None) -> (URIRef, type):
        """Add a class to project_graph.

        Adds the owl:Class with comment and label (see get_comment_label) and,
        if the semantic dictionary names a super class ('rdfs:subClassOf' uri
        or 'type' class), a rdfs:subClassOf triple. A super class given as
        'type' is created first via createClassAndProperties.

        :return: (cls_uri (URIRef), super_cls (cls))
        """
        if semantic_dict is None:
            semantic_dict = cls.get_semantic_dictionary()
        comment, label = self.get_comment_label(cls)
        cls_uri = URIRef(self.base_uriref + '#' + cls.__name__)
        self.project_graph.add((cls_uri, RDF.type, OWL.Class))
        self.project_graph.add((cls_uri, RDFS.isDefinedBy, self.base_uriref))
        if comment != '':
            self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en')))
        if label != '':
            self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en')))
        super_uri = None
        super_cls = None
        class_info = semantic_dict['class']
        if bool(class_info.get('rdfs:subClassOf')):
            super_uri = URIRef(class_info.get('rdfs:subClassOf'))
        if bool(class_info.get('type')):
            # A python super class takes precedence over a plain uri.
            super_cls = class_info.get('type')
            super_uri = self.createClassAndProperties(super_cls)
        if super_uri is not None:
            self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
        return cls_uri, super_cls

    def addProperty2Graph(self, property_uri, domain_uri, range_uri, info_dict):
        """Add a property to self.project_graph.

        The label defaults to 'has ' + the uri fragment without 'has' unless
        info_dict provides SemanticClass.PROPERTY_LABEL. A positive
        cardinality in info_dict adds a restriction to the domain class.
        """
        if SemanticClass.PROPERTY_LABEL in info_dict:
            label = info_dict[SemanticClass.PROPERTY_LABEL]
        else:
            label = 'has ' + property_uri.split('#')[1].replace('has','')
        self.project_graph.add((property_uri, RDF.type, OWL.ObjectProperty))
        self.project_graph.add((property_uri, RDFS.isDefinedBy, self.base_uriref))
        self.project_graph.add((property_uri, RDFS.domain, domain_uri))
        self.project_graph.add((property_uri, RDFS.range, range_uri))
        if SemanticClass.PROPERTY_COMMENT in info_dict:
            comment = info_dict[SemanticClass.PROPERTY_COMMENT]
            self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en')))
        self.project_graph.add((property_uri, RDFS.label, Literal(label, lang='en')))
        if SemanticClass.CARDINALITY in info_dict\
                and info_dict[SemanticClass.CARDINALITY] > 0:
            self.addRestriction2Class(domain_uri, property_uri, info_dict)

    def addRestriction2Class(self, cls_uri, property_uri, info_dict):
        """Adds restriction on property_uri to class cls_uri.

        Nothing happens unless info_dict holds a positive
        SemanticClass.CARDINALITY. The restriction predicate is owl:cardinality
        unless info_dict names one in SemanticClass.CARDINALITY_RESTRICTION.
        A warning is issued if cls_uri is not yet a subject in the graph.
        """
        if SemanticClass.CARDINALITY in info_dict\
                and info_dict[SemanticClass.CARDINALITY] > 0:
            if (cls_uri, None, None) not in self.project_graph:
                warnings.warn('{} not in graph!'.format(cls_uri))
            restriction = BNode()
            if SemanticClass.CARDINALITY_RESTRICTION in info_dict:
                cardinality_restriction = URIRef(OWL + info_dict[SemanticClass.CARDINALITY_RESTRICTION])
            else:
                cardinality_restriction = OWL.cardinality
            cardinality = info_dict[SemanticClass.CARDINALITY]
            self.project_graph.add((cls_uri, RDFS.subClassOf, restriction))
            self.project_graph.add((restriction, RDF.type, OWL.Restriction))
            self.project_graph.add((restriction, OWL.onProperty, property_uri))
            self.project_graph.add((restriction, cardinality_restriction, Literal(str(cardinality), datatype=XSD.nonNegativeInteger)))

    def convert_py2ttl(self, datatypes_dir, target_ontology_file):
        """Convert all classes contained in datatypes_dir that are subclasses of class_spec.SemanticClass to rdf.

        Writes the ontology as turtle to target_ontology_file and, unless
        UNITTESTING is set, a xml mapping file (dict2xml) and a progress bar.

        :return: exit code (int)
        """
        if not isdir(datatypes_dir):
            print('Error: dir {} does not exist!'.format(datatypes_dir))
            usage()
            return 1
        show_progress = not bool(Py2TTLOntologyConverter.UNITTESTING)
        semantic_classes = self.get_semantic_classes(datatypes_dir)
        bar = Bar('creating classes and properties', max=len(semantic_classes)) if show_progress else None
        for cls in semantic_classes:
            self.createClassAndProperties(cls)
            if show_progress:
                bar.next()
        if show_progress:
            bar.finish()
        self.uri_mapping4cls_and_properties['ontology'].update({'ontology_file': target_ontology_file})
        serialized = self.project_graph.serialize(format="turtle")
        # serialize() returns bytes or str depending on the rdflib version;
        # the file is written in binary mode, so encode if necessary.
        if isinstance(serialized, str):
            serialized = serialized.encode('utf-8')
        with open(target_ontology_file, 'wb') as target:
            target.write(serialized)
        if show_progress:
            xml_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2' + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
            dict2xml(self.uri_mapping4cls_and_properties, xml_file)
        return 0

    def createClassAndProperties(self, cls):
        """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class.

        Classes that were already processed (by name) are skipped. Properties
        that the super class already defines reuse the super class's property
        uri instead of creating a new property.

        :return: (rdflib.URIRef) uri of the class
        """
        if cls.__name__ not in self.class_uri_dict:
            self.class_uri_dict.update({cls.__name__: cls})
            semantic_dict = cls.get_semantic_dictionary()
            cls_uri, super_cls = self.addClass2Graph(cls, semantic_dict)
            uri_mapping4properties = {}
            class_mappings = self.uri_mapping4cls_and_properties.get('classes')
            for property_key in self._get_semantic_dictionary_keys_super_first(semantic_dict['properties']):
                super_semantic_dict = {} if super_cls is None else super_cls.get_semantic_dictionary()
                if len(super_semantic_dict) == 0 or not bool(super_semantic_dict['properties'].get(property_key)):
                    property_dict4key = semantic_dict['properties'].get(property_key)
                    property_cls = property_dict4key.get('class')
                    _, property_uri = self.createProperty(cls_uri, property_key, property_cls, property_dict4key)
                    uri_mapping4properties.update({ property_key: property_uri })
                elif bool(class_mappings.get(super_cls.__name__).get('properties').get(property_key)):
                    property_uri = class_mappings[super_cls.__name__]['properties'][property_key]
                    uri_mapping4properties.update({ property_key: property_uri})
            class_mappings.update({ cls.__name__: { 'class_uri': cls_uri, 'properties': uri_mapping4properties }})
        return URIRef(self.base_uriref + '#' + cls.__name__)

    def createProperty(self, domain_uri, property_name, range_cls, info_dict) -> (URIRef, URIRef):
        """Creates a owl:ObjectProperty.

        If the property already exists and domain_uri is not a (transitive)
        subclass of one of its domains, domain_uri is added as a further
        domain (plus a cardinality restriction, if requested).

        :return: tuple of domain_uri (rdflib.URIRef) and property_uri (rdflib.URIRef) of created property
        """
        if SemanticClass.PROPERTY_NAME in info_dict:
            name = info_dict[SemanticClass.PROPERTY_NAME]
        else:
            name = self.createPropertyName(property_name=property_name)
        property_uri = URIRef(self.base_uriref + '#' + name)
        inferredSubClass = RDFS.subClassOf * '*'
        range_uri = URIRef(self.base_uriref + '#' + range_cls.__name__)
        if (property_uri, None, None) not in self.project_graph:
            if range_cls.__module__ == 'builtins':
                range_uri = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING.get(range_cls)
                if range_uri == XSD.string and property_name == 'URL':
                    range_uri = XSD.anyURI
            self.addProperty2Graph(property_uri, domain_uri, range_uri, info_dict)
        elif not any((domain_uri, inferredSubClass, o) in self.project_graph\
                for o in self.project_graph.objects(property_uri, RDFS.domain)):
            # if domain_uri is NOT a subclass of a cls specified by RDFS.domain
            if SemanticClass.CARDINALITY in info_dict\
                    and info_dict[SemanticClass.CARDINALITY] > 0:
                self.addRestriction2Class(domain_uri, property_uri, info_dict)
            self.project_graph.add((property_uri, RDFS.domain, domain_uri))
        return domain_uri, property_uri

    def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'):
        """Returns a property name.

        With property_name: snake_case is turned into camelCase and prefixed,
        e.g. 'first_name' -> 'hasFirstName'. With subject_uri: the fragments
        of subject and object are joined by connector, first letter lowered.
        With only object_uri: prefix + fragment. Otherwise just prefix.
        """
        if property_name is not None:
            parts = property_name.split('_')
            property_name = ''.join([ parts[0].lower() ] + [ text.capitalize() for text in parts[1:] ])
            # [:1] instead of [0] keeps an empty name from raising IndexError.
            if property_name[:1].islower():
                return prefix + property_name[0].upper() + property_name[1:]
            return prefix + property_name
        elif subject_uri is not None:
            property_name = subject_uri.split('#')[1] + self.createPropertyName(object_uri=object_uri, prefix=connector)
            return property_name[0].lower() + property_name[1:]
        elif object_uri is not None:
            return prefix + object_uri.split('#')[1]
        else:
            return prefix

    def get_comment_label(self, cls):
        """Returns comment and label from cls __doc__.

        The comment is the docstring (its first non-empty line if the
        docstring contains a '.'). The label is taken from a '@label' tag in
        the docstring or else derived from the CamelCase class name
        ('MyClass' -> 'my class'). A missing docstring yields an empty comment.
        """
        doc = cls.__doc__ if cls.__doc__ is not None else ''
        comment = doc.replace('\n','').lstrip()
        label = cls.__name__
        if '.' in doc:
            comment = [ text for text in doc.split('\n') if text != '' ][0].lstrip()
        label_match = re.search(r'(@label[:]*\s)(.*[\.]*)', doc) if '@label' in doc else None
        if label_match is not None:
            label = label_match.group(2)
        elif re.search(r'([A-Z][a-z]+)', label):
            label = ' '.join([ text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != '' ])
        return comment, label

    def get_semantic_classes(self, datatypes_dir):
        """Returns a list of all classes that are contained in datatypes_dir that are subclasses of class_spec.SemanticClass.

        Modules starting with 'test_' or '_' are ignored; SemanticClass itself
        and subclasses of UnSemanticClass are excluded.

        :return: a list of classes, sorted by class name
        """
        base_dir = dirname(dirname(__file__))
        if base_dir not in sys.path:
            sys.path.append(base_dir)
        root_modul_name = datatypes_dir.replace('/','.')
        files = [ file[:-len('.py')] for file in listdir(datatypes_dir)\
                if file.endswith('.py') and not file.startswith('test_') and not file.startswith('_')]
        all_modules = [ importlib.import_module('{}.{}'.format(root_modul_name, name)) for name in files ]
        all_classes = []
        for modul in all_modules:
            all_classes += inspect.getmembers(modul, inspect.isclass)
        # Sort on the name only: comparing the class objects themselves
        # (on equal names) would raise a TypeError.
        all_classes = sorted(set(all_classes), key=lambda name_and_cls: name_and_cls[0])
        semantic_classes = [ cls for name, cls in all_classes if issubclass(cls, SemanticClass)\
                and not issubclass(cls, UnSemanticClass)\
                and not (cls == SemanticClass)]
        return semantic_classes

    def _get_builtin_cls_keys(self, property_dict):
        """Returns a list of keys for classes that are builtin.

        An entry of property_dict is either a dict (its 'class' value is
        used) or a sequence (its first item is used).
        """
        builtin_cls_keys = []
        for key, entry in property_dict.items():
            property_cls = entry.get('class') if type(entry) is dict else entry[0]
            if type(property_cls) != dict\
                    and property_cls.__module__ == 'builtins':
                builtin_cls_keys.append(key)
        return builtin_cls_keys

    def _get_semantic_dictionary_keys_super_first(self, property_dict):
        """Sorts the keys of the property part of a semantic dictionary
        and returns the keys for super classes before keys of subclasses.

        Keys of builtin classes come first, in their original order.

        :return: a sorted list of keys.
        """
        builtin_cls_keys = self._get_builtin_cls_keys(property_dict)
        complex_cls_keys = []
        for key in [ key for key in property_dict.keys() if key not in builtin_cls_keys ]:
            current_cls = property_dict.get(key).get('class')
            for index, cls_key in enumerate(complex_cls_keys):
                potential_sub_cls = property_dict.get(cls_key).get('class')
                if issubclass(potential_sub_cls, current_cls):
                    # Insert in front of the first key whose class derives from ours.
                    complex_cls_keys.insert(index, key)
                    break
            else:
                complex_cls_keys.append(key)
        return builtin_cls_keys + complex_cls_keys


def create_dummy_cls(class_name):
    """Return a dummy class for class_name (str).
    """
    # type() builds the same empty class as "class <name>: pass" without
    # exec/eval, which relied on locals() write-through inside a function.
    return type(class_name, (), {})


def usage():
    """prints information on how to use the script
    """
    print(main.__doc__)


def main(argv):
    """This program can be used to convert py classes that are subclasses of class_spec.SemanticClass to owl:Class.

    py2ttl/py2ttl_ontology.py [OPTIONS ] <dir>

        <dir>    [optional] directory containing datatypes that are subclasses of class_spec.SemanticClass.
                 Overwrites DATATYPES_DIR in py2ttl/config.py.

        OPTIONS:
        -h|--help:                          show help
        -s|--source=source_ontology_file    source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in py2ttl/config.py
        -t|--target=target_ontology_file    target ontology ttl file, default: 'PROJECT_PREFIX-ontology_autogenerated.ttl'

        :return: exit code (int)
    """
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = ''
    try:
        opts, args = getopt.getopt(argv, "hs:t:", ["help","source=", "target="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-t', '--target'):
            target_ontology_file = arg
        elif opt in ('-s', '--source'):
            source_ontology_file = arg

    converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    if len(args) > 0:
        datatypes_dir = args[0]
    if target_ontology_file == '':
        target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, converter.project_name)
    return converter.convert_py2ttl(datatypes_dir, target_ontology_file)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))