Index: py2ttl/test_py2ttl.py =================================================================== --- py2ttl/test_py2ttl.py (revision 18) +++ py2ttl/test_py2ttl.py (revision 19) @@ -1,51 +1,51 @@ import unittest import lxml.etree as ET from os import sep, path, remove from os.path import isfile from rdflib import Graph, URIRef import py2ttl from py2ttl import Py2TTLConverter from config import PROJECT_NAME, PROJECT_ONTOLOGY_FILE class TestPy2TTL(unittest.TestCase): def setUp(self): self.ttl_target = __file__ + 'test.ttl' def test_main(self): argv = ['-t', self.ttl_target ] self.assertEqual(py2ttl.main(argv), 0) def test_init(self): - converter = Py2TTLConverter(project_onotology_file=PROJECT_ONTOLOGY_FILE) + converter = Py2TTLConverter(project_ontology_file=PROJECT_ONTOLOGY_FILE) self.assertEqual(converter.project_name, PROJECT_NAME) def test_get_semantic_classes(self): converter = Py2TTLConverter() classes = converter.get_semantic_classes('svgscripts/datatypes') self.assertEqual('FaksimileImage' in [ cls.__name__ for cls in classes ], True) self.assertEqual('Image' in [ cls.__name__ for cls in classes ], True) self.assertEqual('SemanticClass' in [ cls.__name__ for cls in classes ], False) def test_createProperty(self): - converter = Py2TTLConverter(project_onotology_file=PROJECT_ONTOLOGY_FILE) + converter = Py2TTLConverter(project_ontology_file=PROJECT_ONTOLOGY_FILE) converter.createProperty(converter.base_uriref + "#Test", 'test', str, 1) name_uri = converter.base_uriref + '#hasTest' self.assertEqual((name_uri, None, None) in converter.project_graph, True) def test_createPropertyName(self): converter = Py2TTLConverter() name = converter.createPropertyName(property_name='test_asdf_asdf') self.assertEqual(name, 'hasTestAsdfAsdf') name = converter.createPropertyName(object_uri=converter.base_uriref + '#Asdf') self.assertEqual(name, 'hasAsdf') name = converter.createPropertyName(subject_uri=converter.base_uriref + '#Test',object_uri=converter.base_uriref + '#Asdf') self.assertEqual(name, 'testBelongsToAsdf') name = converter.createPropertyName(subject_uri=converter.base_uriref + '#Test') self.assertEqual(name, 'testBelongsTo') def tearDown(self): isfile(self.ttl_target) and remove(self.ttl_target) if __name__ == "__main__": unittest.main() Index: py2ttl/py2ttl.py =================================================================== --- py2ttl/py2ttl.py (revision 18) +++ py2ttl/py2ttl.py (revision 19) @@ -1,314 +1,306 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ This program can be used to convert py classes that are subclasses of DATATYPES_DIR.class_spec.SemanticClass to rdf. """ # Copyright (C) University of Basel 2019 {{{1 # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/> 1}}} import getopt import importlib import importlib.util import inspect import lxml.etree as ET from os import sep, path, listdir from os.path import isfile, isdir, dirname, basename from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD import re import sys if dirname(__file__) not in sys.path: sys.path.append(dirname(__file__)) from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL, SHARED_ONTOLOGIES_DIR from knora_base import KNORA_BASE __author__ = "Christian Steiner" __maintainer__ = __author__ __copyright__ = 'University of Basel' __email__ = "christian.steiner@unibas.ch" __status__ = "Development" __license__ = "GPL v3" __version__ = "0.0.1" class Py2TTLConverter: """This class can be used convert semantic_dictionaries to ttl. """ - def __init__(self, project_onotology_file=None, create_super_cls_for_multi_property=True): + def __init__(self, project_ontology_file=None, create_super_cls_for_multi_property=True): self.list_value = -99 + self.class_uri_dict = {} self.create_super_cls_for_multi_property = create_super_cls_for_multi_property - self.created_python_classes = [] - self.do_not_delete_classes = [] self.project_graph = Graph() self.base_uriref = URIRef(PROJECT_URL) self.project_name = PROJECT_NAME self.ns = { self.base_uriref + '#': self.project_name } - if project_onotology_file is not None and isfile(project_onotology_file): - self.project_graph.parse(project_onotology_file, format="turtle") + if project_ontology_file is not None and isfile(project_ontology_file): + self.project_graph.parse(project_ontology_file, format="turtle") if len(self.project_graph) > 0: self.base_uriref = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False) self.ns = { uriref: ns for ns, uriref in self.project_graph.namespace_manager.namespaces() } self.project_name = self.ns.get(self.base_uriref + '#') self.project_graph.bind(self.project_name, self.base_uriref + '#') def get_semantic_classes(self, datatypes_dir): """Returns a list of all classes that are contained in datatypes_dir that are subclasses of DATATYPES_DIR.class_spec.SemanticClass. :return: a list of (str_name, class) """ base_dir = dirname(dirname(__file__)) sys.path.append(base_dir) root_modul_name = datatypes_dir.replace('/','.') reference_cls = importlib.import_module('{}.{}'.format(root_modul_name, 'class_spec')) try: self.list_value = reference_cls.LIST except AttributeError: pass files = [ file.replace('.py','') for file in listdir(datatypes_dir) if file.endswith('.py') and not file.startswith('test_') and not file.startswith('_')] all_modules = [] for name in files: all_modules.append(importlib.import_module('{}.{}'.format(root_modul_name, name))) all_classes = [] for modul in all_modules: all_classes += inspect.getmembers(modul, inspect.isclass) all_classes = sorted(set(all_classes)) semantic_classes = [ cls for name, cls in all_classes if issubclass(cls, reference_cls.SemanticClass) and not (cls == reference_cls.SemanticClass)] return semantic_classes def addRestriction2Class(self, cls_uri, property_uri, cardinality=0, comment="", label=""): """Adds restriction on property_uri to class cls_uri. """ -# # keep graph up-to-date: delete old definitions -# if (cls_uri, None, None) in self.project_graph\ -# and not (cls_uri.split('#')[1] in self.created_python_classes\ -# or cls_uri.split('#')[1] in self.do_not_delete_classes): -# self.project_graph.remove((cls_uri, None, None)) if (cls_uri, None, None) not in self.project_graph: self.addClass(cls_uri, comment=comment, label=label) -# # prevent createClassAndProperties from deleting new definition -# if cls_uri.split('#')[1] not in self.do_not_delete_classes: -# self.do_not_delete_classes.append(cls_uri.split('#')[1]) - # add restriction to cls_uri restriction = BNode() cardinality_restriction = OWL.minCardinality if cardinality == 0 else OWL.cardinality self.project_graph.add((cls_uri, RDFS.subClassOf, restriction)) self.project_graph.add((restriction, RDF.type, OWL.Restriction)) self.project_graph.add((restriction, OWL.onProperty, property_uri)) self.project_graph.add((restriction, cardinality_restriction, Literal(str(cardinality), datatype=XSD.nonNegativeInteger))) def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'): """Returns a property name. """ if property_name is not None: property_name = ''.join([ property_name.split('_')[0].lower() ] + [ text.capitalize() for text in property_name.split('_')[1:] ]) return prefix + property_name[0].upper() + property_name[1:] if property_name[0].islower()\ else prefix + property_name elif subject_uri is not None: property_name = subject_uri.split('#')[1] + self.createPropertyName(object_uri=object_uri, prefix=connector) return property_name[0].lower() + property_name[1:] elif object_uri is not None: return prefix + object_uri.split('#')[1] else: return prefix def createSuperClassForSubjectClassConstraint(self, property_uri, sub_uri): """Creates a super class for classes that share a property. """ super_uri = URIRef(property_uri.replace('has', '') + 'Holder') self.project_graph.add((sub_uri, RDFS.subClassOf, super_uri)) self.project_graph.remove((sub_uri, RDFS.subClassOf, KNORA_BASE.Resource)) if (super_uri, RDF.type, OWL.Class) not in self.project_graph: label = 'holder of ' + property_uri.split('#')[1].replace('has', '') comment = 'super class for classes that have a ' + property_uri.split('#')[1].replace('has', '') self.addRestriction2Class(super_uri, property_uri, comment=comment, label=label) for object_uri in self.project_graph.objects(subject=property_uri, predicate=KNORA_BASE.subjectClassConstraint): self.project_graph.remove((property_uri, KNORA_BASE.subjectClassConstraint, object_uri)) self.project_graph.add((object_uri, RDFS.subClassOf, super_uri)) self.project_graph.remove((object_uri, RDFS.subClassOf, KNORA_BASE.Resource)) self.project_graph.add((property_uri, KNORA_BASE.subjectClassConstraint, super_uri)) objectClass = self.project_graph.value(subject=property_uri, predicate=KNORA_BASE.objectClassConstraint, any=False) comment = 'connects {} with {}'.format(super_uri.split('#')[1], objectClass.split('#')[1].replace('has', '')) self.project_graph.remove((property_uri, RDFS.comment, None)) self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en'))) def addProperty(self, property_uri, super_uri, subject_uri, object_uri, comment, label, cardinality): """Add a property to self.project_graph. """ self.project_graph.add((property_uri, RDF.type, OWL.ObjectProperty)) self.project_graph.add((property_uri, RDFS.subPropertyOf, super_uri)) self.project_graph.add((property_uri, KNORA_BASE.objectClassConstraint, object_uri)) self.project_graph.add((property_uri, KNORA_BASE.subjectClassConstraint, subject_uri)) self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en'))) self.project_graph.add((property_uri, RDFS.label, Literal(label, lang='en'))) self.addRestriction2Class(subject_uri, property_uri, cardinality=cardinality) def createProperty(self, cls_uri, property_name, property_cls, cardinality): """Creates a owl:ObjectProperty. """ inferredSubClass = RDFS.subClassOf * '*' name = self.createPropertyName(property_name=property_name) property_uri = URIRef(self.base_uriref + '#' + name) subject_uri = cls_uri label = 'has ' + name.replace('has','') super_uri = KNORA_BASE.hasValue if (property_uri, None, None) not in self.project_graph: if property_cls.__module__ == 'builtins': datatype_mapping = { float: KNORA_BASE.DecimalValue, int: KNORA_BASE.IntValue, str: KNORA_BASE.TextValue } object_uri = datatype_mapping.get(property_cls) if object_uri == KNORA_BASE.TextValue: if property_name == 'URL': object_uri = KNORA_BASE.UriValue elif property_name == 'file_name': object_uri = KNORA_BASE.FileValue else: object_uri = URIRef(self.base_uriref + '#' + property_cls.__name__) if cardinality == self.list_value: subject_uri = object_uri object_uri = cls_uri name = self.createPropertyName(subject_uri=subject_uri, object_uri=object_uri) property_uri = URIRef(self.base_uriref + '#' + name) cardinality = 1 label = subject_uri.split('#')[1] + ' belongs to ' + object_uri.split('#')[1] super_uri = KNORA_BASE.hasLinkTo property_value_uri = URIRef(property_uri + 'Value') comment = 'Reification statement of relation between {} and {}'.format(subject_uri.split('#')[1], object_uri.split('#')[1]) self.addProperty(property_value_uri, KNORA_BASE.hasLinkToValue, subject_uri, KNORA_BASE.LinkValue,\ comment, label + ' - statement', cardinality) comment = 'connects {} with {}'.format(subject_uri.split('#')[1], object_uri.split('#')[1]) self.addProperty(property_uri, super_uri, subject_uri, object_uri, comment, label, cardinality) elif not True in [\ (cls_uri, inferredSubClass, o) in self.project_graph\ for o in self.project_graph.objects(property_uri, KNORA_BASE.subjectClassConstraint)\ ]: # if cls_uri is NOT a subclass of a cls specified by KNORA_BASE.subjectClassConstraint self.addRestriction2Class(subject_uri, property_uri, cardinality=cardinality) if self.create_super_cls_for_multi_property: self.createSuperClassForSubjectClassConstraint(property_uri, subject_uri) else: self.project_graph.add((property_uri, KNORA_BASE.subjectClassConstraint, subject_uri)) def get_comment_label(self, cls): """Returns comment and label from cls __doc__. """ comment = cls.__doc__.replace('\n','').lstrip() label = cls.__name__ if '.' in cls.__doc__: comment = [ text for text in cls.__doc__.split('\n') if text != '' ][0].lstrip() if '@label:' in cls.__doc__: m = re.search('(@label:\s)(.*[\.]*)', cls.__doc__) blabla, label = m.groups() elif re.search('([A-Z][a-z]+)', label): m = re.search('([A-Z]\w+)([A-Z]\w+)', label) label = ' '.join([ text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != '' ]) return comment, label - def addClass(self, cls_uri, comment="", label="", super_uri=KNORA_BASE.Resource): + def addClass(self, cls_uri, comment='', label='', super_uri=KNORA_BASE.Resource): """Add a class to project_graph. """ + if comment == '' and cls_uri.split('#')[1] in self.class_uri_dict: + comment, label = self.get_comment_label(self.class_uri_dict.get(cls_uri.split('#')[1])) + elif comment == '': + label = cls_uri.split('#')[1].lower() + comment = label self.project_graph.add((cls_uri, RDF.type, OWL.Class)) self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en'))) self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en'))) self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri)) def createClassAndProperties(self, cls): """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class. """ - if not cls.__name__ in self.created_python_classes: - self.created_python_classes.append(cls.__name__) + if not cls.__name__ in self.class_uri_dict: + self.class_uri_dict.update({cls.__name__: cls}) semantic_dict = cls.get_semantic_dictionary() super_uri = KNORA_BASE.Resource if bool(semantic_dict['class'].get('type')): super_cls = semantic_dict['class'].get('type') self.createClassAndProperties(super_cls) super_uri = URIRef(self.base_uriref + '#' + super_cls.__name__) cls_uri = URIRef(self.base_uriref + '#' + cls.__name__) -# if (cls_uri, None, None) in self.project_graph\ -# and cls.__name__ not in self.do_not_delete_classes: -# self.project_graph.remove((cls_uri, None, None)) comment, label = self.get_comment_label(cls) self.addClass(cls_uri, comment, label, super_uri) for property_key in semantic_dict['properties'].keys(): property_cls, cardinality = semantic_dict['properties'].get(property_key) self.createProperty(cls_uri, property_key, property_cls, cardinality) def convert_py2ttl(self, datatypes_dir, target_ontology_file): """Convert all classes contained in datatypes_dir that are subclasses of DATATYPES_DIR.class_spec.SemanticClass to rdf. :return: exit code (int) """ if isdir(datatypes_dir): semantic_classes = self.get_semantic_classes(datatypes_dir) for cls in semantic_classes: self.createClassAndProperties(cls) f = open(target_ontology_file, 'wb+') f.write(self.project_graph.serialize(format="turtle")) f.close() #print(self.do_not_delete_classes) else: print('Error: dir {} does not exist!'.format(datatypes_dir)) usage return 1 return 0 def usage(): """prints information on how to use the script """ print(main.__doc__) def main(argv): """This program can be used to convert py classes that are subclasses of <dir>.class_spec.SemanticClass to owl:Class. - py2ttl/py2ttl.py [OPTIONS] <dir> + py2ttl/py2ttl.py [OPTIONS <dir>] - <dir> directory containing datatypes that are subclasses of <dir>.class_spec.SemanticClass. - Overwrites DATATYPES_DIR in config.py. + <dir> [optional] directory containing datatypes that are subclasses of <dir>.class_spec.SemanticClass. + Overwrites DATATYPES_DIR in py2ttl/config.py. OPTIONS: -h|--help: show help - -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in config.py - -t|--target=target_ontology_file target ontology ttl file + -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in py2ttl/config.py + -t|--target=target_ontology_file target ontology ttl file, default: 'PROJECT_PREFIX-ontology_autogenerated.ttl' :return: exit code (int) """ check_config_files_exist() datatypes_dir = get_datatypes_dir() source_ontology_file = PROJECT_ONTOLOGY_FILE - target_ontology_file = '.{0}{1}-onotology_autogenerated.ttl'.format(sep, PROJECT_NAME) + target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME) try: opts, args = getopt.getopt(argv, "hs:t:", ["help","source=", "target="]) except getopt.GetoptError: usage() return 2 for opt, arg in opts: if opt in ('-h', '--help'): usage() return 0 elif opt in ('-t', '--target'): target_ontology_file = arg elif opt in ('-s', '--source'): source_ontology_file = arg - converter = Py2TTLConverter(project_onotology_file=source_ontology_file) + converter = Py2TTLConverter(project_ontology_file=source_ontology_file) if len(args) < 1 and datatypes_dir is not None: return converter.convert_py2ttl(datatypes_dir, target_ontology_file) else: for datatypes_dir in args: if converter.convert_py2ttl(datatypes_dir, target_ontology_file) > 0: return 2 return 0 if len(args) > 1 else 2 if __name__ == "__main__": sys.exit(main(sys.argv[1:]))