Page MenuHomec4science

py2ttl_ontology.py
No OneTemporary

File Metadata

Created
Tue, May 28, 15:29

py2ttl_ontology.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py classes that are
subclasses of class_spec.SemanticClass to
an OWL ontology in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import getopt
import importlib
import importlib.util
import inspect
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from progress.bar import Bar
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
import re
import sys
import warnings
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from class_spec import SemanticClass, UnSemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from data_handler import RDFDataHandler
sys.path.append('shared_util')
from myxmlwriter import dict2xml
# Module metadata: authorship, licensing and release information.
__author__ = "Christian Steiner"
# The maintainer is the same person as the author.
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLOntologyConverter:
    """Convert the semantic dictionaries of python classes to an OWL ontology in turtle format.

    The converter collects classes and properties in an rdflib graph
    (``self.project_graph``) and keeps a mapping from python class/property names
    to the created URIs in ``self.uri_mapping4cls_and_properties``.
    """
    # If True, neither a progress bar is shown nor a xml mapping file is written.
    UNITTESTING = False
    # rdflib property path: a chain of zero or more rdfs:subClassOf steps.
    INFERRED_SUB_CLASS = RDFS.subClassOf * '*'

    def __init__(self, project_ontology_file=None):
        """Set up the project graph, optionally seeded by an existing ontology.

        :param project_ontology_file: path to a turtle file. If the file exists and
            contains triples, base uri, namespaces and project name are read from it;
            otherwise PROJECT_URL and PROJECT_NAME from the config are used.
        """
        # class name -> python class, for all classes that have already been processed
        self.class_uri_dict = {}
        # nested dictionary { 'ontology': {...}, 'classes': {...} } that is written to the xml mapping file
        self.uri_mapping4cls_and_properties = {}
        self.project_graph = Graph()
        self.base_uriref = URIRef(PROJECT_URL)
        self.project_name = PROJECT_NAME
        self.ns = { self.base_uriref + '#': self.project_name }
        if project_ontology_file is not None and isfile(project_ontology_file):
            self.project_graph.parse(project_ontology_file, format="turtle")
        if len(self.project_graph) > 0:
            # the subject that is typed as owl:Ontology becomes the base uri
            self.base_uriref = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False)
            self.ns = { uriref: ns for ns, uriref in self.project_graph.namespace_manager.namespaces() }
            self.project_name = self.ns.get(self.base_uriref + '#')
        self.project_graph.bind(self.project_name, self.base_uriref + '#')
        self.uri_mapping4cls_and_properties.update({ 'ontology': { 'project_name': self.project_name, 'project_uri': self.base_uriref + '#' }})
        self.uri_mapping4cls_and_properties.update({ 'classes': {} })

    @staticmethod
    def _has_cardinality(info_dict):
        """Return True if info_dict specifies a cardinality greater than 0.
        """
        return SemanticClass.CARDINALITY in info_dict\
                and info_dict[SemanticClass.CARDINALITY] > 0

    @staticmethod
    def _get_property_cls(property_entry):
        """Return the class of a property entry of a semantic dictionary.

        A dict entry stores its class under the key 'class',
        any other entry is indexed and its first item is used.
        """
        if isinstance(property_entry, dict):
            return property_entry.get('class')
        return property_entry[0]

    def addClass2Graph(self, cls, semantic_dict=None) -> (URIRef, type):
        """Add a class to project_graph.

        :param cls: the python class that should be added as owl:Class.
        :param semantic_dict: the semantic dictionary of cls,
            retrieved by cls.get_semantic_dictionary() if None.
        :return: (cls_uri (URIRef), super_cls (cls))
        """
        if semantic_dict is None:
            semantic_dict = cls.get_semantic_dictionary()
        class_info = semantic_dict[SemanticClass.CLASS_KEY]
        comment, label = self.get_comment_label(cls)
        cls_uri = URIRef(self.base_uriref + '#' + cls.__name__)
        self.project_graph.add((cls_uri, RDF.type, OWL.Class))
        self.project_graph.add((cls_uri, RDFS.isDefinedBy, self.base_uriref))
        if comment != '':
            self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en')))
        if label != '':
            self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en')))
        super_cls = None
        if class_info.get(SemanticClass.TYPE):
            # the python super class is converted first, such that its properties exist already
            super_cls = class_info.get(SemanticClass.TYPE)
            super_uri = self.createClassAndProperties(super_cls)
            if super_uri is not None:
                self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
        # external super classes that are specified by uri strings
        for super_uri_string in class_info.get(SemanticClass.SUBCLASS_OF) or []:
            super_uri = URIRef(super_uri_string)
            # skip super classes that can already be inferred by a chain of rdfs:subClassOf
            if (cls_uri, self.INFERRED_SUB_CLASS, super_uri) not in self.project_graph:
                self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
        return cls_uri, super_cls

    def addProperty2Graph(self, property_uri, domain_uri, range_uri, info_dict, property_type=OWL.ObjectProperty):
        """Add a property to self.project_graph.

        :param property_uri: uri of the property (rdflib.URIRef), must contain a '#'
            if info_dict does not specify a label.
        :param domain_uri: uri of the rdfs:domain.
        :param range_uri: uri of the rdfs:range.
        :param info_dict: dictionary with optional label, comment and cardinality information.
        :param property_type: the rdf:type of the property.
        """
        if SemanticClass.PROPERTY_LABEL in info_dict:
            label = info_dict[SemanticClass.PROPERTY_LABEL]
        else:
            # derive the label from the local name; strip only the leading 'has',
            # a 'has' inside the name (e.g. 'hasChase') must be kept.
            local_name = property_uri.split('#')[1]
            if local_name.startswith('has'):
                local_name = local_name[len('has'):]
            label = 'has ' + local_name
        self.project_graph.add((property_uri, RDF.type, property_type))
        self.project_graph.add((property_uri, RDFS.isDefinedBy, self.base_uriref))
        self.project_graph.add((property_uri, RDFS.domain, domain_uri))
        self.project_graph.add((property_uri, RDFS.range, range_uri))
        if SemanticClass.PROPERTY_COMMENT in info_dict:
            comment = info_dict[SemanticClass.PROPERTY_COMMENT]
            self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en')))
        self.project_graph.add((property_uri, RDFS.label, Literal(label, lang='en')))
        if self._has_cardinality(info_dict):
            self.addRestriction2Class(domain_uri, property_uri, info_dict)

    def addRestriction2Class(self, cls_uri, property_uri, info_dict):
        """Adds restriction on property_uri to class cls_uri.

        Nothing is added if info_dict does not specify a cardinality greater than 0.
        A warning is issued if cls_uri is not yet a subject in the graph.
        """
        if not self._has_cardinality(info_dict):
            return
        if (cls_uri, None, None) not in self.project_graph:
            warnings.warn('{} not in graph!'.format(cls_uri))
        restriction = BNode()
        # the kind of restriction defaults to owl:cardinality
        if SemanticClass.CARDINALITY_RESTRICTION in info_dict:
            cardinality_restriction = URIRef(OWL + info_dict[SemanticClass.CARDINALITY_RESTRICTION])
        else:
            cardinality_restriction = OWL.cardinality
        cardinality = info_dict[SemanticClass.CARDINALITY]
        self.project_graph.add((cls_uri, RDFS.subClassOf, restriction))
        self.project_graph.add((restriction, RDF.type, OWL.Restriction))
        self.project_graph.add((restriction, OWL.onProperty, property_uri))
        self.project_graph.add((restriction, cardinality_restriction, Literal(str(cardinality), datatype=XSD.nonNegativeInteger)))

    def create_ontology(self, datatypes_dir, target_ontology_file):
        """Convert all classes contained in datatypes_dir that are subclasses of class_spec.SemanticClass to rdf.

        The graph is written as turtle to target_ontology_file. Unless UNITTESTING is set,
        a progress bar is shown and a xml mapping file is written to the current directory.

        :return: exit code (int): 0 on success, 1 if datatypes_dir does not exist.
        """
        if not isdir(datatypes_dir):
            print('Error: dir {} does not exist!'.format(datatypes_dir))
            usage()
            return 1
        verbose = not Py2TTLOntologyConverter.UNITTESTING
        semantic_classes = self.get_semantic_classes(datatypes_dir)
        bar = Bar('creating classes and properties', max=len(semantic_classes)) if verbose else None
        for cls in semantic_classes:
            self.createClassAndProperties(cls)
            if bar is not None:
                bar.next()
        if bar is not None:
            bar.finish()
        self.uri_mapping4cls_and_properties['ontology'].update({'ontology_file': target_ontology_file})
        # serialize before opening the file, such that a failure does not leave a truncated file behind
        turtle_data = self.project_graph.serialize(format="turtle")
        if isinstance(turtle_data, str):
            # newer rdflib versions return str instead of bytes
            turtle_data = turtle_data.encode('utf-8')
        with open(target_ontology_file, 'wb') as ontology_file:
            ontology_file.write(turtle_data)
        if verbose:
            xml_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2' + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
            dict2xml(self.uri_mapping4cls_and_properties, xml_file)
        return 0

    def createClassAndProperties(self, cls):
        """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class.

        A class that has already been processed is not processed again.

        :return: the uri of the class (rdflib.URIRef)
        """
        cls_name = cls.__name__
        if cls_name not in self.class_uri_dict:
            self.class_uri_dict[cls_name] = cls
            semantic_dict = cls.get_semantic_dictionary()
            cls_uri, super_cls = self.addClass2Graph(cls, semantic_dict)
            properties = semantic_dict['properties']
            # properties of the super class and the uris that have been created for them;
            # both do not change while the properties of cls are processed.
            super_properties = {}
            super_property_uris = {}
            if super_cls is not None:
                super_properties = super_cls.get_semantic_dictionary().get('properties', {})
                super_property_uris = self.uri_mapping4cls_and_properties['classes']\
                        .get(super_cls.__name__, {}).get('properties', {})
            uri_mapping4properties = {}
            for property_key in self._get_semantic_dictionary_keys_super_first(properties):
                if not super_properties.get(property_key):
                    # property is not inherited: create it
                    property_dict4key = properties.get(property_key)
                    property_cls = property_dict4key.get('class')
                    _, property_uri = self.createProperty(cls_uri, property_key, property_cls, property_dict4key)
                    uri_mapping4properties[property_key] = property_uri
                elif super_property_uris.get(property_key):
                    # property is inherited: reuse the uri of the super class
                    uri_mapping4properties[property_key] = super_property_uris[property_key]
            self.uri_mapping4cls_and_properties['classes'].update({ cls_name: { 'class_uri': cls_uri, 'properties': uri_mapping4properties }})
        return URIRef(self.base_uriref + '#' + cls_name)

    def createProperty(self, domain_uri, property_name, range_cls, info_dict) -> (URIRef, URIRef):
        """Creates a owl:ObjectProperty or, for builtin range classes other than list, a owl:DatatypeProperty.

        If the property exists already and domain_uri is not a subclass of one of its domains,
        domain_uri is added as a further rdfs:domain.

        :return: tuple of domain_uri (rdflib.URIRef) and property_uri (rdflib.URIRef) of created property
        """
        if SemanticClass.PROPERTY_NAME in info_dict:
            name = info_dict[SemanticClass.PROPERTY_NAME]
        else:
            name = self.createPropertyName(property_name=property_name)
        property_uri = URIRef(self.base_uriref + '#' + name)
        range_uri = URIRef(self.base_uriref + '#' + range_cls.__name__)
        super_property_uri = None
        if SemanticClass.SUBPROPERTYOF in info_dict:
            super_property_uri = URIRef(info_dict[SemanticClass.SUBPROPERTYOF])
        elif SemanticClass.SUPER_PROPERTY in info_dict:
            # the super property is described by a nested info dictionary: create it first
            super_info_dict = info_dict[SemanticClass.SUPER_PROPERTY]
            domain_uri, super_property_uri = self.createProperty(domain_uri,\
                    super_info_dict.get(SemanticClass.PROPERTY_NAME),\
                    range_cls, super_info_dict)
        if (property_uri, None, None) not in self.project_graph:
            property_type = OWL.ObjectProperty
            if range_cls.__module__ == 'builtins' and range_cls != list:
                property_type = OWL.DatatypeProperty
                range_uri = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING.get(range_cls)
                if range_uri == XSD.string and property_name == 'URL':
                    range_uri = XSD.anyURI
            self.addProperty2Graph(property_uri, domain_uri, range_uri, info_dict, property_type=property_type)
        elif not any((domain_uri, self.INFERRED_SUB_CLASS, known_domain) in self.project_graph\
                for known_domain in self.project_graph.objects(property_uri, RDFS.domain)):
            # if domain_uri is NOT a subclass of a cls specified by RDFS.domain
            if self._has_cardinality(info_dict):
                self.addRestriction2Class(domain_uri, property_uri, info_dict)
            self.project_graph.add((property_uri, RDFS.domain, domain_uri))
        if super_property_uri is not None\
                and (property_uri, RDFS.subPropertyOf, super_property_uri) not in self.project_graph:
            self.project_graph.add((property_uri, RDFS.subPropertyOf, super_property_uri))
        return domain_uri, property_uri

    def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'):
        """Returns a property name.

        The first argument that is not None determines the name:
        property_name: snake_case is converted to camelCase and prefixed, e.g. 'my_value' -> 'hasMyValue'.
        subject_uri: local name of subject_uri + connector + local name of object_uri, starting lower case.
        object_uri: prefix + local name of object_uri.
        If all of them are None (or property_name is empty), prefix is returned.
        """
        if property_name is not None:
            first_part, *further_parts = property_name.split('_')
            camel_case_name = first_part.lower() + ''.join(part.capitalize() for part in further_parts)
            # slices instead of indices, such that an empty name does not raise an IndexError
            if camel_case_name[:1].islower():
                return prefix + camel_case_name[:1].upper() + camel_case_name[1:]
            return prefix + camel_case_name
        if subject_uri is not None:
            combined_name = subject_uri.split('#')[1] + self.createPropertyName(object_uri=object_uri, prefix=connector)
            return combined_name[0].lower() + combined_name[1:]
        if object_uri is not None:
            return prefix + object_uri.split('#')[1]
        return prefix

    def get_comment_label(self, cls):
        """Returns comment and label from cls __doc__.

        comment: the docstring without newlines or, if the docstring contains a '.', its first non empty line.
        label: the text following a '@label' tag in the docstring; otherwise the class name
            split at its capitalized words and lower cased, e.g. 'TextField' -> 'text field'.
        A missing docstring results in an empty comment.

        :return: (comment (str), label (str))
        """
        doc = cls.__doc__ or ''
        comment = doc.replace('\n','').lstrip()
        label = cls.__name__
        if '.' in doc:
            comment = [ text for text in doc.split('\n') if text != '' ][0].lstrip()
        label_match = re.search(r'(@label[:]*\s)(.*[\.]*)', doc) if '@label' in doc else None
        if label_match is not None:
            label = label_match.group(2)
        elif re.search(r'([A-Z][a-z]+)', label):
            label = ' '.join([ text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != '' ])
        return comment, label

    def get_semantic_classes(self, datatypes_dir):
        """Returns a list of all classes that are contained in datatypes_dir that are subclasses of class_spec.SemanticClass.

        All python files of datatypes_dir are imported, except those starting with 'test_' or '_'.
        datatypes_dir is expected to be a relative path that corresponds to an importable package.

        :return: a list of classes, sorted by class name
        """
        base_dir = dirname(dirname(__file__))
        if base_dir not in sys.path:
            sys.path.append(base_dir)
        # a trailing separator would result in an empty package name
        root_modul_name = datatypes_dir.rstrip('/' + sep).replace(sep, '.').replace('/', '.')
        module_names = [ file_name[:-len('.py')] for file_name in listdir(datatypes_dir)\
                if file_name.endswith('.py') and not file_name.startswith(('test_', '_')) ]
        all_modules = [ importlib.import_module('{}.{}'.format(root_modul_name, name)) for name in module_names ]
        # a set removes classes that are visible in several modules
        all_classes = set()
        for modul in all_modules:
            all_classes.update(inspect.getmembers(modul, inspect.isclass))
        return [ cls for _, cls in sorted(all_classes, key=lambda name_and_class: name_and_class[0])\
                if issubclass(cls, SemanticClass)\
                and not issubclass(cls, UnSemanticClass)\
                and cls is not SemanticClass ]

    def _get_builtin_cls_keys(self, property_dict):
        """Returns a list of keys for classes that are builtin.
        """
        builtin_cls_keys = []
        for key, property_entry in property_dict.items():
            property_cls = self._get_property_cls(property_entry)
            if not isinstance(property_cls, dict)\
                    and getattr(property_cls, '__module__', None) == 'builtins':
                builtin_cls_keys.append(key)
        return builtin_cls_keys

    def _get_semantic_dictionary_keys_super_first(self, property_dict):
        """Sorts the keys of the property part of a semantic dictionary
        and returns the keys for super classes before keys of subclasses.

        Keys of builtin classes come first, in the order of property_dict.

        :return: a sorted list of keys.
        """
        builtin_cls_keys = self._get_builtin_cls_keys(property_dict)
        complex_cls_keys = []
        for key in property_dict:
            if key in builtin_cls_keys:
                continue
            current_cls = self._get_property_cls(property_dict[key])
            # insert key before the first key whose class is a subclass of current_cls
            insert_index = len(complex_cls_keys)
            for index, cls_key in enumerate(complex_cls_keys):
                if issubclass(self._get_property_cls(property_dict[cls_key]), current_cls):
                    insert_index = index
                    break
            complex_cls_keys.insert(insert_index, key)
        return builtin_cls_keys + complex_cls_keys
def usage():
    """Print the command line help of this script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py classes that are subclasses of class_spec.SemanticClass to owl:Class
    and its properties to owl:ObjectProperty.
    py2ttl/py2ttl_ontology.py [OPTIONS <dir>]
    <dir> [optional] directory containing datatypes that are subclasses of class_spec.SemanticClass.
    Overwrites DATATYPES_DIR in py2ttl/config.py.
    OPTIONS:
    -h|--help: show help
    -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in py2ttl/config.py
    -t|--target=target_ontology_file target ontology ttl file, default: 'PROJECT_PREFIX-ontology_autogenerated.ttl'
    :return: exit code (int)
    """
    # defaults come from the configuration; options and arguments may overwrite them
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = ''
    try:
        parsed_options, positional_args = getopt.getopt(argv, "hs:t:", ["help","source=", "target="])
    except getopt.GetoptError:
        usage()
        return 2
    for flag, value in parsed_options:
        if flag in ('-h', '--help'):
            usage()
            return 0
        if flag in ('-t', '--target'):
            target_ontology_file = value
        elif flag in ('-s', '--source'):
            source_ontology_file = value
    converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    # a directory passed as argument takes precedence over the configured one
    if positional_args:
        datatypes_dir = positional_args[0]
    # without a target option, the target file is named after the project
    if not target_ontology_file:
        target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, converter.project_name)
    exit_code = converter.create_ontology(datatypes_dir, target_ontology_file)
    return exit_code
# Run the converter only if this file is executed as a script;
# the return value of main becomes the exit code of the process.
if __name__ == "__main__":
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)

Event Timeline