Page MenuHomec4science

py2ttl.py
No OneTemporary

File Metadata

Created
Sat, May 11, 07:06

py2ttl.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py classes that are subclasses of DATATYPES_DIR.class_spec.SemanticClass to rdf.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import getopt
import importlib
import importlib.util
import inspect
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
import re
import sys
if dirname(__file__) not in sys.path:
sys.path.append(dirname(__file__))
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL, SHARED_ONTOLOGIES_DIR
from knora_base import KNORA_BASE
# Module metadata (authorship, licensing and release status of this script).
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLConverter:
    """Converts the semantic dictionaries of python classes to a turtle ontology.

    The converter collects all triples in ``self.project_graph``; an existing
    project ontology can be loaded first so that new definitions extend it.
    """

    def __init__(self, project_onotology_file=None, create_super_cls_for_multi_property=True):
        """Set up the graph, optionally pre-loaded from a turtle file.

        :param project_onotology_file: path of an existing turtle ontology, or None
        :param create_super_cls_for_multi_property: if True, classes sharing a
            property get a common '<Property>Holder' super class
        """
        # cardinality marker for list properties; replaced by class_spec.LIST if defined
        self.list_value = -99
        self.create_super_cls_for_multi_property = create_super_cls_for_multi_property
        self.created_python_classes = []
        self.do_not_delete_classes = []
        self.project_graph = Graph()
        self.base_uriref = URIRef(PROJECT_URL)
        self.project_name = PROJECT_NAME
        self.ns = { self.base_uriref + '#': self.project_name }
        if project_onotology_file is not None and isfile(project_onotology_file):
            self.project_graph.parse(project_onotology_file, format="turtle")
        if len(self.project_graph) > 0:
            ontology_uri = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False)
            # keep the configured base uri if the file declares no owl:Ontology
            if ontology_uri is not None:
                self.base_uriref = ontology_uri
            self.ns = { uriref: prefix for prefix, uriref in self.project_graph.namespace_manager.namespaces() }
            self.project_name = self.ns.get(self.base_uriref + '#')
            # keep the configured prefix if the file binds none to the base namespace
            if self.project_name is None:
                self.project_name = PROJECT_NAME
        self.project_graph.bind(self.project_name, self.base_uriref + '#')

    @staticmethod
    def _strip_has_prefix(name):
        """Return name without a leading 'has'; other occurrences of 'has' are kept."""
        return name[len('has'):] if name.startswith('has') else name

    def get_semantic_classes(self, datatypes_dir):
        """Returns all classes contained in datatypes_dir that are subclasses of DATATYPES_DIR.class_spec.SemanticClass.

        :param datatypes_dir: package directory, relative to the parent of this script's directory
        :return: a list of classes, ordered by class name
        """
        base_dir = dirname(dirname(__file__))
        if base_dir not in sys.path:
            sys.path.append(base_dir)
        # normpath drops './' and trailing separators that would yield an invalid module name
        root_modul_name = path.normpath(datatypes_dir).replace(sep, '.')
        reference_cls = importlib.import_module('{}.{}'.format(root_modul_name, 'class_spec'))
        self.list_value = getattr(reference_cls, 'LIST', self.list_value)
        # cut only the '.py' suffix; str.replace would also hit '.py' inside the name
        module_names = [ file[:-len('.py')] for file in listdir(datatypes_dir)\
                if file.endswith('.py') and not file.startswith(('test_', '_')) ]
        all_modules = [ importlib.import_module('{}.{}'.format(root_modul_name, name)) for name in module_names ]
        all_classes = set()
        for modul in all_modules:
            all_classes.update(inspect.getmembers(modul, inspect.isclass))
        # sort by names only: class objects themselves are not orderable
        ordered_classes = sorted(all_classes, key=lambda member: (member[0], member[1].__module__, member[1].__qualname__))
        return [ cls for name, cls in ordered_classes\
                if issubclass(cls, reference_cls.SemanticClass) and cls is not reference_cls.SemanticClass ]

    def addRestriction2Class(self, cls_uri, property_uri, cardinality=0, comment="", label=""):
        """Adds a owl:Restriction on property_uri to class cls_uri.

        The class is created (as subclass of KNORA_BASE.Resource) if the graph
        does not yet contain any triple about it. A cardinality of 0 is
        written as owl:minCardinality 0, every other value as owl:cardinality.
        """
        if (cls_uri, None, None) not in self.project_graph:
            self.addClass(cls_uri, comment=comment, label=label)
        restriction = BNode()
        cardinality_restriction = OWL.minCardinality if cardinality == 0 else OWL.cardinality
        self.project_graph.add((cls_uri, RDFS.subClassOf, restriction))
        self.project_graph.add((restriction, RDF.type, OWL.Restriction))
        self.project_graph.add((restriction, OWL.onProperty, property_uri))
        self.project_graph.add((restriction, cardinality_restriction, Literal(str(cardinality), datatype=XSD.nonNegativeInteger)))

    def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'):
        """Returns a property name.

        With property_name: snake_case is turned into camelCase and prefixed,
        e.g. 'file_name' -> 'hasFileName'.
        With subject_uri: '<subject><connector><object>' with a lower case first letter.
        With object_uri only: '<prefix><object>'.
        Without any of them: prefix.
        """
        if property_name is not None:
            first_part, *other_parts = property_name.split('_')
            property_name = ''.join([ first_part.lower() ] + [ text.capitalize() for text in other_parts ])
            return prefix + property_name[0].upper() + property_name[1:] if property_name[0].islower()\
                    else prefix + property_name
        elif subject_uri is not None:
            property_name = subject_uri.split('#')[1] + self.createPropertyName(object_uri=object_uri, prefix=connector)
            return property_name[0].lower() + property_name[1:]
        elif object_uri is not None:
            return prefix + object_uri.split('#')[1]
        else:
            return prefix

    def createSuperClassForSubjectClassConstraint(self, property_uri, sub_uri):
        """Creates a super class for classes that share a property.

        sub_uri and all classes that have been the subjectClassConstraint of
        property_uri become subclasses of '<Property>Holder', which replaces
        them as the subjectClassConstraint of property_uri.
        """
        # strip only the 'has' prefix of the local name: a plain str.replace would
        # also delete 'has' inside the namespace or the name (e.g. 'hasPhase' -> 'Pe')
        namespace, _, local_name = str(property_uri).rpartition('#')
        holder_of = self._strip_has_prefix(local_name)
        super_uri = URIRef(namespace + '#' + holder_of + 'Holder')
        self.project_graph.add((sub_uri, RDFS.subClassOf, super_uri))
        self.project_graph.remove((sub_uri, RDFS.subClassOf, KNORA_BASE.Resource))
        if (super_uri, RDF.type, OWL.Class) not in self.project_graph:
            label = 'holder of ' + holder_of
            comment = 'super class for classes that have a ' + holder_of
            self.addRestriction2Class(super_uri, property_uri, comment=comment, label=label)
        # take a snapshot first, because the loop changes the triples it iterates over
        constrained_classes = list(self.project_graph.objects(subject=property_uri, predicate=KNORA_BASE.subjectClassConstraint))
        for object_uri in constrained_classes:
            self.project_graph.remove((property_uri, KNORA_BASE.subjectClassConstraint, object_uri))
            if object_uri == super_uri:
                # the holder class must neither become its own subclass nor lose its parent
                continue
            self.project_graph.add((object_uri, RDFS.subClassOf, super_uri))
            self.project_graph.remove((object_uri, RDFS.subClassOf, KNORA_BASE.Resource))
        self.project_graph.add((property_uri, KNORA_BASE.subjectClassConstraint, super_uri))
        objectClass = self.project_graph.value(subject=property_uri, predicate=KNORA_BASE.objectClassConstraint, any=False)
        if objectClass is not None:
            comment = 'connects {} with {}'.format(super_uri.split('#')[1], self._strip_has_prefix(objectClass.split('#')[1]))
            self.project_graph.remove((property_uri, RDFS.comment, None))
            self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en')))

    def addProperty(self, property_uri, super_uri, subject_uri, object_uri, comment, label, cardinality):
        """Add a owl:ObjectProperty to self.project_graph and restrict subject_uri on it.
        """
        self.project_graph.add((property_uri, RDF.type, OWL.ObjectProperty))
        self.project_graph.add((property_uri, RDFS.subPropertyOf, super_uri))
        self.project_graph.add((property_uri, KNORA_BASE.objectClassConstraint, object_uri))
        self.project_graph.add((property_uri, KNORA_BASE.subjectClassConstraint, subject_uri))
        self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en')))
        self.project_graph.add((property_uri, RDFS.label, Literal(label, lang='en')))
        self.addRestriction2Class(subject_uri, property_uri, cardinality=cardinality)

    def createProperty(self, cls_uri, property_name, property_cls, cardinality):
        """Creates a owl:ObjectProperty.

        :param cls_uri: uri of the class that owns the property
        :param property_name: key of the property in the semantic dictionary
        :param property_cls: python type of the property value
        :param cardinality: cardinality of the property, or self.list_value for lists
        :raises TypeError: if property_cls is a builtin type without a Knora value type
        """
        inferredSubClass = RDFS.subClassOf * '*'
        name = self.createPropertyName(property_name=property_name)
        property_uri = URIRef(self.base_uriref + '#' + name)
        subject_uri = cls_uri
        label = 'has ' + self._strip_has_prefix(name)
        super_uri = KNORA_BASE.hasValue
        if (property_uri, None, None) not in self.project_graph:
            if property_cls.__module__ == 'builtins':
                datatype_mapping = { float: KNORA_BASE.DecimalValue, int: KNORA_BASE.IntValue, str: KNORA_BASE.TextValue }
                object_uri = datatype_mapping.get(property_cls)
                if object_uri is None:
                    # fail with a clear message instead of handing None to the graph
                    raise TypeError('No Knora value type for property {} of builtin type {}'.format(property_name, property_cls.__name__))
                if object_uri == KNORA_BASE.TextValue:
                    if property_name == 'URL':
                        object_uri = KNORA_BASE.UriValue
                    elif property_name == 'file_name':
                        object_uri = KNORA_BASE.FileValue
            else:
                object_uri = URIRef(self.base_uriref + '#' + property_cls.__name__)
            if cardinality == self.list_value:
                # a list is modelled from the item's side: each item links back to its owner
                subject_uri = object_uri
                object_uri = cls_uri
                name = self.createPropertyName(subject_uri=subject_uri, object_uri=object_uri)
                property_uri = URIRef(self.base_uriref + '#' + name)
                cardinality = 1
                label = subject_uri.split('#')[1] + ' belongs to ' + object_uri.split('#')[1]
                super_uri = KNORA_BASE.hasLinkTo
                property_value_uri = URIRef(property_uri + 'Value')
                comment = 'Reification statement of relation between {} and {}'.format(subject_uri.split('#')[1], object_uri.split('#')[1])
                self.addProperty(property_value_uri, KNORA_BASE.hasLinkToValue, subject_uri, KNORA_BASE.LinkValue,\
                        comment, label + ' - statement', cardinality)
            comment = 'connects {} with {}'.format(subject_uri.split('#')[1], object_uri.split('#')[1])
            self.addProperty(property_uri, super_uri, subject_uri, object_uri, comment, label, cardinality)
        elif not any((cls_uri, inferredSubClass, o) in self.project_graph\
                for o in self.project_graph.objects(property_uri, KNORA_BASE.subjectClassConstraint)):
            # cls_uri is NOT a subclass of a cls specified by KNORA_BASE.subjectClassConstraint
            self.addRestriction2Class(subject_uri, property_uri, cardinality=cardinality)
            if self.create_super_cls_for_multi_property:
                self.createSuperClassForSubjectClassConstraint(property_uri, subject_uri)
            else:
                self.project_graph.add((property_uri, KNORA_BASE.subjectClassConstraint, subject_uri))

    def get_comment_label(self, cls):
        """Returns comment and label from cls __doc__.

        The comment is the docstring without line breaks, or its first
        non-empty line if the docstring contains a '.'. The label is the text
        after '@label:' if the docstring has such a tag, otherwise the class
        name split into lower case words ('WordPosition' -> 'word position').
        A class without docstring gets an empty comment.

        :return: comment (str), label (str)
        """
        doc = cls.__doc__ or ''
        comment = doc.replace('\n','').lstrip()
        label = cls.__name__
        if '.' in doc:
            comment = [ text for text in doc.split('\n') if text != '' ][0].lstrip()
        tag_match = re.search(r'@label:\s*(.*)', doc)
        tagged_label = tag_match.group(1).strip() if tag_match is not None else ''
        if tagged_label != '':
            label = tagged_label
        elif re.search(r'([A-Z][a-z]+)', label):
            label = ' '.join([ text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != '' ])
        return comment, label

    def addClass(self, cls_uri, comment="", label="", super_uri=KNORA_BASE.Resource):
        """Add a owl:Class with english comment and label to project_graph.
        """
        self.project_graph.add((cls_uri, RDF.type, OWL.Class))
        self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en')))
        self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en')))
        self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))

    def createClassAndProperties(self, cls):
        """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class.

        A super class named by semantic_dictionary['class']['type'] is created
        first. Classes that have already been processed are skipped.
        """
        if cls.__name__ in self.created_python_classes:
            return
        self.created_python_classes.append(cls.__name__)
        semantic_dict = cls.get_semantic_dictionary()
        super_uri = KNORA_BASE.Resource
        super_cls = semantic_dict['class'].get('type')
        if bool(super_cls):
            self.createClassAndProperties(super_cls)
            super_uri = URIRef(self.base_uriref + '#' + super_cls.__name__)
        cls_uri = URIRef(self.base_uriref + '#' + cls.__name__)
        comment, label = self.get_comment_label(cls)
        self.addClass(cls_uri, comment, label, super_uri)
        for property_key, (property_cls, cardinality) in semantic_dict['properties'].items():
            self.createProperty(cls_uri, property_key, property_cls, cardinality)

    def convert_py2ttl(self, datatypes_dir, target_ontology_file):
        """Convert all classes contained in datatypes_dir that are subclasses of DATATYPES_DIR.class_spec.SemanticClass to rdf.

        The whole graph is written as turtle to target_ontology_file.

        :return: exit code (int): 0 on success, 1 if datatypes_dir is not a directory
        """
        if not isdir(datatypes_dir):
            print('Error: dir {} does not exist!'.format(datatypes_dir))
            usage()
            return 1
        for cls in self.get_semantic_classes(datatypes_dir):
            self.createClassAndProperties(cls)
        turtle = self.project_graph.serialize(format="turtle")
        # serialize returns bytes or str, depending on the rdflib version
        if isinstance(turtle, str):
            turtle = turtle.encode('utf-8')
        with open(target_ontology_file, 'wb') as target_file:
            target_file.write(turtle)
        return 0
def usage():
    """Print the command line help, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py classes that are subclasses of <dir>.class_spec.SemanticClass to owl:Class.
    py2ttl/py2ttl.py [OPTIONS] <dir>
    <dir> directory containing datatypes that are subclasses of <dir>.class_spec.SemanticClass.
    Overwrites DATATYPES_DIR in config.py.
    OPTIONS:
    -h|--help: show help
    -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in config.py
    -t|--target=target_ontology_file target ontology ttl file
    :return: exit code (int)
    """
    # NOTE: the docstring above is printed by usage(), keep it user readable.
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = '.{0}{1}-onotology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    try:
        opts, args = getopt.getopt(argv, "hs:t:", ["help","source=", "target="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-t', '--target'):
            target_ontology_file = arg
        elif opt in ('-s', '--source'):
            source_ontology_file = arg
    converter = Py2TTLConverter(project_onotology_file=source_ontology_file)
    if len(args) < 1:
        if datatypes_dir is not None:
            # no <dir> argument: fall back to the directory from the configuration
            return converter.convert_py2ttl(datatypes_dir, target_ontology_file)
        # neither an argument nor a configured directory: there is nothing to convert
        usage()
        return 2
    for datatypes_dir in args:
        if converter.convert_py2ttl(datatypes_dir, target_ontology_file) > 0:
            return 2
    # every directory has been converted (one directory is a success as well)
    return 0
if __name__ == "__main__":
    # Run the command line interface; the return value of main is the exit status.
    raise SystemExit(main(sys.argv[1:]))

Event Timeline