Page MenuHomec4science

data_handler.py
No OneTemporary

File Metadata

Created
Fri, May 24, 23:03

data_handler.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This module provides a class that can be used to add data to an RDF graph.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from rdflib import Graph, URIRef, Literal, BNode, OWL, RDF, RDFS, XSD
from rdflib import RDF as ns_rdf
from os.path import isfile
import random
import warnings
from class_spec import SemanticClass
from config import DATA_URL
class RDFDataHandler:
    """
    This class can be used to add data to an RDF graph.

    The handler keeps two graphs: ``ontology_graph`` (parsed from the ontology
    file named in the mapping dictionary, if that file exists) and
    ``data_graph`` (the graph that receives the instance data and is written
    to ``target_file``).
    """
    UNITTESTING = False
    # Fallback RDF datatypes for python types; an rdfs:range declared in the
    # ontology for the predicate takes precedence (see _add_literal).
    SIMPLE_DATA_TYPE_MAPPING = {
        int: XSD.integer,
        float: XSD.float,
        str: XSD.string,
        bool: XSD.boolean,
        list: RDF.List,
    }

    def __init__(self, target_file, mapping_dictionary):
        """Create a handler that writes to target_file.

        :param target_file: path of the file that :meth:`write` writes to
        :param mapping_dictionary: dictionary with the key 'ontology'
            (read for 'project_name', 'project_uri', 'ontology_file') and
            the key 'classes' (read by :meth:`add_data`)
        :raises Exception: if mapping_dictionary has no (non-empty) 'ontology' entry
        """
        self.target_file = target_file
        self.mapping_dictionary = mapping_dictionary
        self.ontology_graph = Graph()
        self.data_graph = Graph()
        # Maps every data instance already added to the graph to its URI.
        self.data_identifier_mapping = {}
        ontology_config = self.mapping_dictionary.get('ontology')
        if not ontology_config:
            raise Exception('Error: mapping_dictionary does not contain key "ontology"!')
        self.project_name = ontology_config.get('project_name')
        self.project_uri = URIRef(ontology_config.get('project_uri'))
        ontology_file = ontology_config.get('ontology_file')
        if bool(ontology_file) and isfile(ontology_file):
            self.ontology_graph.parse(ontology_file, format="turtle")
        # Snapshot (namespace uri -> prefix) of the prefixes known to the data
        # graph *before* the project and data prefixes are bound below.
        self.ns = {uriref: ns for ns, uriref in self.data_graph.namespace_manager.namespaces()}
        self.data_graph.bind(self.project_name, self.project_uri)
        self.data_graph.bind('data', DATA_URL + '#')

    def add_data(self, data_instance, identifier_prefix, parent_data_instance=None):
        """Add a data rdf instance of data_instance to the data_graph.

        Every property listed in the semantic dictionary of data_instance is
        added as a triple; properties whose value is None or the int -1 are
        skipped. Child instances are added recursively.

        :param data_instance: object providing get_semantic_dictionary() and
            get_name_and_id()
        :param identifier_prefix: (str) prefix used for the identifier uri
        :param parent_data_instance: instance used to resolve children that
            are referenced by id only (via get_object_from_list_with_id)
        :raises Exception: if the mapping dictionary lacks an entry for the
            class or for one of its properties, or if a child referenced by
            id cannot be resolved
        :return: (rdflib.URIRef) subject_uri of data instance
        """
        identifier_uri = self.create_identifier_uri(data_instance, identifier_prefix)
        class_name = type(data_instance).__name__
        class_mapping = self.mapping_dictionary['classes'].get(class_name)
        if not class_mapping:
            msg = 'Mapping dictionary does not contain a entry for {}!'.format(class_name)
            raise Exception(msg)
        class_uri = class_mapping['class_uri']
        # Register the instance before descending, so that children pointing
        # back to it reuse this uri instead of recursing endlessly.
        self.data_identifier_mapping[data_instance] = identifier_uri
        self.data_graph_add((identifier_uri, RDF.type, class_uri))
        # The fragment of this instance's uri is the prefix of its children.
        new_identifier_prefix = identifier_uri[identifier_uri.index('#')+1:]
        semantic_dict = data_instance.get_semantic_dictionary()
        for key, content in semantic_dict['properties'].items():
            predicate_uri = class_mapping['properties'].get(key)
            if not predicate_uri:
                msg = 'Mapping dictionary for {0} does not contain a entry for {1}!'.format(class_name, key)
                raise Exception(msg)
            value = data_instance.__dict__.get(key)
            if value is None or (type(value) is int and value == -1):
                # unset property, or int placeholder -1: nothing to add
                continue
            datatype = content.get('class')
            if datatype is list:
                self.add_ordered_list(value, identifier_uri, predicate_uri,
                                      new_identifier_prefix, data_instance)
            elif issubclass(datatype, SemanticClass):
                if type(value) is list:
                    children = value
                else:
                    children = [self._resolve_child_instance(key, content, value, datatype,
                                                             data_instance, parent_data_instance)]
                for child in children:
                    child_identifier_uri = self._get_or_add_identifier(child, new_identifier_prefix,
                                                                       data_instance)
                    self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
            else:
                self._add_literal(identifier_uri, predicate_uri, value, datatype)
        return identifier_uri

    def _resolve_child_instance(self, key, content, child_reference, datatype,
                                data_instance, parent_data_instance):
        """Return the child instance for child_reference, looking it up by id on
        parent_data_instance if child_reference is not itself of type datatype.
        """
        # Exact type comparison on purpose: anything that is not exactly of
        # type datatype is treated as an id.
        if type(child_reference) == datatype:
            return child_reference
        if parent_data_instance is None:
            msg = 'No parent_data_instance available for data_instance {0}: looking for {1} with id {2}'.format(
                type(data_instance), datatype, child_reference)
            raise Exception(msg)
        child_data_instance = parent_data_instance.get_object_from_list_with_id(datatype, child_reference)
        if child_data_instance is None:
            print(key, content)
            msg = 'No child_data_instance found for data_instance {0}: looking for {1} with id {2}'.format(
                type(parent_data_instance), datatype, child_reference)
            raise Exception(msg)
        # Remember the resolved child on data_instance in a 'list_of_<Class>s' attribute.
        new_list_name = 'list_of_' + datatype.__name__ + 's'
        data_instance.__dict__.setdefault(new_list_name, []).append(child_data_instance)
        return child_data_instance

    def _get_or_add_identifier(self, child_data_instance, identifier_prefix, data_instance):
        """Return the uri of child_data_instance, adding it to the data_graph first if needed."""
        if child_data_instance in self.data_identifier_mapping:
            return self.data_identifier_mapping[child_data_instance]
        return self.add_data(child_data_instance, identifier_prefix,
                             parent_data_instance=data_instance)

    def _add_literal(self, identifier_uri, predicate_uri, value, datatype):
        """Add value as a typed literal object of (identifier_uri, predicate_uri)."""
        literal_datatype = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING[datatype]
        # A range declared in the ontology overrides the python type mapping.
        ontology_range = next(iter(self.ontology_graph.objects(subject=predicate_uri,
                                                               predicate=RDFS.range)), None)
        if ontology_range is not None:
            literal_datatype = ontology_range
        lexical_form = str(value)
        if isinstance(value, bool) and literal_datatype == XSD.boolean:
            # str(True) is 'True', but xsd:boolean only allows 'true'/'false'/'1'/'0'.
            lexical_form = lexical_form.lower()
        object_literal = Literal(lexical_form, datatype=literal_datatype)
        self.data_graph_add((identifier_uri, predicate_uri, object_literal))

    def add_ordered_list(self, data_instance_list, identifier_uri, predicate_uri, identifier_prefix, data_instance):
        """Add the items of data_instance_list as an ordered RDF collection.

        Each item is added to the data_graph (unless it has been added before),
        the resulting uris are chained into an RDF collection and the head of
        that collection becomes the object of (identifier_uri, predicate_uri).
        Nothing is added for an empty list.

        :param data_instance_list: list of data instances
        :param identifier_uri: subject uri of the owning data instance
        :param predicate_uri: predicate linking the owner to the collection
        :param identifier_prefix: (str) prefix for the identifiers of the items
        :param data_instance: owning instance, passed on as parent of the items
        """
        if not data_instance_list:
            return
        child_identifiers = [
            self._get_or_add_identifier(item, identifier_prefix, data_instance)
            for item in data_instance_list
        ]
        list_node = self.generate_RDF_collection(child_identifiers)
        self.data_graph_add((identifier_uri, predicate_uri, list_node))

    def create_identifier_uri(self, data_instance, identifier_prefix):
        """Return a data identifier uri that is not yet a subject in the data_graph.

        The uri is built from DATA_URL, identifier_prefix and the name and id
        returned by data_instance.get_name_and_id(). If that uri is already in
        use, the id is replaced by random numbers of growing bit length until
        an unused uri is found.

        :return: (rdflib.URIRef) subject_uri of data instance
        """
        data_type, instance_id = data_instance.get_name_and_id()
        uri_stem = DATA_URL + '#' + identifier_prefix + '_' + data_type
        identifier_uri = URIRef(uri_stem + str(instance_id))
        randombit_length = 5
        while (identifier_uri, None, None) in self.data_graph:
            identifier_uri = URIRef(uri_stem + str(random.getrandbits(randombit_length)))
            # widen the random range on every collision
            randombit_length += 1
        return identifier_uri

    def data_graph_add(self, rdf_triple):
        """Add a triple to the graph.

        :param rdf_triple: (subject, predicate, object) tuple
        """
        self.data_graph.add(rdf_triple)

    def generate_RDF_collection(self, vals) -> BNode:
        """
        Generate an RDF List from vals in the data_graph, returns the head of the list.
        For empty vals the head is rdf:nil.

        @URL: <https://github.com/MKLab-ITI/prophet/blob/master/rdflib/plugins/parsers/pyMicrodata/utils.py>
        @organization: U{World Wide Web Consortium<http://www.w3.org>}
        @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
        @license:
        U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
        @param vals: array of RDF Resources
        @return: head of the List (an RDF Resource)
        """
        # One blank node per value, terminated by rdf:nil.
        heads = [BNode() for _ in vals] + [ns_rdf["nil"]]
        for index, value in enumerate(vals):
            self.data_graph_add((heads[index], ns_rdf["first"], value))
            self.data_graph_add((heads[index], ns_rdf["rest"], heads[index + 1]))
        return heads[0]

    def write(self, output_format="turtle"):
        """Write the data_graph to target_file.

        :param output_format: (str) rdflib serialization format
        """
        serialized = self.data_graph.serialize(format=output_format)
        if isinstance(serialized, str):
            # Newer rdflib versions return str instead of bytes.
            serialized = serialized.encode('utf-8')
        with open(self.target_file, 'wb') as target:
            target.write(serialized)

Event Timeline