Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F64154517
data_handler.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, May 24, 23:03
Size
10 KB
Mime Type
text/x-python
Expires
Sun, May 26, 23:03 (2 d)
Engine
blob
Format
Raw Data
Handle
17861636
Attached To
rNIETZSCHEPYTHON nietzsche-python
data_handler.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to add data to a rdf graph.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata (authorship, licence and version information).
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
rdflib
import
Graph
,
URIRef
,
Literal
,
BNode
,
OWL
,
RDF
,
RDFS
,
XSD
from
rdflib
import
RDF
as
ns_rdf
from
os.path
import
isfile
import
random
import
warnings
from
class_spec
import
SemanticClass
from
config
import
DATA_URL
class RDFDataHandler:
    """Add data instances to an RDF data graph and write that graph to a file.

    The handler keeps two rdflib graphs: ``ontology_graph`` (optionally parsed
    from the ontology file named in the mapping dictionary) and ``data_graph``
    (the graph that receives the generated triples).  Every data instance that
    has been added is remembered in ``data_identifier_mapping`` so that it is
    only added once and later references reuse the same URI.
    """
    UNITTESTING = False
    # Python type -> RDF datatype used for literals when the ontology does not
    # declare an rdfs:range for the predicate.
    SIMPLE_DATA_TYPE_MAPPING = {
        int: XSD.integer,
        float: XSD.float,
        str: XSD.string,
        bool: XSD.boolean,
        list: RDF.List,
    }

    def __init__(self, target_file, mapping_dictionary):
        """Set up the graphs and bind the project and data namespaces.

        :param target_file: path of the file that :meth:`write` writes to.
        :param mapping_dictionary: dictionary with an ``'ontology'`` entry
            (keys ``'project_name'``, ``'project_uri'``, ``'ontology_file'``
            are read here) and a ``'classes'`` entry (read by
            :meth:`add_data`).
        :raises Exception: if ``mapping_dictionary`` has no non-empty
            ``'ontology'`` entry.
        """
        self.target_file = target_file
        self.mapping_dictionary = mapping_dictionary
        self.ontology_graph = Graph()
        self.data_graph = Graph()
        self.data_identifier_mapping = {}
        ontology_config = self.mapping_dictionary.get('ontology')
        if not ontology_config:
            raise Exception('Error: mapping_dictionary does not contain key "ontology"!')
        self.project_name = ontology_config.get('project_name')
        self.project_uri = URIRef(ontology_config.get('project_uri'))
        ontology_file = ontology_config.get('ontology_file')
        if ontology_file and isfile(ontology_file):
            self.ontology_graph.parse(ontology_file, format="turtle")
        # Snapshot of the namespaces known to the data graph *before* the
        # project/data prefixes are bound.  Populated unconditionally so the
        # attribute exists even when no ontology file is loaded.
        self.ns = {uriref: ns for ns, uriref in self.data_graph.namespace_manager.namespaces()}
        self.data_graph.bind(self.project_name, self.project_uri)
        self.data_graph.bind('data', DATA_URL + '#')

    def add_data(self, data_instance, identifier_prefix, parent_data_instance=None):
        """Add a data rdf instance of data_instance to the data_graph.

        The instance is typed with the class URI found in the mapping
        dictionary; then every property listed in the instance's semantic
        dictionary is added, recursing into child instances where needed.
        Properties whose value is ``None`` or the integer ``-1`` are skipped.

        :param data_instance: object to add; it must provide
            ``get_name_and_id()`` and ``get_semantic_dictionary()``.
        :param identifier_prefix: string prepended to the local part of the
            instance's identifier URI.
        :param parent_data_instance: instance that references
            ``data_instance``; used to resolve children that are stored as ids.
        :raises Exception: if the mapping dictionary has no entry for the
            class or for one of its properties, or if a child given by id
            cannot be resolved.
        :return: (rdflib.URIRef) subject_uri of data instance
        """
        identifier_uri = self.create_identifier_uri(data_instance, identifier_prefix)
        class_name = type(data_instance).__name__
        class_mapping = self.mapping_dictionary['classes'].get(class_name)
        if not class_mapping:
            msg = 'Mapping dictionary does not contain a entry for {}!'.format(class_name)
            raise Exception(msg)
        class_uri = class_mapping['class_uri']
        # Register before recursing so that cyclic references reuse this URI.
        self.data_identifier_mapping[data_instance] = identifier_uri
        self.data_graph_add((identifier_uri, RDF.type, class_uri))
        property_mapping = class_mapping['properties']
        # Children are prefixed with the local part (after '#') of this URI.
        new_identifier_prefix = identifier_uri[identifier_uri.index('#') + 1:]
        semantic_dict = data_instance.get_semantic_dictionary()
        for key, content in semantic_dict['properties'].items():
            predicate_uri = property_mapping.get(key)
            if not predicate_uri:
                msg = 'Mapping dictionary for {0} does not contain a entry for {1}!'.format(class_name, key)
                raise Exception(msg)
            value = data_instance.__dict__.get(key)
            # None and the int sentinel -1 both mean "no value".  The exact
            # type test keeps bool values (a subclass of int) untouched.
            if value is None or (type(value) is int and value == -1):
                continue
            datatype = content.get('class')
            if datatype is list:
                self.add_ordered_list(value, identifier_uri, predicate_uri,
                                      new_identifier_prefix, data_instance)
            elif issubclass(datatype, SemanticClass):
                if type(value) is list:
                    children = value
                else:
                    children = [self._resolve_child_instance(value, datatype, data_instance,
                                                             parent_data_instance, key, content)]
                for child in children:
                    child_identifier_uri = self._get_or_add_child(child, new_identifier_prefix, data_instance)
                    self.data_graph_add((identifier_uri, predicate_uri, child_identifier_uri))
            else:
                self._add_literal(identifier_uri, predicate_uri, value, datatype)
        return identifier_uri

    def _get_or_add_child(self, child_data_instance, identifier_prefix, data_instance):
        """Return the URI of child_data_instance, adding it to the graph first if it is new."""
        if child_data_instance not in self.data_identifier_mapping:
            return self.add_data(child_data_instance, identifier_prefix,
                                 parent_data_instance=data_instance)
        return self.data_identifier_mapping[child_data_instance]

    def _resolve_child_instance(self, child, datatype, data_instance, parent_data_instance, key, content):
        """Return the child object for a property, looking it up by id on the parent if necessary.

        If ``child`` is not an instance of ``datatype`` it is treated as an id
        and resolved with ``parent_data_instance.get_object_from_list_with_id``.
        A resolved child is appended to ``data_instance.list_of_<datatype>s``.

        :raises Exception: if no object can be found for the id (this includes
            the case that no parent instance is available for the lookup).
        """
        # isinstance (not an exact type comparison) so that instances of a
        # subclass of datatype are recognised as objects, not mistaken for ids.
        if isinstance(child, datatype):
            return child
        child_id = child
        resolved = None
        if parent_data_instance is not None:
            resolved = parent_data_instance.get_object_from_list_with_id(datatype, child_id)
        if resolved is None:
            print(key, content)
            msg = 'No child_data_instance found for data_instance {0}: looking for {1} with id {2}'.format(
                type(parent_data_instance), datatype, child_id)
            raise Exception(msg)
        new_list_name = 'list_of_' + datatype.__name__ + 's'
        data_instance.__dict__.setdefault(new_list_name, []).append(resolved)
        return resolved

    def _add_literal(self, identifier_uri, predicate_uri, value, datatype):
        """Add value as a typed literal; an rdfs:range from the ontology overrides the default datatype."""
        default_datatype = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING[datatype]
        ontology_ranges = self.ontology_graph.objects(subject=predicate_uri, predicate=RDFS.range)
        literal_datatype = next(iter(ontology_ranges), default_datatype)
        object_literal = Literal(str(value), datatype=literal_datatype)
        self.data_graph_add((identifier_uri, predicate_uri, object_literal))

    def add_ordered_list(self, data_instance_list, identifier_uri, predicate_uri, identifier_prefix, data_instance):
        """Add the items of data_instance_list as an ordered RDF collection.

        Each item is added to the data_graph (or its existing URI is reused),
        then the head of the generated collection is attached to
        ``identifier_uri`` via ``predicate_uri``.  Nothing is added for an
        empty list.

        :param data_instance_list: list of data instances.
        :param identifier_uri: subject the collection is attached to.
        :param predicate_uri: predicate linking the subject to the collection.
        :param identifier_prefix: prefix for the identifiers of new items.
        :param data_instance: instance owning the list; passed on as parent.
        """
        if not data_instance_list:
            return
        child_identifiers = [
            self._get_or_add_child(item, identifier_prefix, data_instance)
            for item in data_instance_list
        ]
        list_node = self.generate_RDF_collection(child_identifiers)
        self.data_graph_add((identifier_uri, predicate_uri, list_node))

    def create_identifier_uri(self, data_instance, identifier_prefix):
        """Return a data identifier uri.

        The URI is built from ``DATA_URL``, the prefix and the name and id
        returned by ``data_instance.get_name_and_id()``.  If that URI is
        already used as a subject in the data_graph, the id is replaced by
        random numbers of growing bit length until an unused URI is found.

        :return: (rdflib.URIRef) subject_uri of data instance
        """
        data_type, instance_id = data_instance.get_name_and_id()
        uri_stem = DATA_URL + '#' + identifier_prefix + '_' + data_type
        identifier_uri = URIRef(uri_stem + str(instance_id))
        randombit_length = 5
        while (identifier_uri, None, None) in self.data_graph:
            identifier_uri = URIRef(uri_stem + str(random.getrandbits(randombit_length)))
            # Widen the random range on every collision.
            randombit_length += 1
        return identifier_uri

    def data_graph_add(self, rdf_triple):
        """Add a triple to the graph.

        :param rdf_triple: (subject, predicate, object) tuple.
        """
        self.data_graph.add(rdf_triple)

    def generate_RDF_collection(self, vals) -> BNode:
        """
        Generate an RDF List from vals, returns the head of the list

        The rdf:first / rdf:rest triples are added to the data_graph.  For an
        empty ``vals`` no triple is added and rdf:nil is returned.

        @URL: <https://github.com/MKLab-ITI/prophet/blob/master/rdflib/plugins/parsers/pyMicrodata/utils.py>
        @organization: U{World Wide Web Consortium<http://www.w3.org>}
        @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
        @license:
        U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
        @param vals: array of RDF Resources
        @return: head of the List (an RDF Resource)
        """
        # One blank node per value, terminated by rdf:nil.
        heads = [BNode() for _ in vals] + [ns_rdf["nil"]]
        for index, val in enumerate(vals):
            self.data_graph_add((heads[index], ns_rdf["first"], val))
            self.data_graph_add((heads[index], ns_rdf["rest"], heads[index + 1]))
        return heads[0]

    def write(self, output_format="turtle"):
        """Write graph.

        The data_graph is serialized first and only then is ``target_file``
        opened, so a failing serialization does not truncate an existing file.

        :param output_format: rdflib serialization format name.
        """
        serialized = self.data_graph.serialize(format=output_format)
        # Depending on the rdflib version serialize() returns str or bytes;
        # the file is written in binary mode, so normalise to bytes.
        if isinstance(serialized, str):
            serialized = serialized.encode('utf-8')
        with open(self.target_file, 'wb') as target:
            target.write(serialized)
Event Timeline
Log In to Comment