Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F64676529
py2ttl_ontology.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 28, 15:29
Size
18 KB
Mime Type
text/x-python
Expires
Thu, May 30, 15:29 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17943119
Attached To
rNIETZSCHEPYTHON nietzsche-python
py2ttl_ontology.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert Python classes that are
subclasses of class_spec.SemanticClass to
an OWL ontology in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import
getopt
import
importlib
import
importlib.util
import
inspect
import
lxml.etree
as
ET
from
os
import
sep
,
path
,
listdir
from
os.path
import
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
from
rdflib
import
Graph
,
URIRef
,
Literal
,
BNode
,
OWL
,
RDF
,
RDFS
,
XSD
import
re
import
sys
import
warnings
if
dirname
(
__file__
)
not
in
sys
.
path
:
sys
.
path
.
append
(
dirname
(
__file__
))
from
class_spec
import
SemanticClass
,
UnSemanticClass
from
config
import
check_config_files_exist
,
get_datatypes_dir
,
PROJECT_NAME
,
PROJECT_ONTOLOGY_FILE
,
PROJECT_URL
from
data_handler
import
RDFDataHandler
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
dict2xml
# Module metadata.
__author__ = "Christian Steiner"
# The maintainer is the same person as the author.
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLOntologyConverter:
    """Convert the semantic dictionaries of python classes to an OWL ontology in turtle format.

    The converter collects all triples in ``self.project_graph`` and records the
    URIs it created for classes and properties in
    ``self.uri_mapping4cls_and_properties``.
    """
    # When True, create_ontology shows no progress bar and writes no xml mapping file.
    UNITTESTING = False
    # rdflib property path: zero or more rdfs:subClassOf steps.
    INFERRED_SUB_CLASS = RDFS.subClassOf * '*'

    def __init__(self, project_ontology_file=None):
        """Set up the project graph, optionally seeded from an existing ontology file.

        :param project_ontology_file: path to a turtle file. If the file exists and
            yields triples, base URI, namespaces and project name are taken from it
            instead of from the config values PROJECT_URL and PROJECT_NAME.
        """
        self.class_uri_dict = {}
        self.uri_mapping4cls_and_properties = {}
        self.project_graph = Graph()
        self.base_uriref = URIRef(PROJECT_URL)
        self.project_name = PROJECT_NAME
        self.ns = {self.base_uriref + '#': self.project_name}
        if project_ontology_file is not None and isfile(project_ontology_file):
            self.project_graph.parse(project_ontology_file, format="turtle")
            if len(self.project_graph) > 0:
                # The subject typed owl:Ontology becomes the base URI of the project.
                self.base_uriref = self.project_graph.value(predicate=RDF.type, object=OWL.Ontology, any=False)
                # Map namespace URI -> prefix, so that the project prefix can be looked up by URI.
                self.ns = {uriref: ns for ns, uriref in self.project_graph.namespace_manager.namespaces()}
                self.project_name = self.ns.get(self.base_uriref + '#')
        project_uri = self.base_uriref + '#'
        self.project_graph.bind(self.project_name, project_uri)
        self.uri_mapping4cls_and_properties['ontology'] = {
            'project_name': self.project_name,
            'project_uri': project_uri,
        }
        self.uri_mapping4cls_and_properties['classes'] = {}

    def addClass2Graph(self, cls, semantic_dict=None) -> (URIRef, type):
        """Add a class to project_graph.

        :param cls: the python class that should be added as owl:Class
        :param semantic_dict: semantic dictionary of cls; fetched from cls if omitted
        :return: (cls_uri (URIRef), super_cls (cls)); super_cls is None if the
            semantic dictionary declares no super class
        """
        if semantic_dict is None:
            semantic_dict = cls.get_semantic_dictionary()
        class_info = semantic_dict[SemanticClass.CLASS_KEY]
        comment, label = self.get_comment_label(cls)
        cls_uri = URIRef(self.base_uriref + '#' + cls.__name__)
        self.project_graph.add((cls_uri, RDF.type, OWL.Class))
        self.project_graph.add((cls_uri, RDFS.isDefinedBy, self.base_uriref))
        if comment != '':
            self.project_graph.add((cls_uri, RDFS.comment, Literal(comment, lang='en')))
        if label != '':
            self.project_graph.add((cls_uri, RDFS.label, Literal(label, lang='en')))
        super_cls = None
        declared_super_cls = class_info.get(SemanticClass.TYPE)
        if declared_super_cls:
            super_cls = declared_super_cls
            # The super class (and its properties) has to exist before this class refers to it.
            super_uri = self.createClassAndProperties(super_cls)
            if super_uri is not None:
                self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
        for super_uri_string in class_info.get(SemanticClass.SUBCLASS_OF, ()):
            super_uri = URIRef(super_uri_string)
            # Skip super classes that are already reachable via a chain of rdfs:subClassOf.
            if (cls_uri, self.INFERRED_SUB_CLASS, super_uri) not in self.project_graph:
                self.project_graph.add((cls_uri, RDFS.subClassOf, super_uri))
        return cls_uri, super_cls

    def addProperty2Graph(self, property_uri, domain_uri, range_uri, info_dict, property_type=OWL.ObjectProperty):
        """Add a property to self.project_graph.

        :param property_uri: URI of the property
        :param domain_uri: URI of the class that is the rdfs:domain of the property
        :param range_uri: URI of the rdfs:range of the property
        :param info_dict: property part of a semantic dictionary; may provide label,
            comment and cardinality
        :param property_type: rdf:type of the property
        """
        if SemanticClass.PROPERTY_LABEL in info_dict:
            label = info_dict[SemanticClass.PROPERTY_LABEL]
        else:
            local_name = property_uri.split('#')[1]
            # Strip only the leading prefix: a plain replace() would also remove
            # every inner "has", e.g. turn "hasPhase" into "Pe".
            if local_name.startswith('has'):
                local_name = local_name[len('has'):]
            label = 'has ' + local_name
        self.project_graph.add((property_uri, RDF.type, property_type))
        self.project_graph.add((property_uri, RDFS.isDefinedBy, self.base_uriref))
        self.project_graph.add((property_uri, RDFS.domain, domain_uri))
        self.project_graph.add((property_uri, RDFS.range, range_uri))
        if SemanticClass.PROPERTY_COMMENT in info_dict:
            comment = info_dict[SemanticClass.PROPERTY_COMMENT]
            self.project_graph.add((property_uri, RDFS.comment, Literal(comment, lang='en')))
        self.project_graph.add((property_uri, RDFS.label, Literal(label, lang='en')))
        if info_dict.get(SemanticClass.CARDINALITY, 0) > 0:
            self.addRestriction2Class(domain_uri, property_uri, info_dict)

    def addRestriction2Class(self, cls_uri, property_uri, info_dict):
        """Adds restriction on property_uri to class cls_uri.

        Nothing is added unless info_dict specifies a cardinality greater than 0.
        A warning is issued if cls_uri is not yet a subject in the graph.
        """
        if info_dict.get(SemanticClass.CARDINALITY, 0) <= 0:
            return
        if (cls_uri, None, None) not in self.project_graph:
            warnings.warn('{} not in graph!'.format(cls_uri))
        restriction = BNode()
        # Use the restriction kind named in info_dict if there is one, else owl:cardinality.
        if SemanticClass.CARDINALITY_RESTRICTION in info_dict:
            cardinality_restriction = URIRef(OWL + info_dict[SemanticClass.CARDINALITY_RESTRICTION])
        else:
            cardinality_restriction = OWL.cardinality
        cardinality = info_dict[SemanticClass.CARDINALITY]
        self.project_graph.add((cls_uri, RDFS.subClassOf, restriction))
        self.project_graph.add((restriction, RDF.type, OWL.Restriction))
        self.project_graph.add((restriction, OWL.onProperty, property_uri))
        self.project_graph.add((restriction, cardinality_restriction,
                                Literal(str(cardinality), datatype=XSD.nonNegativeInteger)))

    def create_ontology(self, datatypes_dir, target_ontology_file):
        """Convert all classes contained in datatypes_dir that are subclasses of class_spec.SemanticClass to rdf.

        The graph is written to target_ontology_file in turtle format. Unless
        UNITTESTING is set, a xml file with the uri mapping is written as well.

        :return: exit code (int)
        """
        if not isdir(datatypes_dir):
            print('Error: dir {} does not exist!'.format(datatypes_dir))
            usage()
            return 1
        semantic_classes = self.get_semantic_classes(datatypes_dir)
        progress_bar = None
        if not Py2TTLOntologyConverter.UNITTESTING:
            progress_bar = Bar('creating classes and properties', max=len(semantic_classes))
        for cls in semantic_classes:
            self.createClassAndProperties(cls)
            if progress_bar is not None:
                progress_bar.next()
        if progress_bar is not None:
            progress_bar.finish()
        self.uri_mapping4cls_and_properties['ontology'].update({'ontology_file': target_ontology_file})
        # Serialize before opening the file, so that a failure does not truncate an existing file.
        serialized_graph = self.project_graph.serialize(format="turtle")
        if isinstance(serialized_graph, str):
            # Newer rdflib versions return str instead of bytes.
            serialized_graph = serialized_graph.encode('utf-8')
        with open(target_ontology_file, 'wb') as ontology_file:
            ontology_file.write(serialized_graph)
        if not Py2TTLOntologyConverter.UNITTESTING:
            xml_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2'\
                + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
            dict2xml(self.uri_mapping4cls_and_properties, xml_file)
        return 0

    def createClassAndProperties(self, cls):
        """Creates a owl:Class and some owl:ObjectProperty from semantic_dictionary of a python class.

        A class that has been processed before is not processed again.

        :return: the URI of the class (rdflib.URIRef)
        """
        if cls.__name__ not in self.class_uri_dict:
            self.class_uri_dict.update({cls.__name__: cls})
            semantic_dict = cls.get_semantic_dictionary()
            cls_uri, super_cls = self.addClass2Graph(cls, semantic_dict)
            properties = semantic_dict['properties']
            # Properties of the super class: they are reused instead of being created again.
            super_properties = {}
            if super_cls is not None:
                super_semantic_dict = super_cls.get_semantic_dictionary()
                if len(super_semantic_dict) > 0:
                    super_properties = super_semantic_dict['properties']
            uri_mapping4properties = {}
            for property_key in self._get_semantic_dictionary_keys_super_first(properties):
                if not super_properties.get(property_key):
                    property_dict4key = properties.get(property_key)
                    property_cls = property_dict4key.get('class')
                    _, property_uri = self.createProperty(cls_uri, property_key, property_cls, property_dict4key)
                    uri_mapping4properties.update({property_key: property_uri})
                else:
                    inherited_property_uri = self.uri_mapping4cls_and_properties.get('classes')\
                        .get(super_cls.__name__).get('properties').get(property_key)
                    if inherited_property_uri:
                        uri_mapping4properties.update({property_key: inherited_property_uri})
            self.uri_mapping4cls_and_properties.get('classes')\
                .update({cls.__name__: {'class_uri': cls_uri, 'properties': uri_mapping4properties}})
        return URIRef(self.base_uriref + '#' + cls.__name__)

    def createProperty(self, domain_uri, property_name, range_cls, info_dict) -> (URIRef, URIRef):
        """Creates a owl:ObjectProperty (or a owl:DatatypeProperty for builtin range classes other than list).

        :param domain_uri: URI of the class the property belongs to
        :param property_name: key of the property in the semantic dictionary
        :param range_cls: python class of the values of the property
        :param info_dict: property part of the semantic dictionary for property_name
        :return: tuple of domain_uri (rdflib.URIRef) and property_uri (rdflib.URIRef) of created property
        """
        if SemanticClass.PROPERTY_NAME in info_dict:
            name = info_dict[SemanticClass.PROPERTY_NAME]
        else:
            name = self.createPropertyName(property_name=property_name)
        property_uri = URIRef(self.base_uriref + '#' + name)
        range_uri = URIRef(self.base_uriref + '#' + range_cls.__name__)
        super_property_uri = None
        if SemanticClass.SUBPROPERTYOF in info_dict:
            super_property_uri = URIRef(info_dict[SemanticClass.SUBPROPERTYOF])
        elif SemanticClass.SUPER_PROPERTY in info_dict:
            # The super property is described by a nested info dictionary: create it first.
            super_info_dict = info_dict[SemanticClass.SUPER_PROPERTY]
            domain_uri, super_property_uri = self.createProperty(
                domain_uri, super_info_dict.get(SemanticClass.PROPERTY_NAME), range_cls, super_info_dict)
        if (property_uri, None, None) not in self.project_graph:
            property_type = OWL.ObjectProperty
            if range_cls.__module__ == 'builtins' and range_cls != list:
                property_type = OWL.DatatypeProperty
                range_uri = RDFDataHandler.SIMPLE_DATA_TYPE_MAPPING.get(range_cls)
                if range_uri == XSD.string and property_name == 'URL':
                    range_uri = XSD.anyURI
            self.addProperty2Graph(property_uri, domain_uri, range_uri, info_dict, property_type=property_type)
        elif not any((domain_uri, self.INFERRED_SUB_CLASS, known_domain) in self.project_graph
                     for known_domain in self.project_graph.objects(property_uri, RDFS.domain)):
            # if domain_uri is NOT a subclass of a cls specified by RDFS.domain
            if info_dict.get(SemanticClass.CARDINALITY, 0) > 0:
                self.addRestriction2Class(domain_uri, property_uri, info_dict)
            self.project_graph.add((property_uri, RDFS.domain, domain_uri))
        if super_property_uri is not None\
           and (property_uri, RDFS.subPropertyOf, super_property_uri) not in self.project_graph:
            self.project_graph.add((property_uri, RDFS.subPropertyOf, super_property_uri))
        return domain_uri, property_uri

    def createPropertyName(self, property_name=None, subject_uri=None, object_uri=None, connector='BelongsTo', prefix='has'):
        """Returns a property name.

        The first argument that is not None decides how the name is built:

        - property_name: snake_case is converted to CamelCase and prefixed,
          e.g. 'line_number' -> 'hasLineNumber'
        - subject_uri: local name of subject_uri + connector + local name of
          object_uri, with a lower case first character
        - object_uri: prefix + local name of object_uri

        If all of them are None, or if property_name yields an empty name, prefix is returned.
        """
        if property_name is not None:
            first_part, *other_parts = property_name.split('_')
            camel_cased = first_part.lower() + ''.join(part.capitalize() for part in other_parts)
            if not camel_cased:
                # Nothing left to append, e.g. for '' or '_'.
                return prefix
            if camel_cased[0].islower():
                return prefix + camel_cased[0].upper() + camel_cased[1:]
            return prefix + camel_cased
        if subject_uri is not None:
            combined_name = subject_uri.split('#')[1]\
                + self.createPropertyName(object_uri=object_uri, prefix=connector)
            return combined_name[0].lower() + combined_name[1:]
        if object_uri is not None:
            return prefix + object_uri.split('#')[1]
        return prefix

    def get_comment_label(self, cls):
        """Returns comment and label from cls __doc__.

        The comment is the docstring without line breaks or, if the docstring
        contains a '.', its first non-empty line. The label is the text after a
        '@label' tag in the docstring; without such a tag it is derived from the
        class name by splitting it at capitalized words and lowercasing the parts.
        A missing docstring is treated like an empty one.

        :return: (comment (str), label (str))
        """
        doc = cls.__doc__ or ''
        comment = doc.replace('\n', '').lstrip()
        label = cls.__name__
        if '.' in doc:
            comment = [text for text in doc.split('\n') if text != ''][0].lstrip()
        label_match = re.search(r'(@label[:]*\s)(.*[\.]*)', doc) if '@label' in doc else None
        if label_match is not None:
            label = label_match.group(2)
        elif re.search(r'([A-Z][a-z]+)', label):
            label = ' '.join([text.lower() for text in re.split(r'([A-Z][a-z]+)', label) if text != ''])
        return comment, label

    def get_semantic_classes(self, datatypes_dir):
        """Returns a list of all classes that are contained in datatypes_dir that are subclasses of class_spec.SemanticClass.

        Modules whose file name starts with 'test_' or '_' are skipped. Subclasses
        of UnSemanticClass and SemanticClass itself are excluded.

        :return: a list of classes, sorted by class name
        """
        base_dir = dirname(dirname(__file__))
        if base_dir not in sys.path:
            sys.path.append(base_dir)
        # A trailing '/' would result in an empty component in the dotted module name.
        root_modul_name = datatypes_dir.rstrip('/').replace('/', '.')
        module_names = [file_name[:-len('.py')] for file_name in listdir(datatypes_dir)
                        if file_name.endswith('.py') and not file_name.startswith(('test_', '_'))]
        all_classes = []
        for module_name in module_names:
            modul = importlib.import_module('{}.{}'.format(root_modul_name, module_name))
            all_classes += inspect.getmembers(modul, inspect.isclass)
        # A class imported by several modules is found several times: remove duplicates.
        all_classes = sorted(set(all_classes), key=lambda current_class: current_class[0])
        return [cls for _, cls in all_classes
                if issubclass(cls, SemanticClass)
                and not issubclass(cls, UnSemanticClass)
                and cls != SemanticClass]

    def _get_builtin_cls_keys(self, property_dict):
        """Returns a list of keys for classes that are builtin.

        An entry of property_dict is either a dictionary with a 'class' key or a
        sequence whose first item is the class.
        """
        builtin_cls_keys = []
        for key, entry in property_dict.items():
            property_cls = entry.get('class') if isinstance(entry, dict) else entry[0]
            if not isinstance(property_cls, dict) and property_cls.__module__ == 'builtins':
                builtin_cls_keys.append(key)
        return builtin_cls_keys

    def _get_semantic_dictionary_keys_super_first(self, property_dict):
        """Sorts the keys of the property part of a semantic dictionary
        and returns the keys for super classes before keys of subclasses.

        Keys of builtin classes come first, in the order of property_dict.

        :return: a sorted list of keys.
        """
        builtin_cls_keys = self._get_builtin_cls_keys(property_dict)
        complex_cls_keys = []
        for key in [key for key in property_dict if key not in builtin_cls_keys]:
            current_cls = property_dict.get(key).get('class')
            for index, cls_key in enumerate(complex_cls_keys):
                potential_sub_cls = property_dict.get(cls_key).get('class')
                if issubclass(potential_sub_cls, current_cls):
                    # Insert the key before the first key whose class is a subclass of its class.
                    complex_cls_keys.insert(index, key)
                    break
            else:
                complex_cls_keys.append(key)
        return builtin_cls_keys + complex_cls_keys
def usage():
    """Print the command line help of this script, i.e. the docstring of main."""
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py classes that are subclasses of class_spec.SemanticClass to owl:Class
    and its properties to owl:ObjectProperty.
    py2ttl/py2ttl_ontology.py [OPTIONS <dir>]
    <dir> [optional] directory containing datatypes that are subclasses of class_spec.SemanticClass.
    Overwrites DATATYPES_DIR in py2ttl/config.py.
    OPTIONS:
    -h|--help: show help
    -s|--source=source_ontology_file source ontology ttl file, option overwrites PROJECT_ONTOLOGY_FILE in py2ttl/config.py
    -t|--target=target_ontology_file target ontology ttl file, default: 'PROJECT_PREFIX-ontology_autogenerated.ttl'
    :return: exit code (int)
    """
    # NOTE: the docstring above is printed by usage(), it is the help text of the script.
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = ''
    try:
        parsed_options, positionals = getopt.getopt(argv, "hs:t:", ["help", "source=", "target="])
    except getopt.GetoptError:
        usage()
        return 2
    for flag, value in parsed_options:
        if flag in ('-h', '--help'):
            usage()
            return 0
        if flag in ('-t', '--target'):
            target_ontology_file = value
        elif flag in ('-s', '--source'):
            source_ontology_file = value
    converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    # A directory passed on the command line takes precedence over the configured one.
    if positionals:
        datatypes_dir = positionals[0]
    # Without a target option the file name is derived from the project name of the converter.
    if not target_ontology_file:
        target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, converter.project_name)
    return converter.create_ontology(datatypes_dir, target_ontology_file)
if __name__ == "__main__":
    # Run the converter with the command line arguments (without the script name)
    # and use its return value as exit status of the process.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment