Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F74440142
py2ttl_data.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Jul 27, 20:33
Size
7 KB
Mime Type
text/x-python
Expires
Mon, Jul 29, 20:33 (2 d)
Engine
blob
Format
Raw Data
Handle
19394039
Attached To
rNIETZSCHEPYTHON nietzsche-python
py2ttl_data.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert py objects to data in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from colorama import Fore, Style
import getopt
import lxml.etree as ET
from os import sep, path, listdir
from os.path import isfile, isdir, dirname, basename, getmtime
from progress.bar import Bar
import re
import sys

# The statement order below matters: each sys.path.append must run before
# the project imports that follow it.
# NOTE(review): 'svgscripts' is a relative path, so it is resolved against
# the current working directory -- presumably the script is expected to be
# started from the repository root; confirm.
sys.path.append('svgscripts')
from datatypes.archival_manuscript import ArchivalManuscriptUnity
from datatypes.super_page import SuperPage

# Make sure the directory containing this file is on sys.path before the
# sibling modules (class_spec, config, data_handler) are imported.
if dirname(__file__) not in sys.path:
    sys.path.append(dirname(__file__))

from class_spec import SemanticClass
from config import check_config_files_exist, get_datatypes_dir, PROJECT_NAME, PROJECT_ONTOLOGY_FILE, PROJECT_URL
from data_handler import RDFDataHandler

# NOTE(review): relative path again (see 'svgscripts' above); presumably
# where myxmlwriter lives -- confirm.
sys.path.append('shared_util')
from myxmlwriter import xml2dict
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__  # the maintainer is the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
class Py2TTLDataConverter:
    """This class can be used to convert py objects to rdf data in turtle format.

    A converter is bound to one manuscript file and one mapping dictionary.
    :meth:`convert` instantiates the manuscript, hands it to a
    ``RDFDataHandler`` and writes the resulting graph to a ``*_DATA.ttl``
    file named after the manuscript title.
    """
    # When True, all progress output on stdout is suppressed.
    UNITTESTING = False

    def __init__(self, manuscript_file, xml_dictionary_file=None, mapping_dictionary=None):
        """Create a converter for ``manuscript_file``.

        :param manuscript_file: manuscript xml file, passed on to
            ``ArchivalManuscriptUnity.create_cls`` by :meth:`convert`.
        :param xml_dictionary_file: xml file the mapping dictionary is read
            from (via ``xml2dict``); only used if no ``mapping_dictionary``
            is given.
        :param mapping_dictionary: ready-made mapping dictionary; takes
            precedence over ``xml_dictionary_file``.
        :raises Exception: if neither ``xml_dictionary_file`` nor
            ``mapping_dictionary`` is given.
        """
        verbose = not Py2TTLDataConverter.UNITTESTING
        if mapping_dictionary is not None:
            self.mapping_dictionary = mapping_dictionary
        elif xml_dictionary_file is not None:
            if verbose:
                print(Fore.CYAN + 'initializing mapping dictionary from file "{}" ...'.format(xml_dictionary_file))
            self.mapping_dictionary = xml2dict(xml_dictionary_file)
            if verbose:
                print(Fore.GREEN + '[{} classes added]'.format(len(self.mapping_dictionary['classes'])))
        else:
            raise Exception('Error: Py2TTLDataConverter init expects either a xml_dictionary_file or a mapping_dictionary!')
        self.manuscript_file = manuscript_file

    def convert(self, page_status_list=None, create_or_update_changed_pages=False):
        """Convert manuscript instantiated with manuscript_file to rdf data and write to target_file.

        :param page_status_list: list of page status strings to include;
            ``None`` or an empty list selects ``'OK'`` and
            ``SuperPage.STATUS_MERGED_OK``.
        :param create_or_update_changed_pages: if true, pages are left out of
            the manuscript data file and written to one ttl file per page
            instead, but only for pages whose xml file is newer than the
            existing ttl file (or that have no ttl file yet).
        """
        verbose = not Py2TTLDataConverter.UNITTESTING
        if not page_status_list:
            page_status_list = ['OK', SuperPage.STATUS_MERGED_OK]
        if verbose:
            print(Fore.CYAN + 'initializing python objects with file "{}" ...'.format(self.manuscript_file))
        manuscript = ArchivalManuscriptUnity.create_cls(self.manuscript_file,
                                                        page_status_list=page_status_list,
                                                        update_page_styles=True)
        # The '_INCLUDE' tag marks a data file restricted to status 'OK' only.
        only_ok_pages = 'OK' in page_status_list and len(page_status_list) == 1
        include_tag = '_INCLUDE' if only_ok_pages else ''
        target_data_file = manuscript.title.replace(' ', '_') + include_tag + '_DATA.ttl'
        # Use the same truthiness test as the branch below, so that pages are
        # skipped in the manuscript data exactly when they are exported as
        # separate files (an `is False` test here would skip them for falsy
        # non-bool flags such as None or 0 without ever exporting them).
        skip_list = ['Page'] if create_or_update_changed_pages else None
        data_handler = RDFDataHandler(target_data_file, self.mapping_dictionary)
        identifier_uri = data_handler.add_data(manuscript, '', skip_data_instance_list=skip_list)
        if create_or_update_changed_pages:
            counter = self._create_or_update_pages(manuscript, identifier_uri)
            if verbose:
                print(Fore.GREEN + f'[{counter} pages created/updated]')
        elif verbose:
            pages_with_file = sum(1 for page in manuscript.pages if 'xml_file' in page.__dict__)
            print(Fore.GREEN + '[{} pages added]'.format(pages_with_file))
        if verbose:
            print(Fore.CYAN + 'adding triples to rdf graph ... ')
            print(Fore.GREEN + '[{} statements added]'.format(len(data_handler.data_graph)))
            print(Fore.CYAN + 'writing graph to file "{}" ...'.format(target_data_file))
        data_handler.write()
        if verbose:
            print(Fore.GREEN + '[OK]')
            print(Style.RESET_ALL)

    def _create_or_update_pages(self, manuscript, identifier_uri):
        """Write one ttl file per page whose xml file changed; return their count.

        :param manuscript: the manuscript instance owning the pages.
        :param identifier_uri: uri returned by ``add_data`` for the
            manuscript; the part after ``'#'`` is passed on as identifier
            prefix for the page data.
        :return: number of page files created or updated (int)
        """
        updated = 0
        # Iterate over a snapshot of the page list, as the original code did.
        for page in list(manuscript.pages):
            if 'xml_file' not in page.__dict__:
                # TODO: change xml_file to @output in manuscript_tree
                url_stem = manuscript.manuscript_tree.docinfo.URL.replace('.xml', '_')
                # str() keeps the concatenation working should page.number
                # not be a string.
                page.xml_file = url_stem + 'page' + str(page.number) + '.xml'
            # Replaces every occurrence of 'xml' in the path, not only the
            # extension -- NOTE(review): presumably intended, so that files
            # from a dir 'xml' end up in a dir 'ttl' (see help text of main).
            target_page_file = page.xml_file.replace('xml', 'ttl')
            if not isfile(page.xml_file):
                continue
            if isfile(target_page_file) and getmtime(page.xml_file) <= getmtime(target_page_file):
                # The ttl file is at least as new as its xml source.
                continue
            updated += 1
            page_data_handler = RDFDataHandler(target_page_file, self.mapping_dictionary)
            page_data_handler.add_data(page, identifier_uri.split('#')[1], parent_data_instance=manuscript)
            page_data_handler.write()
        return updated
def usage():
    """Print the command line help, which is the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py objects to rdf data in turtle format.

        py2ttl/py2ttl_data.py [OPTIONS] <manuscript.xml>

        <manuscript.xml>    xml file of type shared_util.myxmlwriter.FILE_TYPE_XML_MANUSCRIPT.

        OPTIONS:
        -h|--help:                          show help
        -c|--create-or-update-pages         create or update pages as seperate ttl files in dir 'ttl'
        -i|--include-status=STATUS          include pages with status = STATUS. STATUS is a ':' seperated string of status, e.g. 'OK:faksimile merged'.
        -m|--mapping=mapping_dict.xml       xml file generated by py2ttl/py2ttl.py containing mapping information for each property of a class.

        :return: exit code (int)
    """
    # The docstring above doubles as the help text printed by usage().
    # argv is the argument list without the program name (sys.argv[1:]).
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    # Default mapping file name, derived from the datatypes dir and the
    # ontology file name; can be overridden with -m/--mapping.
    xml_dictionary_file = 'mapping_file4' + datatypes_dir.replace(sep, '.') + '2'\
        + target_ontology_file.replace('.' + sep, '').replace(sep, '.').replace('.ttl', '.xml')
    create_or_update_pages = False
    page_status_list = None

    try:
        # 'm:' / 'mapping=' have to be registered here: without them getopt
        # rejects the documented -m/--mapping option with a GetoptError.
        # -I/--Include-files-only is still accepted (and ignored) so that
        # existing invocations keep working.
        opts, args = getopt.getopt(argv, "hci:m:I",
                                   ["help", "create-or-update-pages", "include-status=",
                                    "mapping=", "Include-files-only"])
    except getopt.GetoptError:
        usage()
        return 2

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-i', '--include-status'):
            page_status_list = arg.split(':')
        elif opt in ('-c', '--create-or-update-pages'):
            create_or_update_pages = True
        elif opt in ('-m', '--mapping'):
            xml_dictionary_file = arg

    if len(args) < 1:
        usage()
        return 2
    manuscript_file = args[0]
    # Both input files must exist before any conversion is attempted.
    if not isfile(xml_dictionary_file) or not isfile(manuscript_file):
        usage()
        return 2

    converter = Py2TTLDataConverter(manuscript_file, xml_dictionary_file=xml_dictionary_file)
    converter.convert(page_status_list=page_status_list,
                      create_or_update_changed_pages=create_or_update_pages)
    return 0
# Script entry point: run main with the command line arguments (program
# name stripped) and use its return value as the process exit code.
if __name__ == "__main__":
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment