Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86467634
create_manuscript_description.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 6, 16:18
Size
7 KB
Mime Type
text/x-python
Expires
Tue, Oct 8, 16:18 (2 d)
Engine
blob
Format
Raw Data
Handle
21426297
Attached To
rNIETZSCHEPYTHON nietzsche-python
create_manuscript_description.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to create a manuscript description in turtle format.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
import
csv
import
getopt
import
lxml.etree
as
ET
from
os
import
sep
,
path
,
listdir
from
os.path
import
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
re
import
sys
sys
.
path
.
append
(
'svgscripts'
)
from
datatypes.manuscript_description
import
ManuscriptDescription
if
dirname
(
__file__
)
not
in
sys
.
path
:
sys
.
path
.
append
(
dirname
(
__file__
))
from
class_spec
import
SemanticClass
from
config
import
check_config_files_exist
,
get_datatypes_dir
,
PROJECT_NAME
,
PROJECT_ONTOLOGY_FILE
,
PROJECT_URL
from
py2ttl_data
import
Py2TTLDataConverter
from
py2ttl_ontology
import
Py2TTLOntologyConverter
from
data_handler
import
RDFDataHandler
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
xml2dict
from
main_util
import
get_manuscript_files_and_include_status
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__  # maintainer and author are the same person
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

# File type identifier for xml project files (not referenced further in this script).
FILE_TYPE_XML_PROJECT = "xmlProjectFile"
class Py2TTLCSVManuscriptDescriptionConverter:
    """This class can be used to convert manuscript descriptions to rdf data in turtle format.

    The descriptions are read from a csv file; a second csv file supplies the
    mapping that is used to look up the images of each leaf.
    """
    # When True, all progress output on stdout is suppressed.
    UNITTESTING = False
    # Keys of the description csv file. NAME, DESCRIPTION_KEY and LABEL are
    # used by convert(); ALIAS and URL are not used by this class itself.
    NAME = 'folio-name'
    DESCRIPTION_KEY = 'description'
    ALIAS = 'alias'
    LABEL = 'label'
    URL = 'nietzschesource-url'

    def __init__(self, csv_file: str, faksimile_mapping: str, mapping_dictionary: str):
        """Store the input settings and load the faksimile mapping.

        :param csv_file: path of the csv file that contains the descriptions.
        :param faksimile_mapping: path of the mapping csv file; it is read immediately.
        :param mapping_dictionary: passed on unchanged to RDFDataHandler by convert().
            NOTE(review): annotated as str, but main() passes
            ontology_converter.uri_mapping4cls_and_properties, presumably a dict -- confirm.
        """
        self.mapping_dictionary = mapping_dictionary
        self.csv_file = csv_file
        self.faksimile_mapping_dict = self._init_mapping(faksimile_mapping)

    def _init_mapping(self, faksimile_mapping) -> dict:
        """Init faksimile_mapping dictionary.

        Reads the csv file ``faksimile_mapping`` and groups its rows by the
        value of the second column.

        :param faksimile_mapping: path of the mapping csv file (with a header row).
        :return: dictionary that maps each second-column value to the list of
            first-column values of all rows sharing it, in file order.
        """
        faksimile_mapping_dict = {}
        with open(faksimile_mapping, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            fieldnames = reader.fieldnames
            for row in reader:
                # Columns are addressed by position, so the actual header
                # names of the mapping file do not matter.
                key = row[fieldnames[1]]
                value = row[fieldnames[0]]
                faksimile_mapping_dict.setdefault(key, []).append(value)
        return faksimile_mapping_dict

    def _add_leaf(self, data_handler, leaf_id, images, description):
        """Add one leaf together with its images and its description to data_handler.
        """
        identifier_uri = data_handler.add_external_class_item('Leaf', '', leaf_id)
        data_handler.add_external_property(identifier_uri, 'isDepictedBy', images)
        manuscript_description = ManuscriptDescription.create_cls_from_raw_text(description)
        data_handler.add_data_instance2parent_uri(identifier_uri, 'leafHasDescription', manuscript_description)

    def convert(self, target_data_file):
        """Convert manuscript descriptions from csv_file to rdf data and write to target_file.

        Rows whose label contains '_thumb' are skipped. Consecutive rows with
        the same folio name form one leaf: the description is taken from the
        first row of the group and the images of all its rows are collected
        via the faksimile mapping.

        :param target_data_file: file name handed to RDFDataHandler as the write target.
        """
        verbose = not Py2TTLCSVManuscriptDescriptionConverter.UNITTESTING
        if verbose:
            print(Fore.CYAN + 'initializing python objects with file "{}" ...'.format(self.csv_file))
        data_handler = RDFDataHandler(target_data_file, self.mapping_dictionary)
        leaf_id = ''
        description = ''
        images = []
        counter = 0
        with open(self.csv_file, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                if '_thumb' in row[self.LABEL]:
                    continue
                if row[self.NAME] != leaf_id:
                    # A new folio name starts a new leaf: flush the previous one first.
                    if leaf_id != '':
                        self._add_leaf(data_handler, leaf_id, images, description)
                        counter += 1
                    leaf_id = row[self.NAME]
                    description = row[self.DESCRIPTION_KEY]
                    images = []
                images.extend(self.faksimile_mapping_dict.get(row[self.LABEL], []))
        # Flush the last leaf under the same condition as all previous leaves;
        # requiring images here would silently drop a trailing leaf without images.
        if leaf_id != '':
            self._add_leaf(data_handler, leaf_id, images, description)
            counter += 1
        if verbose:
            print(Fore.GREEN + f'[{counter} leafs added]')
            print(Fore.CYAN + 'adding triples to rdf graph ... ')
            print(Fore.GREEN + '[{} statements added]'.format(str(len(data_handler.data_graph))))
            print(Fore.CYAN + 'writing graph to file "{}" ...'.format(target_data_file))
        data_handler.write()
        if verbose:
            print(Fore.GREEN + '[OK]')
            print(Style.RESET_ALL)
def usage():
    """Print the command line help, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert py objects to a owl:Ontology and rdf data in turtle format.

    py2ttl/create_manuscript_description.py [OPTIONS] <csv-file> <mapping-csv>

        <csv-file>      csv file that contains the description of the manuscript.
        <mapping-csv>   a image to faksimile iri mapping csv file

        OPTIONS:
        -h|--help:          show help
        -o|--output=FILE    specify output file

        :return: exit code (int)
    """
    # NOTE: the docstring above doubles as the help text printed by usage().
    check_config_files_exist()
    datatypes_dir = get_datatypes_dir()
    source_ontology_file = PROJECT_ONTOLOGY_FILE
    target_ontology_file = '.{0}{1}-ontology_autogenerated.ttl'.format(sep, PROJECT_NAME)
    output_file = './include-ttl/descriptions.ttl'
    try:
        opts, args = getopt.getopt(argv, "ho:", ["help", "output="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-o', '--output'):
            # getopt reports the short option as '-o'; the former check against
            # '-0' (zero) never matched, so '-o FILE' was silently ignored.
            output_file = arg
    if len(args) < 2:
        # Both positional arguments (<csv-file> and <mapping-csv>) are required.
        usage()
        return 2
    ontology_converter = Py2TTLOntologyConverter(project_ontology_file=source_ontology_file)
    csv_file = args[0]
    mapping_csv_file = args[1]
    print(Fore.CYAN + 'Create ontology ...')
    if ontology_converter.create_ontology(datatypes_dir, target_ontology_file) == 0:
        print(Fore.GREEN + '[Ontology file {0} created]'.format(target_ontology_file))
    else:
        return 2
    print(Fore.CYAN + f'Create data from "{csv_file}" with ...')
    data_converter = Py2TTLCSVManuscriptDescriptionConverter(
        csv_file,
        mapping_csv_file,
        mapping_dictionary=ontology_converter.uri_mapping4cls_and_properties)
    output = data_converter.convert(output_file)
    # convert() has no return statement, so map its None to the success code
    # to honour the documented int return value; any other value is passed on.
    return 0 if output is None else output
if __name__ == "__main__":
    # Hand everything after the script name to main and exit with its result.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
Event Timeline
Log In to Comment