Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91239824
create_manuscript.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Nov 9, 06:47
Size
8 KB
Mime Type
text/x-python
Expires
Mon, Nov 11, 06:47 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22227540
Attached To
rNIETZSCHEPYTHON nietzsche-python
create_manuscript.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to create a ArchivalManuscriptUnity.
"""
# Copyright (C) University of Basel 2020 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
__author__
=
"Christian Steiner"
__maintainer__
=
__author__
__copyright__
=
'University of Basel'
__email__
=
"christian.steiner@unibas.ch"
__status__
=
"Development"
__license__
=
"GPL v3"
__version__
=
"0.0.1"
from
colorama
import
Fore
,
Style
import
getopt
import
re
import
sys
from
os
import
listdir
,
sep
,
path
from
os.path
import
isfile
,
isdir
,
dirname
,
basename
import
lxml.etree
as
ET
if
dirname
(
__file__
)
not
in
sys
.
path
:
sys
.
path
.
append
(
dirname
(
__file__
))
from
datatypes.archival_manuscript
import
ArchivalManuscriptUnity
from
datatypes.super_page
import
SuperPage
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
parse_xml_of_type
,
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
UNITTESTING
=
False
class ManuscriptCreator:
    """Creates or updates the xml files of ArchivalManuscriptUnity instances.
    """

    def __init__(self, xml_target_dir):
        # Directory that is prepended to manuscript files given without a directory part.
        self.xml_target_dir = xml_target_dir

    def _get_or_create_element(self, node, xpath, create_id=False) -> ET.Element:
        """Return the first child of node matching xpath, creating it if missing.

        :param node: parent element; it is queried with node.xpath(...)
        :param xpath: either a plain tag name or an expression of the form
            tag[@attribute="value"]
        :param create_id: if True, a newly created element gets an 'id'
            attribute holding the number of same-named elements found
            before it was appended
        :return: the existing or the newly created element
        """
        matches = node.xpath(xpath)
        if len(matches) > 0:
            return matches[0]
        # Nothing found: work out the tag name of the element to append.
        has_attribute_predicate = re.match(r'[a-z]+\[@[a-z-]+=', xpath) is not None
        tag_name = xpath
        if has_attribute_predicate:
            tag_name = re.match(r'(.+?)\[@[a-z]+.*', xpath).group(1)
        # Count before appending, so the first element of a kind gets id "0".
        sibling_count = len(node.xpath(tag_name))
        new_element = ET.SubElement(node, tag_name)
        if has_attribute_predicate:
            # Carry the attribute named in the predicate over to the new element.
            attribute_name = re.match(r'[a-z]+\[@(.+?)=.*', xpath).group(1)
            attribute_value = re.match(r'[a-z]+\[@[a-z-]+="(.+?)"]', xpath).group(1)
            new_element.set(attribute_name, attribute_value)
        if create_id:
            new_element.set('id', str(sibling_count))
        return new_element

    def _create_or_update_pages(self, pages_node, manuscript_page_url_mapping):
        """Make sure pages_node has one page element per mapped page number.

        :param pages_node: parent element of the page elements
        :param manuscript_page_url_mapping: dictionary page number -> url
        """
        for page_number, url in manuscript_page_url_mapping.items():
            page_xpath = f'{SuperPage.XML_TAG}[@number="{page_number}"]'
            page_node = self._get_or_create_element(pages_node, page_xpath, create_id=True)
            # An alias that is already set (and not empty) is never overwritten.
            if not page_node.get('alias'):
                page_node.set('alias', basename(url))

    def create_or_update_manuscripts(self, manuscript_files, page_url_mapping):
        """Create or update the manuscript file of every known manuscript.

        :param manuscript_files: list of manuscript xml files; it is extended
            in place with '<title>.xml' for every title of page_url_mapping
            that has no matching file in the list yet
        :param page_url_mapping: dictionary title -> {page number -> url}
        """
        # Step 1: every mapped title needs a manuscript file.
        for mapped_title in page_url_mapping:
            expected_name = mapped_title.replace(' ', '_') + '.xml'
            already_listed = any(basename(known_file) == expected_name
                                 for known_file in manuscript_files)
            if not already_listed:
                manuscript_files.append(expected_name)
        # Step 2: load or build each manuscript, add its pages and write it.
        for manuscript_file in manuscript_files:
            if dirname(manuscript_file) == '':
                target_file = self.xml_target_dir + sep + manuscript_file
            else:
                target_file = manuscript_file
            # The title is derived from the file name, e.g. "Mp_XV.xml" -> "Mp XV".
            title = basename(target_file).replace('.xml', '').replace('_', ' ')
            manuscript = ArchivalManuscriptUnity(title=title)
            if isfile(target_file):
                manuscript = ArchivalManuscriptUnity.create_cls(target_file)
            else:
                new_root = ET.Element(ArchivalManuscriptUnity.XML_TAG)
                manuscript.manuscript_tree = ET.ElementTree(new_root)
                manuscript.manuscript_tree.docinfo.URL = target_file
                tree_root = manuscript.manuscript_tree.getroot()
                tree_root.set('title', manuscript.title)
                tree_root.set('type', manuscript.manuscript_type)
            if title in page_url_mapping:
                pages_node = self._get_or_create_element(
                    manuscript.manuscript_tree.getroot(), 'pages')
                self._create_or_update_pages(pages_node, page_url_mapping[title])
            # Nothing is written to disk while the module flag UNITTESTING is set.
            if not UNITTESTING:
                write_pretty(xml_element_tree=manuscript.manuscript_tree,
                             file_name=target_file,
                             script_name=__file__,
                             file_type=FILE_TYPE_XML_MANUSCRIPT)
def create_page_url_mapping(input_file, mapping_dictionary, default_title=''):
    """Fill mapping_dictionary with the page to url mapping read from input_file.

    The input file alternates between key lines and url lines:

        TITLE, S. PAGENUMBER
        URL

    A key line is split at commas and every occurrence of 'S.' is removed.
    If it has more than one part, the first part is the manuscript title
    and the second part the page number; otherwise the whole line is the
    page number and the title seen last (initially default_title) is used.
    A line starting with 'http' or 'www' is the url of the page named by
    the preceding key line; 'http://' is prepended to 'www' urls.
    Blank lines are ignored.

    See: 'tests_svgscripts/test_data/content.txt'

    :param input_file: path of the text file to read
    :param mapping_dictionary: dictionary title -> {page number -> url};
        it is updated in place, existing entries are kept
    :param default_title: title used for pages listed before any key line
        that names a title
    """
    with open(input_file, 'r') as f:
        lines = f.readlines()
    page_key = None
    current_title = default_title
    for line in lines:
        content = line.replace('\n', '')
        if not content.strip():
            # A blank line must not reset the page key to an empty string.
            continue
        if content.startswith(('http', 'www')):
            url = content if content.startswith('http') else 'http://' + content
            mapping_dictionary.setdefault(current_title, {})[page_key] = url
            continue
        key_parts = [part.strip() for part in content.replace('S.', '').split(',')]
        if len(key_parts) > 1:
            # Always switch to the title named on this line, also when it is
            # already known (it reappears later in the file or was filled by
            # an earlier input file); otherwise its pages would be filed
            # under the previous title.
            current_title = key_parts[0]
            mapping_dictionary.setdefault(current_title, {})
            page_key = key_parts[1]
        else:
            page_key = key_parts[0]
def usage():
    """Print the help text of this script, i.e. the docstring of main.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to create or update one or more manuscripts.

    svgscripts/create_manuscript.py [OPTIONS] [<input_fileA.txt>, ...] [<xmlManuscriptFile>, ...]

        <input_file.txt>    One or more files mapping pages to faksimile URLs, with 'txt'-suffix
        <xmlManuscriptFile> manuscript file(s) (~ArchivalManuscriptUnity).

        OPTIONS:
        -h|--help: show help
        -t|--title=title    manuscript's title, e.g. "Mp XV".
        -x|--xml-target-dir directory containing xmlManuscriptFile, default "./xml"

        :return: exit code (int)
    """
    title = ''
    xml_target_dir = '.' + sep + 'xml'
    page_url_mapping = {}

    try:
        opts, args = getopt.getopt(argv, "ht:x:", ["help", "title=", "xml-target-dir="])
    except getopt.GetoptError:
        # Unknown option or missing option argument: show the help text.
        usage()
        return 2

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        if opt in ('-t', '--title'):
            title = arg
        elif opt in ('-x', '--xml-target-dir'):
            xml_target_dir = arg

    # Sort the positional arguments: manuscript xml files (names containing
    # '_page' are left out) and existing txt files holding the mappings.
    manuscript_files = []
    input_files = []
    for candidate in args:
        if candidate.endswith('.xml') and '_page' not in candidate:
            manuscript_files.append(candidate)
        if candidate.endswith('.txt') and isfile(candidate):
            input_files.append(candidate)

    # All input files are merged into one mapping.
    for input_file in input_files:
        create_page_url_mapping(input_file, page_url_mapping, default_title=title)

    ManuscriptCreator(xml_target_dir=xml_target_dir)\
        .create_or_update_manuscripts(manuscript_files, page_url_mapping)
    return 0
if __name__ == "__main__":
    # Run main with the command line arguments (program name stripped) and
    # hand its return value to the shell as exit code.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment