Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F67044673
create_blank_svg_files.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Jun 19, 14:41
Size
25 KB
Mime Type
text/x-python
Expires
Fri, Jun 21, 14:41 (2 d)
Engine
blob
Format
Raw Data
Handle
18332208
Attached To
rNIETZSCHEPYTHON nietzsche-python
create_blank_svg_files.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to create svg files with a rect for the text_field.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
import
csv
import
getopt
import
json
import
lxml.etree
as
ET
import
shutil
import
subprocess
import
sys
import
os
import
wget
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
,
mkdir
,
remove
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
warnings
from
fix_old_data
import
save_page
from
get_text_field
import
get_text_field_on_image
,
get_text_field_on_thumb
sys
.
path
.
append
(
'svgscripts'
)
from
datatypes.faksimile_image
import
FaksimileImage
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.archival_manuscript
import
ArchivalManuscriptUnity
from
datatypes.page
import
Page
,
STATUS_MERGED_OK
,
STATUS_POSTMERGED_OK
from
datatypes.matrix
import
Matrix
from
util
import
back_up
,
back_up_svg_file
,
copy_faksimile_update_image_location
,
copy_faksimile_svg_file
from
process_files
import
update_svgposfile_status
from
process_words_post_merging
import
update_faksimile_line_positions
,
MERGED_DIR
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
copy_to_bak_dir
,
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
main_util
import
create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

# NOTE(review): not referenced in the visible part of this file -- confirm
# whether this threshold is still needed.
MAX_SVG_XY_THRESHOLD = 10
# Substring matched against the "status" attribute of <page> elements when
# process_default selects the pages to create svg files for.
BLANK_STATUS = 'blank'
class SPARQLQuery:
    """Constants for the curl request that asks the SPARQL endpoint for the
    average left/top/width/height of all text fields.
    """
    # curl options and their values.
    HEADER_OPTION = '--header'
    HEADER_VALUE = 'Accept: application/sparql-results+json,*/*;q=0.9'
    REQUEST_OPTION = '--request'
    REQUEST_VALUE = 'POST'
    ENDPOINT = 'https://nietzsche.fuseki.services.dasch.swiss/nietzsche'
    # URL-encoded form data; split over several literals for readability,
    # the concatenated value is a single line without any whitespace.
    QUERY = (
        'query=PREFIX+tln%3A+%3Chttp%3A%2F%2Fwww.nie.org%2Fontology%2Fnietzsche%23%3E%0A'
        'SELECT+(AVG(%3Fleft)+as+%3Favg_left)+(AVG(%3Ftop)+as+%3Favg_top)'
        '+(AVG(%3Fwidth)+as+%3Favg_width)+(AVG(%3Fheight)+as+%3Favg_height)'
        '+WHERE+%7B%0A+%3Fimage+a+tln%3AFaksimileImage%3B+tln%3AhasTextField+%3Ftextfield.%0A'
        '+%3Ftextfield+tln%3AhasLeft+%3Fleft%3B+tln%3AhasTop+%3Ftop%3B'
        '+tln%3AhasWidth+%3Fwidth%3B+tln%3AhasHeight+%3Fheight.%7D+'
    )
class OldSVGFileCreator:
    """This class can be used in order to create svg files with textfield rects.

    On construction the paths of the external programs "curl" and "inkscape"
    are resolved and the average text field dimensions are loaded from the
    SPARQL endpoint (or from the cached response file).
    """
    # When True, the process_* functions of this module skip progress output.
    UNITTESTING = False
    # File in which the json response of the endpoint is stored and reused.
    RESPONSE = 'response.json'

    def __init__(self, title, faksimile_dir, endpoint=SPARQLQuery.ENDPOINT, target_dir='./tmp'):
        """Store the configuration, locate curl/inkscape and load the averages.

        :param title: manuscript title, used as prefix of the page ids
        :param faksimile_dir: directory containing the faksimile images
        :param endpoint: URL of the SPARQL endpoint
        :param target_dir: directory the svg files are copied to
        """
        # -1 is the placeholder until _init_averages sets the attributes
        # named by the variables of the endpoint's response.
        self.avg_left = -1
        self.avg_top = -1
        self.avg_height = -1
        self.avg_width = -1
        self.endpoint = endpoint
        self.faksimile_dir = faksimile_dir
        self.target_dir = target_dir
        self.title = title
        self.curl = self._get_ext_program_path('curl')
        self.inkscape = self._get_ext_program_path('inkscape')
        self.namespaces = None
        self._init_averages()

    def _init_averages(self):
        """Initialize average dimension of textfield based on the data from the endpoint.

        The endpoint is only queried (via curl) if the response file does not
        exist yet; afterwards every variable listed in the response head is
        stored as a float attribute on this instance.
        """
        if not isfile(self.RESPONSE):
            subprocess.run([self.curl, self.endpoint,
                            SPARQLQuery.REQUEST_OPTION, SPARQLQuery.REQUEST_VALUE,
                            '--data', SPARQLQuery.QUERY,
                            SPARQLQuery.HEADER_OPTION, SPARQLQuery.HEADER_VALUE,
                            '-o', self.RESPONSE], check=True)
        with open(self.RESPONSE) as json_file:
            data = json.load(json_file)
        for key in data['head']['vars']:
            for item in data['results']['bindings']:
                setattr(self, key, float(item[key]['value']))

    def _get_ext_program_path(self, program_name) -> str:
        """Return path to external program.

        :raises subprocess.CalledProcessError: if "which" exits with an error
        :raises FileNotFoundError: if "which" prints nothing or no file
        """
        error_msg = f'External command "{program_name}" not found!\nPlease install "{program_name}", check the output of "which {program_name}" and retry.'
        try:
            cp = subprocess.run(["which", program_name], stdout=subprocess.PIPE, check=True)
        except subprocess.CalledProcessError:
            print(error_msg)
            raise
        program_path = cp.stdout.decode().strip()
        if not program_path or not isfile(program_path):
            raise FileNotFoundError(error_msg)
        return program_path

    def create_svg_file(self, page: ET.Element) -> int:
        """Create a svg file for a page element.

        The faksimile image is downloaded if it is missing, converted to svg
        with inkscape, and a text field rect is attached to the svg root.

        :param page: element with the attributes "number", "output", "alias"
        :return: exit_status (0 on success, 2 on error)
        """
        number = page.get('number')
        page_file = page.get('output')
        alias = page.get('alias')
        faksimile_file = self.faksimile_dir + sep + alias + '.jpg' if alias is not None else None
        page_id = self.title.replace(' ', '_') + '_' + number
        if alias and not isfile(faksimile_file):
            wget.download(FaksimileImage.NIETZSCHE_SOURCES_URL + alias, out=faksimile_file)
        if not (alias and isfile(faksimile_file) and page_file is not None and isfile(page_file)):
            print(faksimile_file, page_file)
            return 2
        target_file = basename(page_file).replace('.xml', '.svg')
        if not isfile(self.target_dir + sep + target_file):
            # NOTE(review): '-z -l' look like the Inkscape 0.9x options for
            # "no GUI" and "export plain svg" -- confirm for the installed version.
            subprocess.run([self.inkscape, '-z', '-l', target_file, faksimile_file], check=True)
            svg_tree = ET.parse(target_file)
            # xpath cannot use the default (None) prefix, so it is mapped to 'ns'.
            namespaces = {k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items()}
            images = svg_tree.xpath('//ns:image', namespaces=namespaces)
            if not images:
                print(f'There has been an error: could not find an image in {target_file}!')
                return 2
            image = images[0]
            image_file = dirname(target_file) + sep + image.get('{%s}href' % namespaces['xlink'])
            image_width = float(image.get('width')) if image.get('width') else 0
            image_height = float(image.get('height')) if image.get('height') else 0
            text_field = get_text_field_on_image(image_file, image_width, image_height, id=page_id)
            text_field.attach_as_rect(svg_tree.getroot())
            copy_faksimile_update_image_location(faksimile_tree=svg_tree, target_directory=self.target_dir)
            # The intermediate svg written by inkscape is no longer needed.
            remove(target_file)
        return 0

    def update_textfield_of_svg_file(self, svg_file: str) -> int:
        """Update the textfield of the svg file by using image analysis.

        :param svg_file: path of the svg file to update
        :return: exit_status (0 on success, 2 if no rect or image was found)
        """
        # Fixed: the original parsed the undefined name "target_file".
        svg_tree = ET.parse(svg_file)
        namespaces = {k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items()}
        rects = svg_tree.xpath('//ns:rect[not(contains(@id, "rect"))]', namespaces=namespaces)
        images = svg_tree.xpath('//ns:image', namespaces=namespaces)
        if not rects or not images:
            print(f'There has been an error: could not find a rect and an image in {svg_file}!')
            return 2
        image = images[0]
        image_file = dirname(svg_file) + sep + image.get('{%s}href' % namespaces['xlink'])
        image_width = float(image.get('width')) if image.get('width') else 0
        image_height = float(image.get('height')) if image.get('height') else 0
        text_field = get_text_field_on_image(image_file, image_width, image_height)
        # NOTE(review): SVGFileCreator.UPDATE_TEXTFIELD_OF_SVG_FILE calls
        # update_rect here instead of attach_as_rect -- confirm which is intended.
        text_field.attach_as_rect(rects[0])
        copy_faksimile_svg_file(target_file=svg_file, faksimile_tree=svg_tree)
        return 0
class SVGFileCreator:
    """This class can be used in order to create svg files with textfield rects.
    """
    # When True, the process_* functions of this module skip progress output.
    UNITTESTING = False

    def __init__(self, title=None, faksimile_dir=None, target_dir='./tmp'):
        """Store the configuration and locate the inkscape executable.

        :param title: manuscript title, used as prefix of the page ids
        :param faksimile_dir: directory containing the faksimile images
        :param target_dir: directory the svg files are copied to
        """
        self.faksimile_dir = faksimile_dir
        self.target_dir = target_dir
        self.title = title
        self.inkscape = self._get_ext_program_path('inkscape')
        self.namespaces = None

    @staticmethod
    def _get_namespaces(svg_tree) -> dict:
        """Return the root's namespace map with the default prefix renamed to 'ns'."""
        return {k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items()}

    @staticmethod
    def _get_float_attribute(node, name):
        """Return attribute "name" of node as float, or 0 if it is missing or empty."""
        value = node.get(name)
        return float(value) if value else 0

    def _get_ext_program_path(self, program_name) -> str:
        """Return path to external program.

        :raises subprocess.CalledProcessError: if "which" exits with an error
        :raises FileNotFoundError: if "which" prints nothing or no file
        """
        error_msg = f'External command "{program_name}" not found!\nPlease install "{program_name}", check the output of "which {program_name}" and retry.'
        try:
            cp = subprocess.run(["which", program_name], stdout=subprocess.PIPE, check=True)
        except subprocess.CalledProcessError:
            print(error_msg)
            raise
        program_path = cp.stdout.decode().strip()
        if not program_path or not isfile(program_path):
            raise FileNotFoundError(error_msg)
        return program_path

    def create_svg_file(self, page: ET.Element) -> int:
        """Create a svg file for a page element.

        The faksimile image is downloaded if it is missing; the svg itself is
        created by _create_svg_file.

        :param page: element with the attributes "number", "output", "alias"
        :return: exit_status (0 on success, 2 on error)
        """
        number = page.get('number')
        page_file = page.get('output')
        alias = page.get('alias')
        faksimile_file = self.faksimile_dir + sep + alias + '.jpg' if alias is not None else None
        page_id = self.title.replace(' ', '_') + '_' + number
        if alias and not isfile(faksimile_file):
            wget.download(FaksimileImage.NIETZSCHE_SOURCES_URL + alias, out=faksimile_file)
        if alias and isfile(faksimile_file) and page_file is not None and isfile(page_file):
            target_file = basename(page_file).replace('.xml', '.svg')
            return self._create_svg_file(target_file, faksimile_file, page_id)
        print(faksimile_file, page_file)
        return 2

    def _create_svg_file(self, target_file, faksimile_file, page_id=0) -> int:
        """Convert faksimile_file to svg, attach a text field rect and copy it to target_dir.

        Nothing is done if target_file already exists in target_dir.

        :return: exit_status (0 on success, 2 if the svg contains no image)
        """
        if not isfile(self.target_dir + sep + target_file):
            # NOTE(review): '-z -l' look like the Inkscape 0.9x options for
            # "no GUI" and "export plain svg" -- confirm for the installed version.
            subprocess.run([self.inkscape, '-z', '-l', target_file, faksimile_file], check=True)
            svg_tree = ET.parse(target_file)
            namespaces = self._get_namespaces(svg_tree)
            images = svg_tree.xpath('//ns:image', namespaces=namespaces)
            if not images:
                print(f'There has been an error: could not find an image in {target_file}!')
                return 2
            image = images[0]
            image_file = dirname(target_file) + sep\
                + image.get('{%s}href' % namespaces['xlink']).replace('file://', '')
            image_width = self._get_float_attribute(image, 'width')
            image_height = self._get_float_attribute(image, 'height')
            print(image_file, image_width, image_height)
            text_field = get_text_field_on_image(image_file, image_width, image_height, id=page_id)
            text_field.attach_as_rect(svg_tree.getroot())
            copy_faksimile_update_image_location(faksimile_tree=svg_tree, target_directory=self.target_dir)
            # The intermediate svg written by inkscape is no longer needed.
            remove(target_file)
        return 0

    def create_rotation_svg_file(self, page_file: str) -> int:
        """Create/update the svg file of a thumb image that needs rotation.

        The transform of the page's faksimile-image is written to the root of
        the thumb svg file.

        :param page_file: path of the xml file of the page
        :return: exit_status (0 on success, 2 on error)
        """
        page_tree = ET.parse(page_file)
        transform = page_tree.xpath('//faksimile-image/@transform')[0]
        thumb = page_tree.xpath('//faksimile-image/@file-name')[0].replace('.jpg', '_thumb.svg')
        thumb_attributes = page_tree.xpath('//faksimile-image/@thumb')
        if len(thumb_attributes) > 0:
            thumb = thumb_attributes[0]
        thumb_file = self.faksimile_dir + sep + thumb
        faksimile_file = thumb_file.replace('.svg', '.jpg')
        if not isfile(thumb_file):
            if not isfile(faksimile_file):
                print(f'There has been an error: could not find the faksimile_file {faksimile_file}!')
                return 2
            # Fixed: the original passed the undefined name "target_file" as
            # output file; the svg that is parsed below is thumb_file.
            subprocess.run([self.inkscape, '-z', '-l', thumb_file, faksimile_file], check=True)
        if not isfile(thumb_file):
            print(thumb_file, page_file)
            return 2
        svg_tree = ET.parse(thumb_file)
        namespaces = self._get_namespaces(svg_tree)
        images = svg_tree.xpath('//ns:image', namespaces=namespaces)
        if not images:
            print(f'There has been an error: could not find an image in {thumb_file}!')
            return 2
        matrix = Matrix(transform)
        if matrix.matrix[matrix.B] != 0.0 and matrix.matrix[matrix.C] != 0.0:
            width = float(svg_tree.getroot().get('width'))
            height = float(svg_tree.getroot().get('height'))
            # Height and width are passed in this (swapped) order, as in the original.
            matrix.updateOffset(height, width)
        images[0].set('{%s}href' % namespaces['xlink'], basename(faksimile_file))
        svg_tree.getroot().set('transform', matrix.toString())
        copy_faksimile_svg_file(target_file=thumb_file, faksimile_tree=svg_tree)
        return 0

    def create_svg_thumb_file(self, faksimile_file: str) -> int:
        """Create a svg thumb file that is cropped to the text field of the thumb image.

        Nothing is done if the svg file (same name with suffix .svg) exists.

        :param faksimile_file: path of the thumb image (.jpg)
        :return: exit_status (0 on success, 2 on error)
        """
        if not isfile(faksimile_file):
            # Fixed: the original printed the undefined name "page_file" here,
            # which raised a NameError instead of returning the exit status.
            print(f'There has been an error: could not find the faksimile_file {faksimile_file}!')
            return 2
        target_file = faksimile_file.replace('.jpg', '.svg')
        if not isfile(target_file):
            subprocess.run([self.inkscape, '-z', '-l', target_file, faksimile_file], check=True)
            svg_tree = ET.parse(target_file)
            namespaces = self._get_namespaces(svg_tree)
            images = svg_tree.xpath('//ns:image', namespaces=namespaces)
            if not images:
                print(f'There has been an error: could not find an image in {target_file}!')
                return 2
            text_field, width, height = get_text_field_on_thumb(faksimile_file)
            image = images[0]
            image.set('width', str(width))
            image.set('height', str(height))
            image.set('{%s}href' % namespaces['xlink'], basename(faksimile_file))
            # The svg canvas and viewBox are reduced to the text field.
            root = svg_tree.getroot()
            root.set('width', str(text_field.width))
            root.set('height', str(text_field.height))
            root.set('viewBox', f'{text_field.left} {text_field.top} {text_field.width} {text_field.height}')
            copy_faksimile_svg_file(target_file=target_file, faksimile_tree=svg_tree)
        return 0

    @staticmethod
    def UPDATE_TEXTFIELD_OF_SVG_FILE(svg_file: str) -> int:
        """Update the textfield of the svg file by using image analysis.

        :param svg_file: path of the svg file to update
        :return: exit_status (0 on success, 2 if no rect or image was found)
        """
        svg_tree = ET.parse(svg_file)
        namespaces = SVGFileCreator._get_namespaces(svg_tree)
        rects = svg_tree.xpath('//ns:rect[not(contains(@id, "rect"))]', namespaces=namespaces)
        images = svg_tree.xpath('//ns:image', namespaces=namespaces)
        if not rects or not images:
            print(f'There has been an error: could not find a rect and an image in {svg_file}!')
            return 2
        image = images[0]
        image_file = dirname(svg_file) + sep + image.get('{%s}href' % namespaces['xlink'])
        image_width = SVGFileCreator._get_float_attribute(image, 'width')
        image_height = SVGFileCreator._get_float_attribute(image, 'height')
        text_field = get_text_field_on_image(image_file, image_width, image_height)
        text_field.update_rect(rects[0])
        copy_faksimile_svg_file(target_file=svg_file, faksimile_tree=svg_tree)
        return 0
class CSVSVGFileCreator(SVGFileCreator):
    """Variant of SVGFileCreator that creates svg files with textfield rects
    for faksimile images listed in a csv file.
    """
    # Column names of the csv input.
    FOLIO = "Blatt-ID"
    LABEL = "Label"
    ALIAS = "Alias"
    URL = "Nietzschesource-URL"
    CONTENT = "Inhalt"

    def __init__(self, title=None, faksimile_dir=None, target_dir='./tmp'):
        """Pass all arguments on to SVGFileCreator."""
        super().__init__(title=title, faksimile_dir=faksimile_dir, target_dir=target_dir)

    def create_svg_file_from_csv_input(self, faksimile_file, page_id) -> int:
        """Create the svg file for faksimile_file; the svg gets the image's
        file name with the suffix .svg.

        [return] exit_status
        """
        svg_name = basename(faksimile_file).replace('.jpg', '.svg')
        exit_status = self._create_svg_file(svg_name, faksimile_file, page_id)
        return exit_status
def process_update(args) -> int:
    """Process option update: update the textfield of all svg files of a directory.

    Every svg file is backed up with copy_to_bak_dir before it is updated.

    :param args: positional command line arguments, args[0] is the svg directory
    :return: exit code (0, or 2 if the directory argument is missing)
    :raises FileNotFoundError: if the svg directory does not exist
    """
    # Fixed: the original tested "len(args) < 0", which is never true, so a
    # missing argument ended in an IndexError instead of the usage message.
    if len(args) < 1:
        usage()
        return 2
    svg_dir = args[0]
    if not isdir(svg_dir):
        raise FileNotFoundError(f'Directory {svg_dir} does not exist!')
    svg_files = [svg_dir + sep + file_name for file_name in listdir(svg_dir)
                 if isfile(svg_dir + sep + file_name) and file_name.endswith('.svg')]
    counter = 0
    for svg_file in svg_files:
        if not SVGFileCreator.UNITTESTING:
            print(Fore.CYAN + f'Updating svg file {svg_file} ...' + Style.RESET_ALL)
        copy_to_bak_dir(svg_file)
        if SVGFileCreator.UPDATE_TEXTFIELD_OF_SVG_FILE(svg_file) == 0:
            counter += 1
    if not SVGFileCreator.UNITTESTING:
        print(Style.RESET_ALL + f'[{counter} pages created]')
    return 0
def process_rotate(args) -> int:
    """Process option rotate: create rotated thumb svg files.

    All pages whose xml file contains a faksimile-image with a transform
    attribute are processed.

    :param args: positional command line arguments:
        args[0] xml manuscript file or svg_pos_file, args[1] faksimile directory
    :return: exit code (0, or 2 if arguments are missing)
    :raises FileNotFoundError: if the faksimile directory or the manuscript
        file belonging to a svg_pos_file does not exist
    """
    if len(args) < 2:
        usage()
        return 2
    xml_file = args[0]
    faksimile_dir = args[1]
    if not isdir(faksimile_dir):
        raise FileNotFoundError(f'Directory {faksimile_dir} does not exist!')
    xpath = '//page'
    source_tree = ET.parse(xml_file)
    if len(xml_file.split('_')) > 2:
        # svg_pos_file: the pages are listed in the manuscript file, whose
        # name consists of the first two '_'-separated parts of xml_file.
        manuscript_file = '_'.join(xml_file.split('_')[0:2]) + '.xml'
        if not isfile(manuscript_file):
            raise FileNotFoundError(f'There is no manuscript file {manuscript_file} for svg_pos_file {xml_file}!')
        source_tree = ET.parse(manuscript_file)
        xpath = f'//page[contains(@output,"{xml_file}")]'
    counter = 0
    svg_creator = SVGFileCreator('', faksimile_dir)
    pages_with_transform = [page for page in source_tree.xpath(xpath)
                            if len(ET.parse(page.get('output')).xpath('//faksimile-image/@transform')) > 0]
    for page in pages_with_transform:
        if not SVGFileCreator.UNITTESTING:
            number = page.get('number')
            print(Fore.CYAN + f'Rotating svg file for page {number} ...' + Style.RESET_ALL)
        # Fixed: create_rotation_svg_file expects the path of the page's xml
        # file (it calls ET.parse on it); the original passed the element.
        if svg_creator.create_rotation_svg_file(page.get('output')) == 0:
            counter += 1
    if not SVGFileCreator.UNITTESTING:
        print(Style.RESET_ALL + f'[{counter} pages created]')
    return 0
def process_thumb(args) -> int:
    """Process option thumb: create a svg file for every thumb image
    ("*_thumb.jpg") of the faksimile directory.

    :param args: positional command line arguments:
        args[0] faksimile directory, args[1] optional title; only files
        whose name starts with the title are processed
    :return: exit code (0, or 2 if the directory argument is missing)
    :raises FileNotFoundError: if the faksimile directory does not exist
    """
    if not len(args):
        usage()
        return 2
    faksimile_dir = args[0]
    title = ''
    if len(args) > 1:
        title = args[1]
    svg_creator = SVGFileCreator('', faksimile_dir)
    print(faksimile_dir, title)
    if not isdir(faksimile_dir):
        raise FileNotFoundError(f'Directory {faksimile_dir} does not exist!')
    # Collect all matching thumb images before any svg file is created.
    thumb_images = []
    for file_name in listdir(faksimile_dir):
        candidate = faksimile_dir + sep + file_name
        if not isfile(candidate):
            continue
        if basename(file_name).startswith(title) and file_name.endswith('_thumb.jpg'):
            thumb_images.append(candidate)
    counter = 0
    verbose = not SVGFileCreator.UNITTESTING
    for faksimile_file in thumb_images:
        if verbose:
            print(Fore.CYAN + f'Creating a svg for faksimile file {faksimile_file} ...' + Style.RESET_ALL)
        exit_status = svg_creator.create_svg_thumb_file(faksimile_file)
        if exit_status == 0:
            counter += 1
    if verbose:
        print(Style.RESET_ALL + f'[{counter} thumb svg files created]')
    return 0
def process_csv(csv_file_name, faksimile_dir, target_dir, downloadOnly=False) -> int:
    """Create svg files for the faksimile images listed in a csv file.

    Rows whose label contains "_thumb" are skipped. Missing faksimile images
    are downloaded from the url given in the row.

    :param csv_file_name: csv file with the columns named by the constants
        of CSVSVGFileCreator; its base name provides the title
    :param faksimile_dir: directory containing the faksimile images
    :param target_dir: directory where the svg files are saved to
    :param downloadOnly: if True, only download the images
    :return: exit code (0 on success, 2 if any image could not be processed)
    """
    title = basename(csv_file_name).replace('.csv', '').replace('_', ' ')
    file_creator = CSVSVGFileCreator(title, faksimile_dir, target_dir)
    exit_status = 0
    with open(csv_file_name, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            faksimile_name = row[CSVSVGFileCreator.LABEL]
            if '_thumb' in faksimile_name:
                continue
            faksimile_url = row[CSVSVGFileCreator.URL]
            page_id = row[CSVSVGFileCreator.ALIAS].replace(',', '').replace(' ', '_')
            faksimile_file = faksimile_dir + sep + faksimile_name
            if not isfile(faksimile_file):
                wget.download(faksimile_url, out=faksimile_file)
            if downloadOnly:
                continue
            if isfile(faksimile_file):
                print(f'processing {faksimile_file} ...')
                if file_creator.create_svg_file_from_csv_input(faksimile_file, page_id) > 0:
                    exit_status = 2
            else:
                exit_status = 2
                # Fixed: the original also printed the undefined name "e",
                # which raised a NameError on this error path.
                print(f'There has been an error on downloading {faksimile_url}!')
    return exit_status
def process_default(args) -> int:
    """Default process: create svg files for the blank pages of a manuscript,
    for the page of a svg_pos_file, or for the images listed in a csv file.

    :param args: positional command line arguments:
        args[0] xml manuscript file, svg_pos_file or csv file,
        args[1] faksimile directory, args[2] target directory (created if missing)
    :return: exit code (0, 2 if arguments are missing, or the exit code of
        process_csv for csv input)
    :raises FileNotFoundError: if the faksimile directory, the xml file or the
        manuscript file belonging to a svg_pos_file does not exist
    """
    if len(args) < 3:
        usage()
        return 2
    faksimile_dir, target_dir = args[1], args[2]
    if not isdir(target_dir):
        mkdir(target_dir)
    if args[0].endswith('.csv'):
        return process_csv(args[0], faksimile_dir, target_dir)
    xml_file = args[0]
    # Guard clauses: a missing directory is reported before a missing file.
    if not isdir(faksimile_dir):
        raise FileNotFoundError(f'Directory {faksimile_dir} does not exist!')
    if not isfile(xml_file):
        raise FileNotFoundError('File {} does not exist!'.format(xml_file))
    name_parts = xml_file.split('_')
    if len(name_parts) > 2:
        # svg_pos_file: the page is looked up in the manuscript file, whose
        # name consists of the first two '_'-separated parts of xml_file.
        manuscript_file = '_'.join(name_parts[0:2]) + '.xml'
        if not isfile(manuscript_file):
            raise FileNotFoundError(f'There is no manuscript file {manuscript_file} for svg_pos_file {xml_file}!')
        source_tree = ET.parse(manuscript_file)
        xpath = f'//page[contains(@output,"{xml_file}")]'
    else:
        source_tree = ET.parse(xml_file)
        xpath = f'//page[contains(@status, "{BLANK_STATUS}")]'
    title = source_tree.getroot().get('title')
    svg_creator = SVGFileCreator(title, faksimile_dir, target_dir=target_dir)
    verbose = not SVGFileCreator.UNITTESTING
    counter = 0
    for page in source_tree.xpath(xpath):
        if verbose:
            number = page.get('number')
            print(Fore.CYAN + f'Creating a svg file for {title}, {number} ...' + Style.RESET_ALL)
        exit_status = svg_creator.create_svg_file(page)
        if exit_status == 0:
            counter += 1
    if verbose:
        print(Style.RESET_ALL + f'[{counter} pages created]')
    return 0
def usage():
    """Print the docstring of main, which describes the command line interface.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to create svg files with a rect for the text_field.

    fixes/create_blank_svg_files.py [OPTIONS] <xmlManuscriptFile|svg_pos_file|csv_file> <faksimile-dir> <target-dir>
    fixes/create_blank_svg_files.py -r|--rotate-thumb <xmlManuscriptFile|svg_pos_file> <faksimile-dir>
    fixes/create_blank_svg_files.py -u|--update <svg-dir>
    fixes/create_blank_svg_files.py -t|--thumb <faksimile-dir> [<title>]

        <xmlManuscriptFile>     a xml file about a manuscript, containing information about its pages.
        <svg_pos_file>          a xml file about a page, containing information about svg word positions.
        <csv_file>              a csv file with faksimile information.
        <faksimile-dir>         a directory containing faksimile images
        <target-dir>            the directory where the files should be saved to

        OPTIONS:
        -h|--help               show help
        -r|--rotate-thumb       use image analysis in order to create a svg file for the thumb faksimile files that need rotation.
        -u|--update             update svg_files: use image analysis in order to update the textfield of the svg_files
        -t|--thumb              use image analysis in order to create a svg file for the thumb faksimile files that need cropping.

        :return: exit code (int)
    """
    update = False
    thumb = False
    # Fixed: this flag was initialised with True, so every call without -u
    # ended in process_rotate and neither -t nor the default mode was reachable.
    rotate = False
    try:
        opts, args = getopt.getopt(argv, "hrut", ["help", "rotate-thumb", "update", "thumb"])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        elif opt in ('-r', '--rotate-thumb'):
            rotate = True
        elif opt in ('-u', '--update'):
            update = True
        elif opt in ('-t', '--thumb'):
            thumb = True
    # If several modes are requested, update wins over rotate, rotate over thumb.
    if update:
        return process_update(args)
    elif rotate:
        return process_rotate(args)
    elif thumb:
        return process_thumb(args)
    return process_default(args)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
Event Timeline
Log In to Comment