Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60267430
util.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Apr 28, 18:56
Size
14 KB
Mime Type
text/x-python
Expires
Tue, Apr 30, 18:56 (2 d)
Engine
blob
Format
Raw Data
Handle
17328516
Attached To
rNIETZSCHEPYTHON nietzsche-python
util.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to copy a faksimile svg file with the option of highlighting some word boxes.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
from
functools
import
cmp_to_key
import
getopt
import
inspect
import
itertools
import
lxml.etree
as
ET
import
re
import
shutil
import
signal
import
string
import
subprocess
from
svgpathtools
import
svg_to_paths
import
sys
import
tempfile
import
os
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
,
splitext
import
warnings
import
xml.etree.ElementTree
as
XET
# Make the directory containing this file importable, so that the imports
# that follow (datatypes.*, local_config, myxmlwriter, process_files) can be
# resolved from it.
if dirname(__file__) not in sys.path:
    sys.path.append(dirname(__file__))
from
datatypes.faksimile
import
FaksimilePage
,
get_paths_inside_rect
from
datatypes.lineNumber
import
LineNumber
from
datatypes.mark_foreign_hands
import
MarkForeignHands
from
datatypes.page
import
Page
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.word
import
update_transkription_position_ids
from
local_config
import
PDF_READER
,
SVG_EDITOR
from
myxmlwriter
import
write_pretty
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
process_files
import
update_svgposfile_status
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

# While this flag is true, record_changes_on_svg_file_to_page does not write
# the updated page back to its xml source file.
UNITTESTING = False
# Default 'fill' and 'opacity' attribute values that
# create_highlighted_svg_file sets on highlighted nodes.
HIGHLIGHT_COLOR = 'red'
OPACITY = '0.5'
class ExternalViewer:
    """This class can be used to show files with external viewers.
    """
    # Maps a file extension (including the dot) to the viewer command.
    file_format_viewer_dict = {'.pdf': PDF_READER, '.svg': SVG_EDITOR}

    @classmethod
    def show_files(cls, single_file=None, list_of_files=None):
        """Opens file(s) with corresponding external viewer(s).

        Files are opened in the given order. Every file but the last one is
        opened in the background; the last one is opened in the foreground
        and this method blocks until that viewer exits. The background
        viewers are terminated afterwards. Files whose extension has no
        entry in file_format_viewer_dict are skipped.

        :param single_file: a file name, or a list of file names (in which
            case list_of_files is ignored).
        :param list_of_files: list of file names; single_file, if given, is
            shown after them. The list is not modified.
        """
        # Work on a private copy: neither the caller's list nor a shared
        # default argument may be mutated.
        if isinstance(single_file, list):
            files = list(single_file)
        else:
            files = list(list_of_files) if list_of_files is not None else []
            if single_file is not None:
                files.append(single_file)
        # Resolve the viewers up front, so that the blocking call is made for
        # the last file that can actually be shown.
        viewable = []
        for file_name in files:
            viewer = cls.file_format_viewer_dict.get(splitext(file_name)[1])
            if viewer is not None:
                viewable.append((viewer, file_name))
        if not viewable:
            return
        background, (last_viewer, last_file) = viewable[:-1], viewable[-1]
        process_list = []
        try:
            for viewer, file_name in background:
                # Each background viewer gets its own session (and thereby
                # its own process group), so that the viewer and all its
                # children can be signalled together below.
                process_list.append(
                    subprocess.Popen([viewer, file_name],
                                     stdout=subprocess.DEVNULL,
                                     stderr=subprocess.DEVNULL,
                                     start_new_session=True))
            subprocess.run([last_viewer, last_file])
        finally:
            for process in process_list:
                if process.poll() is not None:
                    # The viewer has already been closed by the user.
                    continue
                try:
                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                except ProcessLookupError:
                    # The viewer exited between poll() and killpg().
                    pass
def copy_faksimile_svg_file(target_file=None, faksimile_source_file=None, faksimile_tree=None, target_directory=None, local_image_path=None):
    """Copy a faksimile_svg_file to target_file.

    The content is taken from faksimile_tree if it is given, otherwise it is
    parsed from faksimile_source_file. If local_image_path is given, the
    xlink:href of the first image element is set to it before writing.

    :param target_file: name of the file to write; it is prefixed with
        target_directory if that is given as well.
    :param faksimile_source_file: the svg file to copy; defaults to
        faksimile_tree.docinfo.URL.
    :param faksimile_tree: a lxml.etree or xml.etree ElementTree.
    :param target_directory: directory to write to; if target_file is None,
        the base name of faksimile_source_file is used as file name.
    :param local_image_path: new reference for the first image element.
    :raises Exception: if neither a source file nor a tree, or neither a
        target file nor a target directory is given.
    """
    svg_namespace = 'http://www.w3.org/2000/svg'
    xlink_namespace = 'http://www.w3.org/1999/xlink'
    if faksimile_source_file is None:
        if faksimile_tree is None:
            raise Exception('copy_faksimile_svg_file needs either a faksimile_tree (lxml.etree.ElementTree) or a faksimile_source_file')
        faksimile_source_file = faksimile_tree.docinfo.URL
    if target_directory is not None:
        file_name = target_file if target_file is not None else basename(faksimile_source_file)
        target_file = target_directory + sep + file_name
    elif target_file is None:
        raise Exception('copy_faksimile_svg_file needs either a target_file or a target_directory')
    # Only the attributes of the svg root element are needed here.
    _, _, svg_attributes = svg_to_paths.svg2paths(faksimile_source_file, return_svg_attributes=True)
    # Register the prefixes declared by the source file, so that they are
    # kept when the copy is written.
    for key, uri in svg_attributes.items():
        if not key.startswith('xmlns:'):
            continue
        try:
            XET.register_namespace(key.replace('xmlns:', ''), uri)
        except ValueError:
            # register_namespace rejects prefixes it reserves for internal
            # use; such a prefix is simply not registered.
            pass
    XET.register_namespace('', svg_namespace)
    # Fall back to the standard namespaces if the root element does not
    # declare them, instead of failing with a KeyError.
    namespaces = {'ns': svg_attributes.get('xmlns', svg_namespace),
                  'xlink': svg_attributes.get('xmlns:xlink', xlink_namespace)}
    if faksimile_tree is not None:
        # Convert the tree to a xml.etree tree by serializing and re-parsing.
        if isinstance(faksimile_tree, ET._ElementTree):
            serialized_tree = ET.tostring(faksimile_tree)
        else:
            serialized_tree = XET.tostring(faksimile_tree.getroot())
        target_tree = XET.ElementTree(XET.fromstring(serialized_tree))
    else:
        target_tree = XET.parse(faksimile_source_file)
    if local_image_path is not None:
        image_node = target_tree.find('.//ns:image', namespaces=namespaces)
        if image_node is not None:
            image_node.set('{%s}href' % namespaces['xlink'], local_image_path)
    target_tree.write(target_file)
def create_highlighted_svg_file(faksimile_tree, node_ids, target_file=None, target_directory=None, local_image_path=None, namespaces={}, highlight_color=HIGHLIGHT_COLOR, opacity=OPACITY):
    """Highlight the rect and path nodes with the given ids and write the tree to a file.

    The 'fill' and 'opacity' attributes of each matching node are set to
    highlight_color and opacity, its 'style' attribute is emptied, and the
    tree is then written by copy_faksimile_svg_file.

    :param faksimile_tree: tree providing getroot().nsmap and xpath().
    :param node_ids: ids of the rect/path nodes that should be highlighted.
    :param namespaces: prefix -> uri mapping for the xpath queries; if empty,
        it is derived from the root element, with the default namespace
        mapped to the prefix 'ns'.
    """
    if len(namespaces) == 0:
        namespaces = dict(('ns' if prefix is None else prefix, uri)
                          for prefix, uri in faksimile_tree.getroot().nsmap.items())
    selector = '//ns:rect[@id="{0}"]|//ns:path[@id="{0}"]'
    # Collect all matching nodes first, then change them.
    nodes_to_highlight = []
    for node_id in node_ids:
        nodes_to_highlight.extend(faksimile_tree.xpath(selector.format(node_id), namespaces=namespaces))
    for highlighted_node in nodes_to_highlight:
        highlighted_node.set('fill', highlight_color)
        highlighted_node.set('opacity', opacity)
        highlighted_node.set('style', '')
    copy_faksimile_svg_file(target_file=target_file,
                            faksimile_tree=faksimile_tree,
                            target_directory=target_directory,
                            local_image_path=local_image_path)
def get_empty_node_ids(faksimile_tree, x_min=0.0, x_max=0.0, y_min=0.0, y_max=0.0, text_field_id=None, faksimile_page=None, namespaces={}):
    """Return the ids of the rect and path nodes inside an area that have no title child.

    If faksimile_page is given, the area and text_field_id are taken from its
    text_field, shifted by the position of its faksimile_image; the right
    border is moved inwards by THRESHOLD_X.

    :param namespaces: prefix -> uri mapping for the xpath queries; if empty,
        it is derived from the root element, with the default namespace
        mapped to the prefix 'ns'.
    :return: list of node ids
    """
    THRESHOLD_X = 10
    if faksimile_page is not None:
        text_field = faksimile_page.text_field
        image = faksimile_page.faksimile_image
        x_min = text_field.xmin + image.x
        x_max = text_field.xmax + image.x - THRESHOLD_X
        y_min = text_field.ymin + image.y
        y_max = text_field.ymax + image.y
        text_field_id = text_field.id
    if len(namespaces) == 0:
        namespaces = dict(('ns' if prefix is None else prefix, uri)
                          for prefix, uri in faksimile_tree.getroot().nsmap.items())
    rect_query = '//ns:rect[@x>"{0}" and @x<"{1}" and @y>"{2}" and @y<"{3}" and @id!="{4}" and not(./ns:title)]'.format(
        x_min, x_max, y_min, y_max, text_field_id)
    untitled_nodes = faksimile_tree.xpath(rect_query, namespaces=namespaces)
    # Rects are listed first, followed by the paths.
    untitled_nodes.extend(get_paths_inside_rect(faksimile_tree, '//ns:path[not(./ns:title)]',
                                                x_min, x_max, y_min, y_max, text_field_id,
                                                namespaces=namespaces))
    return [untitled_node.get('id') for untitled_node in untitled_nodes]
def record_changes(original_svg_file, changed_svg_file, node_ids, namespaces=None):
    """Copy changes made to changed_svg_file to original_svg_file.

    For each id in node_ids, the rect/path node with that id is looked up in
    both files:
    - if the changed node has a title, its text is copied to the title of
      the original node (the title is created if it is missing);
    - if the changed file has no titled node with that id, the original
      node is removed.
    The original file is then rewritten by copy_faksimile_svg_file.

    :param original_svg_file: the svg file that will be updated.
    :param changed_svg_file: the svg file containing the changes.
    :param node_ids: ids of the rect/path nodes to process.
    :param namespaces: prefix -> uri mapping for the xpath queries; if empty
        or None, it is derived from the root element of changed_svg_file,
        with the default namespace mapped to the prefix 'ns'.
    """
    old_tree = ET.parse(original_svg_file)
    new_tree = ET.parse(changed_svg_file)
    if not namespaces:
        namespaces = {('ns' if prefix is None else prefix): uri
                      for prefix, uri in new_tree.getroot().nsmap.items()}
    for node_id in node_ids:
        new_titles = new_tree.xpath('//ns:rect[@id="{0}"]/ns:title|//ns:path[@id="{0}"]/ns:title'.format(node_id), namespaces=namespaces)
        old_nodes = old_tree.xpath('//ns:rect[@id="{0}"]|//ns:path[@id="{0}"]'.format(node_id), namespaces=namespaces)
        if len(old_nodes) == 0:
            continue
        if len(new_titles) == 0:
            for old_node in old_nodes:
                old_node.getparent().remove(old_node)
            continue
        new_title = new_titles[0]
        old_title = old_nodes[0].find('ns:title', namespaces=namespaces)
        if old_title is None:
            # lxml rejects None as attribute value, hence the id is only
            # copied if the changed title has one.
            attrib = {}
            title_id = new_title.get('id')
            if title_id is not None:
                attrib['id'] = title_id
            # Create the title in the namespace used by the queries above,
            # so that 'ns:title' lookups find it.
            old_title = ET.SubElement(old_nodes[0], '{%s}title' % namespaces['ns'], attrib=attrib)
        old_title.text = new_title.text
    copy_faksimile_svg_file(target_file=original_svg_file, faksimile_tree=old_tree)
def record_changes_on_svg_file_to_page(xml_source_file, svg_file, word_ids=None):
    """Copy changes made to svg_file to xml_source_file.

    For each selected word, the rects 'word_<word id>_<position id>' of the
    "Transkription" group are recorded to the corresponding transkription
    positions. Further rects whose id contains 'word_<word id>_' and whose
    inkscape:label refers to a position of another word are moved to the
    current word. Finally the word list of the page is rebuilt and, unless
    UNITTESTING is set, the page is written back to xml_source_file.

    :param xml_source_file: the xml file of the page.
    :param svg_file: the svg file containing the changes.
    :param word_ids: ids of the words to process (default: all page words).

    :return: datatypes.page.Page
    """
    svg_tree = ET.parse(svg_file)
    namespaces = dict(('ns' if prefix is None else prefix, uri)
                      for prefix, uri in svg_tree.getroot().nsmap.items())
    transkription_field = TranskriptionField(svg_file)
    page = Page(xml_source_file=xml_source_file)
    if word_ids is None:
        selected_words = page.words
    else:
        selected_words = [page_word for page_word in page.words if page_word.id in word_ids]
    rect_by_id_query = '//ns:g[@id="Transkription"]/ns:rect[@id="{0}"]'
    for current_word in selected_words:
        id_prefix = 'word_' + str(current_word.id) + '_'
        recorded_ids = []
        # Record the rects that belong to the positions of the current word.
        for position in current_word.transkription_positions:
            position_id = id_prefix + str(position.id)
            position_nodes = svg_tree.xpath(rect_by_id_query.format(position_id), namespaces=namespaces)
            if len(position_nodes) > 0:
                record_changes_to_transkription_position(position_nodes[0], position,
                                                         transkription_field.xmin, transkription_field.ymin,
                                                         namespaces=namespaces)
                recorded_ids.append(position_id)
        # Rects that carry the id prefix of the current word but have not
        # been recorded above.
        extra_nodes = []
        for candidate in svg_tree.xpath('//ns:g[@id="Transkription"]/ns:rect[contains(@id, "{0}")]'.format(id_prefix), namespaces=namespaces):
            if candidate.get('id') not in recorded_ids:
                extra_nodes.append(candidate)
        for extra_node in extra_nodes:
            labels = svg_tree.xpath('//ns:g[@id="Transkription"]/ns:rect[@id="{0}"]/@inkscape:label'.format(extra_node.get('id')),
                                    namespaces=namespaces)
            if len(labels) == 0:
                continue
            # The label refers to the id the rect had originally.
            former_id = labels[0].replace('#', '')
            if not re.match(r'word_[0-9]+_[0-9]+', former_id):
                continue
            former_id_parts = former_id.split('_')
            ref_word_id = int(former_id_parts[1])
            ref_tp_id = former_id_parts[2]
            ref_words = [page_word for page_word in page.words if page_word.id == ref_word_id]
            if len(ref_words) == 0:
                continue
            ref_word = ref_words[0]
            ref_positions = [ref_position for ref_position in ref_word.transkription_positions
                             if ref_position.id == ref_tp_id]
            if len(ref_positions) == 0:
                continue
            # Move the position from the word it belonged to to the current word.
            moved_position = ref_positions[0]
            ref_word.transkription_positions.remove(moved_position)
            record_changes_to_transkription_position(extra_node, moved_position,
                                                     transkription_field.xmin, transkription_field.ymin,
                                                     namespaces=namespaces)
            current_word.transkription_positions.append(moved_position)
    new_page_words = []
    for page_word in page.words:
        if page_word.has_mixed_status('text'):
            for partial_word in page_word.split_according_to_status('text'):
                if partial_word.text is not None and partial_word.text != '':
                    new_page_words.append(partial_word)
        elif len(page_word.transkription_positions) > 0:
            position_texts = [position.text for position in page_word.transkription_positions
                              if position.text is not None and position.text != '']
            if len(position_texts) > 0:
                page_word.text = position_texts[0]
            new_page_words.append(page_word)
    page.words = new_page_words
    page.update_and_attach_words2tree(update_function_on_word=update_transkription_position_ids)
    page.unlock()
    if not UNITTESTING:
        write_pretty(xml_element_tree=page.page_tree, file_name=xml_source_file,
                     script_name=__file__ + ' -> ' + inspect.currentframe().f_code.co_name,
                     file_type=FILE_TYPE_SVG_WORD_POSITION)
    return page
def record_changes_to_transkription_position(node, transkription_position, xmin=0.0, ymin=0.0, namespaces=None):
    """Record the geometry and title text of node to transkription_position.

    Attributes that are missing or empty on node leave the corresponding
    field of transkription_position untouched.

    :param node: element providing get(), xpath() and nsmap.
    :param transkription_position: object whose left, top, width, height and
        text are updated.
    :param xmin: offset subtracted from the 'x' attribute.
    :param ymin: offset subtracted from the 'y' attribute.
    :param namespaces: prefix -> uri mapping for the xpath query; if None, it
        is derived from node.nsmap, with the default namespace mapped to the
        prefix 'ns'.
    """
    if namespaces is None:
        namespaces = dict(('ns' if prefix is None else prefix, uri)
                          for prefix, uri in node.nsmap.items())
    # (svg attribute, target field, offset to subtract or None)
    geometry = (('x', 'left', xmin),
                ('y', 'top', ymin),
                ('width', 'width', None),
                ('height', 'height', None))
    for attribute, target_field, offset in geometry:
        raw_value = node.get(attribute)
        if not raw_value:
            continue
        number = float(raw_value)
        setattr(transkription_position, target_field, number if offset is None else number - offset)
    title_texts = node.xpath('./ns:title/text()', namespaces=namespaces)
    if len(title_texts) > 0:
        transkription_position.text = title_texts[0]
def usage():
    """Print the docstring of main, which serves as the help text of the script.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to copy a faksimile svg file with the option of highlighting some word boxes.
    svgscripts/copy_faksimile_svg_file.py [OPTIONS] <faksimile_svg_file> <target_dir>
    <faksimile_svg_file> a svg file containing information about the word positions on the faksimile.
    <target_dir> the target directory.
    OPTIONS:
    -h|--help: show help
    :return: exit code (int)
    """
    # NOTE: the docstring above is printed by usage() as help text.
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        usage()
        return 2
    if any(option in ('-h', '--help') for option, _ in opts):
        usage()
        return 0
    if len(args) < 2:
        usage()
        return 2
    first_arg, second_arg = args[0], args[1]
    # Report the first of the two arguments that does not exist.
    for checked_arg in (first_arg, second_arg):
        if not exists(checked_arg):
            raise FileNotFoundError('File {} does not exist!'.format(checked_arg))
    exit_status = 0
    # The file and the directory may be passed in either order.
    faksimile_svg_file = first_arg if isfile(first_arg) else second_arg
    target_dir = second_arg if isdir(second_arg) else first_arg
    copy_faksimile_svg_file(faksimile_source_file=faksimile_svg_file, target_directory=target_dir)
    return exit_status
# Run as a script: pass the command line arguments (without the program
# name) to main() and use its return value as exit code of the process.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
Event Timeline
Log In to Comment