Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91333875
fix_old_data.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 10, 02:28
Size
8 KB
Mime Type
text/x-python
Expires
Tue, Nov 12, 02:28 (2 d)
Engine
blob
Format
Raw Data
Handle
22221800
Attached To
rNIETZSCHEPYTHON nietzsche-python
fix_old_data.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to fix old data: it checks and corrects the faksimile positions
    of words and creates faksimile line positions for pages that have been merged with faksimile data.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
from
deprecated
import
deprecated
from
functools
import
cmp_to_key
import
getopt
import
inspect
import
lxml.etree
as
ET
import
re
import
shutil
import
string
from
svgpathtools
import
svg2paths2
,
svg_to_paths
from
svgpathtools.path
import
Path
as
SVGPath
from
svgpathtools.path
import
Line
import
sys
import
tempfile
from
operator
import
attrgetter
import
os
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
warnings
if
dirname
(
__file__
)
not
in
sys
.
path
:
sys
.
path
.
append
(
dirname
(
__file__
))
from
datatypes.box
import
Box
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.manuscript
import
ArchivalManuscriptUnity
from
datatypes.mark_foreign_hands
import
MarkForeignHands
from
datatypes.page
import
Page
,
STATUS_MERGED_OK
,
STATUS_POSTMERGED_OK
from
datatypes.path
import
Path
from
datatypes.text_connection_mark
import
TextConnectionMark
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.word
import
Word
,
update_transkription_position_ids
from
util
import
back_up
from
process_files
import
update_svgposfile_status
from
process_words_post_merging
import
update_faksimile_line_positions
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
main_util
import
create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

# While True, the fix functions in this module do not write pages to disk and
# main() neither prints progress nor creates backups.
# NOTE(review): presumably switched on by the unit tests -- confirm.
UNITTESTING = False
def save_page(page):
    """Write the xml tree of ``page`` back to the file it was loaded from.

    The script name handed to ``write_pretty`` has the form
    ``<this file>:<name of the function that called save_page>``.
    NOTE(review): presumably ``write_pretty`` records that name in the
    metadata of the written file -- confirm in myxmlwriter.
    """
    # f_back is the frame of whoever called save_page; this lookup must stay
    # directly in this function body, otherwise a different frame is named.
    calling_function = inspect.currentframe().f_back.f_code.co_name
    tree = page.page_tree
    write_pretty(
        xml_element_tree=tree,
        file_name=tree.docinfo.URL,
        script_name=f'{__file__}:{calling_function}',
        file_type=FILE_TYPE_SVG_WORD_POSITION,
    )
def page_already_changed(page) -> bool:
    """Return whether page has already been changed by the calling function.

    The page counts as changed if its tree contains a ``metadata/modifiedBy``
    element whose ``script`` attribute equals
    ``<this file>:<name of the function that called page_already_changed>``.
    """
    # f_back is the frame of the caller, so every fix function checks for
    # its own name.
    calling_function = inspect.currentframe().f_back.f_code.co_name
    query = f'//metadata/modifiedBy[@script="{__file__}:{calling_function}"]'
    matches = page.page_tree.xpath(query)
    return len(matches) > 0
def fix_faksimile_line_position(page, redo=False) -> bool:
    """Create the faksimile line positions of ``page``.

    The work is skipped if this function has already changed the page,
    unless ``redo`` is set.  Outside of unit tests the page is saved
    afterwards.

    :return: True if the page has been processed, False if it was skipped.
    """
    # page_already_changed and save_page both inspect their caller's name,
    # so they have to be called directly from this function.
    if redo or not page_already_changed(page):
        update_faksimile_line_positions(page)
        if not UNITTESTING:
            save_page(page)
        return True
    return False
def check_faksimile_positions(page, redo=False) -> bool:
    """Check the faksimile positions of ``page`` against its faksimile svg file.

    The svg file is taken from the first ``//data-source/@file`` entry of the
    page tree.  For every faksimile page of that file whose title and page
    number match ``page``, each faksimile position of each word is compared
    with the svg word position that has the same id.  If left or top differ,
    left and top are copied from the svg position, bottom is set to
    ``top + height`` of the svg position and the word is attached to the page
    tree again.  The number of positions that were already equal is printed.
    Outside of unit tests a changed page is saved.

    :param page: the page to check.
    :param redo: not used by this function; accepted so that it can be called
        like the other fix functions.
    :return: True if at least one position has been updated, False otherwise
        (also if the page does not reference a faksimile svg file).
    """
    data_sources = page.page_tree.xpath('//data-source/@file')
    if not data_sources:
        # No svg file to compare against: report "unchanged" explicitly
        # instead of falling off the end of the function.
        return False
    svg_tree = ET.parse(data_sources[0])
    positions_are_equal_counter = 0
    page_changed = False
    for faksimile_page in FaksimilePage.GET_FAKSIMILEPAGES(svg_tree):
        if not (page.title == faksimile_page.title
                and page.number == faksimile_page.page_number):
            continue
        for word in page.words:
            for fp in word.faksimile_positions:
                # The first svg position with the same id wins; stop
                # scanning as soon as it is found.
                rfp = next((candidate for candidate in faksimile_page.word_positions
                            if candidate.id == fp.id), None)
                if rfp is None:
                    continue
                if fp.left != rfp.left or fp.top != rfp.top:
                    fp.left = rfp.left
                    fp.top = rfp.top
                    fp.bottom = fp.top + rfp.height
                    word.attach_word_to_tree(page.page_tree)
                    page_changed = True
                else:
                    positions_are_equal_counter += 1
    print(f'{positions_are_equal_counter}/{len(page.words)} are equal')
    # save_page inspects its caller's name, so it has to be called directly
    # from this function.
    if page_changed and not UNITTESTING:
        save_page(page)
    return page_changed
def fix_faksimile_positions(page, redo=False) -> bool:
    """Turn the faksimile positions of ``page`` into absolute values.

    ``xmin`` of the page's text field is added to ``left`` and ``ymin`` to
    ``top`` and ``bottom`` of every faksimile position; each word is then
    attached to the page tree again.  Outside of unit tests the page is
    written to its xml file.

    The page is skipped if its tree already contains a
    ``metadata/modifiedBy`` element whose ``script`` attribute equals this
    file's name, unless ``redo`` is set.

    [:return:] fixed
    """
    if not redo:
        earlier_runs = page.page_tree.xpath(f'//metadata/modifiedBy[@script="{__file__}"]')
        if len(earlier_runs) > 0:
            return False
    offset_x = page.text_field.xmin
    offset_y = page.text_field.ymin
    for word in page.words:
        for position in word.faksimile_positions:
            position.left = position.left + offset_x
            position.top = position.top + offset_y
            position.bottom = position.bottom + offset_y
        word.attach_word_to_tree(page.page_tree)
    if not UNITTESTING:
        print(f'writing to {page.page_tree.docinfo.URL}')
        write_pretty(
            xml_element_tree=page.page_tree,
            file_name=page.page_tree.docinfo.URL,
            script_name=__file__,
            file_type=FILE_TYPE_SVG_WORD_POSITION,
        )
    return True
def usage():
    """Print the command line help, i.e. the docstring of ``main``.
    """
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to fix faksimile position ->set them to their absolute value.
    svgscripts/fix_old_data.py [OPTIONS] <xmlManuscriptFile|svg_pos_file>
    <xmlManuscriptFile> a xml file about a manuscript, containing information about its pages.
    <svg_pos_file> a xml file about a page, containing information about svg word positions.
    OPTIONS:
    -h|--help show help
    -c|--check-faksimile-positions check whether faksimile positions have been updated
    -l|--faksimile-line-position create faksimile line positions
    -p|--faksimile-positions fix old faksimile positions
    -r|--redo rerun
    :return: exit code (int)
    """
    # Map the command line switches (and the 'default' key) to fix functions.
    function_dict = create_function_dictionary(
        ['default', '-c', '--check-faksimile-positions'],
        check_faksimile_positions)
    function_dict = create_function_dictionary(
        ['default', '-l', '--faksimile-line-position'],
        fix_faksimile_line_position,
        function_dictionary=function_dict)
    function_dict = create_function_dictionary(
        ['-p', '--faksimile-positions'],
        fix_faksimile_positions,
        function_dictionary=function_dict)

    long_options = ["help", "check-faksimile-positions", "faksimile-positions",
                    "faksimile-line-position", "redo"]
    try:
        opts, args = getopt.getopt(argv, "hcplr", long_options)
    except getopt.GetoptError:
        usage()
        return 2

    redo = False
    function_list = []
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        if opt in ('-r', '--redo'):
            redo = True
        elif opt in function_dict:
            function_list.append(function_dict[opt])
    if not function_list:
        # No fix selected on the command line: fall back to the default one.
        function_list.append(function_dict['default'])

    if not args:
        usage()
        return 2

    exit_status = 0
    xml_file = args[0]
    if not isfile(xml_file):
        raise FileNotFoundError('File {} does not exist!'.format(xml_file))

    # Number of pages each selected function reported as changed.
    counters = {function.__name__: 0 for function in function_list}
    for page in Page.get_pages_from_xml_file(xml_file, status_contains=STATUS_MERGED_OK):
        for current_function in function_list:
            if not UNITTESTING:
                print(Fore.CYAN + f'Processing {page.title}, {page.number} with function {current_function.__name__} ...' + Style.RESET_ALL)
                back_up(page, page.xml_file)
            if current_function(page, redo=redo):
                counters[current_function.__name__] += 1
    if not UNITTESTING:
        for changed_by, counter in counters.items():
            print(Style.RESET_ALL + f'[{counter} pages changed by {changed_by}]')
    return exit_status
if __name__ == "__main__":
    # Pass the command line arguments (without the script name) to main and
    # use its return value as the exit code of the process.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment