Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61256327
crop_rotate_thumbs.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, May 5, 12:48
Size
9 KB
Mime Type
text/x-python
Expires
Tue, May 7, 12:48 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17486395
Attached To
rNIETZSCHEPYTHON nietzsche-python
crop_rotate_thumbs.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to crop and/or rotate thumb images.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
import
cv2
import
getopt
import
json
import
lxml.etree
as
ET
from
numpy
import
ndarray
import
shutil
import
subprocess
import
sys
import
os
import
wget
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
,
mkdir
,
remove
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
warnings
from
fix_old_data
import
save_page
from
get_text_field
import
get_text_field_on_image
,
get_text_field_on_thumb
sys
.
path
.
append
(
'svgscripts'
)
from
datatypes.faksimile_image
import
FaksimileImage
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.archival_manuscript
import
ArchivalManuscriptUnity
from
datatypes.page
import
Page
,
STATUS_MERGED_OK
,
STATUS_POSTMERGED_OK
from
datatypes.matrix
import
Matrix
from
util
import
back_up
,
back_up_svg_file
,
copy_faksimile_update_image_location
,
copy_faksimile_svg_file
from
process_files
import
update_svgposfile_status
from
process_words_post_merging
import
update_faksimile_line_positions
,
MERGED_DIR
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
copy_to_bak_dir
,
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
main_util
import
create_function_dictionary
# --- Tunable constants -------------------------------------------------------
# Status string used to mark blank pages.
BLANK_STATUS = 'blank'
# Threshold for svg x/y values (not referenced by the code in this file).
MAX_SVG_XY_THRESHOLD = 10

# --- Module metadata ---------------------------------------------------------
__author__ = "Christian Steiner"
__maintainer__ = __author__  # the author also maintains this script
__email__ = "christian.steiner@unibas.ch"
__copyright__ = 'University of Basel'
__license__ = "GPL v3"
__status__ = "Development"
__version__ = "0.0.1"
class ImageManipulator:
    """Crop and/or rotate the thumb images of faksimile pages.

    For a page file the manipulator looks up the first ``faksimile-image``
    node, crops the corresponding ``*_thumb.jpg`` according to the viewBox of
    an accompanying svg file (if one exists), rotates it according to the
    node's ``transform`` attribute (if set), writes the result next to the
    original thumb and stores the new file name in the node's ``thumb``
    attribute.
    """
    # Read by main() to suppress console output while unit tests are running.
    UNITTESTING = False

    def __init__(self, faksimile_dir=None, fix_doubles=False):
        """Store the configuration.

        :param faksimile_dir: directory containing the thumb images and svgs.
        :param fix_doubles: if True, the svg of a double page (file name
            containing 'et') is looked up under the name in front of 'et'.
        """
        self.faksimile_dir = faksimile_dir
        self.fix_doubles = fix_doubles

    def _crop_image(self, image: ndarray, image_width: float, image_height: float,
                    x: float, y: float, height: float, width: float) -> ndarray:
        """Crop image. Return cropped image.

        The rectangle (x, y, width, height) is given in the coordinate system
        of the svg, in which the whole image measures image_width x
        image_height; it is scaled to the pixel dimensions of ``image``.

        :param image: pixel array with shape (rows, columns[, channels]).
        :param image_width: width of the image in svg units, must be > 0.
        :param image_height: height of the image in svg units, must be > 0.
        :param x: left edge of the crop rectangle in svg units.
        :param y: top edge of the crop rectangle in svg units.
        :param height: height of the crop rectangle in svg units.
        :param width: width of the crop rectangle in svg units.
        :raises ValueError: if image_width or image_height is not positive.
        :return: the cropped part of image (a numpy slice of the input).
        """
        if image_width <= 0 or image_height <= 0:
            raise ValueError(
                f'Cannot crop: svg image dimensions must be positive, got {image_width}x{image_height}!')
        # Only the first two axes matter, so grayscale (2-D) arrays work too.
        original_height, original_width = image.shape[:2]
        image_x = round(x * original_width / image_width)
        image_w = round(width * original_width / image_width)
        image_y = round(y * original_height / image_height)
        image_h = round(height * original_height / image_height)
        # A negative slice start would count from the end of the array, so a
        # rectangle reaching beyond the top/left border is clamped to the border.
        top, bottom = max(image_y, 0), max(image_y + image_h, 0)
        left, right = max(image_x, 0), max(image_x + image_w, 0)
        return image[top:bottom, left:right]

    def crop_according2svg(self, image: ndarray, thumb_svg: str) -> ndarray:
        """Crop according to svg and return image.

        The svg's first ``image`` element supplies the dimensions of the whole
        image in svg units, the ``viewBox`` of the svg root supplies the
        rectangle to keep. If the svg contains no ``image`` element, the image
        is returned unchanged.

        :param image: pixel array of the thumb image.
        :param thumb_svg: path of the svg file describing the crop.
        :raises ValueError: if the svg has an image element but no usable
            viewBox or image dimensions.
        :return: the cropped image.
        """
        svg_tree = ET.parse(thumb_svg)
        root = svg_tree.getroot()
        # XPath cannot address the default namespace (key None), so it is
        # made available under the prefix 'ns'.
        namespaces = {k if k is not None else 'ns': v for k, v in root.nsmap.items()}
        images = svg_tree.xpath('//ns:image', namespaces=namespaces)
        if not images:
            return image
        image_width = float(images[0].get('width') or 0)
        image_height = float(images[0].get('height') or 0)
        # viewBox numbers may be separated by whitespace and/or commas.
        view_box_values = (root.get('viewBox') or '').replace(',', ' ').split()
        if len(view_box_values) != 4:
            raise ValueError(f'File {thumb_svg} has no valid viewBox!')
        x, y, width, height = [float(value) for value in view_box_values]
        return self._crop_image(image, image_width, image_height, x, y, height, width)

    def rotate_according2transform(self, image, transform) -> ndarray:
        """Rotate image according to transform

        :param image: pixel array of the thumb image.
        :param transform: value of the ``transform`` attribute; it is passed
            to ``Matrix``.
        :return: the rotated image, or the unchanged image if
            ``Matrix.get90DegreeIndex`` returns -1.
        """
        # get90DegreeIndex() is used as index into this list; presumably the
        # indices stand for 90, 180 and 270 degrees -- verify against Matrix.
        rotation_flags = [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_180, cv2.ROTATE_90_COUNTERCLOCKWISE]
        mindex = Matrix(transform).get90DegreeIndex()
        if mindex != -1:
            return cv2.rotate(image, rotation_flags[mindex])
        return image

    def _get_thumb_svg(self, faksimile_node, thumb_image: str) -> str:
        """Return the path of the svg file that describes the crop of thumb_image."""
        thumb_svg = thumb_image.replace('.jpg', '.svg')
        thumb = faksimile_node.get('thumb')
        # A 'thumb' attribute naming a jpg is the output of this script (see
        # process_thumb_of_page); in that case the default svg name is kept.
        if thumb and not thumb.endswith('jpg'):
            thumb_svg = self.faksimile_dir + sep + thumb
        if self.fix_doubles and 'et' in basename(thumb_svg):
            base_thumb_svg = basename(thumb_svg)
            thumb_svg = self.faksimile_dir + sep + base_thumb_svg[:base_thumb_svg.index('et')] + '_thumb.svg'
        return thumb_svg

    def _get_target_file(self, page, thumb_image: str, target_extension: str) -> str:
        """Return the path under which the manipulated thumb image is saved."""
        base_thumb_image = basename(thumb_image)
        if 'et' not in base_thumb_image:
            return thumb_image.replace('_thumb', target_extension + '_thumb')
        # NOTE(review): 'et' presumably joins the two page numbers of a double
        # page ("<first>et<second>_thumb.jpg") and the slice below assumes the
        # first number is two characters long -- confirm the naming scheme.
        et_index = base_thumb_image.index('et')
        faksimile_image = page.faksimile_image
        text_field_left = faksimile_image.text_field.left
        if text_field_left < (faksimile_image.width - text_field_left) / 2:
            # Keep the part in front of 'et'.
            return self.faksimile_dir + sep + base_thumb_image[:et_index] + target_extension + '_thumb.jpg'
        # Drop the two characters in front of 'et' and 'et' itself.
        return self.faksimile_dir + sep \
            + base_thumb_image[:et_index - 2] \
            + base_thumb_image[et_index + 2:].replace('_thumb', target_extension + '_thumb')

    def process_thumb_of_page(self, page_file: str) -> int:
        """Crop and/or rotate the thumb image of a page and update the page file.

        :param page_file: path of the page's xml file.
        :raises FileNotFoundError: if the thumb image cannot be read.
        :raises OSError: if the manipulated thumb image cannot be written.

        [return] exit_status
            0 -- a new thumb image has been written and the page updated,
            1 -- nothing to do (neither a svg file nor a transform exists),
            2 -- the page has no faksimile-image node (page_file is printed).
        """
        page = Page.create_cls(page_file)
        faksimile_nodes = page.page_tree.xpath('//faksimile-image')
        if not faksimile_nodes:
            print(page_file)
            return 2
        faksimile_node = faksimile_nodes[0]
        transform = faksimile_node.get('transform')
        thumb_image = self.faksimile_dir + sep + faksimile_node.get('file-name').replace('.jpg', '_thumb.jpg')
        thumb_svg = self._get_thumb_svg(faksimile_node, thumb_image)
        crop = isfile(thumb_svg)
        rotate = bool(transform)
        if not (crop or rotate):
            return 1
        image = cv2.imread(thumb_image)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f'Thumb image {thumb_image} does not exist or cannot be read!')
        target_extension = ''
        if crop:
            image = self.crop_according2svg(image, thumb_svg)
            target_extension = '_cropped'
        if rotate:
            # The suffix is added even if the transform is no 90 degree
            # rotation and the image is therefore left unrotated.
            image = self.rotate_according2transform(image, transform)
            target_extension = target_extension + '_rotated'
        target_file = self._get_target_file(page, thumb_image, target_extension)
        # Only reference the new thumb in the page file if it really exists.
        if not cv2.imwrite(target_file, image):
            raise OSError(f'Could not write thumb image {target_file}!')
        faksimile_node.set('thumb', basename(target_file))
        save_page(page, backup=True)
        return 0
def usage():
    """prints information on how to use the script
    """
    # The help text is maintained in one place only: the docstring of main.
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to crop and/or rotate thumb images.
    fixes/crop_rotate_thumbs.py [OPTIONS] <xmlManuscriptFile|svg_pos_file> <faksimile-dir>
    <xmlManuscriptFile> a xml file about a manuscript, containing information about its pages.
    <svg_pos_file> a xml file about a page, containing information about svg word positions.
    <faksimile-dir> a directory containing faksimile images
    OPTIONS:
    -h|--help show help
    -d|--fix-doubles fix double pages
    :return: exit code (int)
    """
    # NOTE: the docstring above is printed verbatim by usage(), so it is kept
    # as user-facing help text; implementation notes go into comments.
    #
    # Returns 0 on success and after --help, 2 on wrong options or missing
    # arguments. Raises FileNotFoundError if the faksimile directory, the xml
    # file or the manuscript file belonging to a svg_pos_file does not exist.
    fix_doubles = False
    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "fix-doubles"])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        if opt in ('-d', '--fix-doubles'):
            fix_doubles = True
    if len(args) < 2:
        usage()
        return 2
    xml_file, faksimile_dir = args[0], args[1]
    # A missing directory is reported in preference to a missing file.
    if not isdir(faksimile_dir):
        raise FileNotFoundError(f'Directory {faksimile_dir} does not exist!')
    if not isfile(xml_file):
        raise FileNotFoundError('File {} does not exist!'.format(xml_file))

    page_xpath = '//page'
    xpath_variables = {}
    manuscript_file = xml_file
    if len(xml_file.split('_')) > 2:
        # svg_pos_file: its name starts with the name of the manuscript file,
        # followed by further '_'-separated parts.
        manuscript_file = '_'.join(xml_file.split('_')[0:2]) + '.xml'
        if not isfile(manuscript_file):
            raise FileNotFoundError(
                f'There is no manuscript file {manuscript_file} for svg_pos_file {xml_file}!')
        # The file name is passed as XPath variable instead of being pasted
        # into the expression, so that quotes in the name cannot break it.
        page_xpath = '//page[contains(@output, $svg_pos_file)]'
        xpath_variables['svg_pos_file'] = xml_file
    source_tree = ET.parse(manuscript_file)

    image_manipulator = ImageManipulator(faksimile_dir, fix_doubles=fix_doubles)
    title = basename(manuscript_file).replace('.xml', '').replace('_', ' ')
    counter = 0
    for page in source_tree.xpath(page_xpath, **xpath_variables):
        if not ImageManipulator.UNITTESTING:
            number = page.get('number')
            print(Fore.CYAN + f'Processing thumb image of {title}, {number} ...' + Style.RESET_ALL)
        # Exit status 0 means that a new thumb image has been written.
        if image_manipulator.process_thumb_of_page(page.get('output')) == 0:
            counter += 1
    if not ImageManipulator.UNITTESTING:
        print(Style.RESET_ALL + f'[{counter} images changed]')
    return 0
# Script entry point: hand the command line arguments (without the program
# name) to main and use its return value as the exit code of the process.
if __name__ == "__main__":
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment