Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F121706362
get_text_field.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Jul 13, 08:08
Size
6 KB
Mime Type
text/x-python
Expires
Tue, Jul 15, 08:08 (2 d)
Engine
blob
Format
Raw Data
Handle
27377631
Attached To
rNIETZSCHEPYTHON nietzsche-python
get_text_field.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to create svg files with a rect for the text_field.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
from
colorama
import
Fore
,
Style
import
getopt
import
json
import
lxml.etree
as
ET
import
shutil
import
subprocess
import
sys
import
os
import
wget
import
cv2
import
matplotlib.pyplot
as
plt
import
numpy
as
np
from
os
import
listdir
,
sep
,
path
,
setpgrp
,
devnull
,
mkdir
,
remove
from
os.path
import
exists
,
isfile
,
isdir
,
dirname
,
basename
from
progress.bar
import
Bar
import
warnings
if
dirname
(
__file__
)
not
in
sys
.
path
:
sys
.
path
.
append
(
dirname
(
__file__
))
from
fix_old_data
import
save_page
sys
.
path
.
append
(
'svgscripts'
)
from
datatypes.faksimile_image
import
FaksimileImage
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.archival_manuscript
import
ArchivalManuscriptUnity
from
datatypes.page
import
Page
,
STATUS_MERGED_OK
,
STATUS_POSTMERGED_OK
from
datatypes.text_field
import
TextField
from
util
import
back_up
,
back_up_svg_file
,
copy_faksimile_update_image_location
,
copy_faksimile_svg_file
from
process_files
import
update_svgposfile_status
from
process_words_post_merging
import
update_faksimile_line_positions
,
MERGED_DIR
sys
.
path
.
append
(
'shared_util'
)
from
myxmlwriter
import
write_pretty
,
xml_has_type
,
FILE_TYPE_SVG_WORD_POSITION
,
FILE_TYPE_XML_MANUSCRIPT
from
main_util
import
create_function_dictionary
# Module metadata.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"

# Used in two ways in this file: as the minimal index distance between two
# matching rows/columns that get_start_and_end_index[_gray] treats as the
# extent of the text field, and as the index of the image row that
# get_text_field_on_image samples in order to choose its threshold mode.
THRESHOLD = 60
# Gray value that lessThan() compares its argument against.
GRAY_THRESHOLD = 200
# NOTE(review): not referenced anywhere in the visible part of this file.
BLANK_STATUS = 'blank'
def get_text_field_on_thumb(image_file: str, id=0) -> (TextField, float, float):
    """Find the area of the faksimile image where a manuscript page is displayed
    and return it as a TextField together with the pixel size of the image.

    The image is slightly blurred, converted to gray scale and binarized with
    an inverse threshold of 220, so that every pixel brighter than 220 becomes 0.
    The rows and columns that consist of such pixels only are then used by
    get_start_and_end_index in order to locate the borders of the text field.

    :param image_file: path of the image file
    :param id: id that is passed on to the resulting TextField
    :return: (TextField in pixel coordinates, image width, image height)
    :raises FileNotFoundError: if image_file does not exist
    :raises ValueError: if image_file cannot be read as an image
    """
    if not isfile(image_file):
        raise FileNotFoundError(f'file {image_file} not found!')
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread does not raise on unreadable input, it returns None.
        raise ValueError(f'file {image_file} could not be read as an image!')
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)[1]
    original_height, original_width = image.shape[:2]
    # Rows give the vertical extent, the transposed image gives the horizontal one.
    top, bottom = get_start_and_end_index(thresh)
    left, right = get_start_and_end_index(thresh.T)
    text_field = TextField(id=id, x=left, y=top, width=right - left, height=bottom - top)
    return (text_field, original_width, original_height)
def lessThan(x):
    """Tell whether x lies below GRAY_THRESHOLD.

    :return: the result of the comparison ``x < GRAY_THRESHOLD``
    """
    below_threshold = x < GRAY_THRESHOLD
    return below_threshold
def get_text_field_on_image(image_file: str, image_width: float, image_height: float, id=0) -> TextField:
    """Find the area of the faksimile image where a manuscript page is displayed
    and return it as a TextField.

    The borders are detected on the pixels of the image file and then scaled
    to the coordinate system given by image_width and image_height.

    :param image_file: path of the image file
    :param image_width: width to which the horizontal pixel positions are scaled
    :param image_height: height to which the vertical pixel positions are scaled
    :param id: id that is passed on to the resulting TextField
    :return: the TextField, its values rounded to one decimal before subtraction
    :raises FileNotFoundError: if image_file does not exist
    :raises ValueError: if image_file cannot be read as an image
    """
    if not isfile(image_file):
        raise FileNotFoundError(f'file {image_file} not found!')
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread does not raise on unreadable input, it returns None.
        raise ValueError(f'file {image_file} could not be read as an image!')
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    # Sample one row of the image: if it contains at least one pixel that is
    # not darker than GRAY_THRESHOLD, pixels brighter than 220 are mapped to 0
    # (inverse threshold); if the whole row is darker, pixels that are not
    # brighter than 200 are mapped to 0 instead.
    if np.any(gray[THRESHOLD] >= GRAY_THRESHOLD):
        thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)[1]
    else:
        thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
    original_height, original_width = image.shape[:2]
    # Rows give the vertical extent ...
    startLine, lastLine = get_start_and_end_index(thresh)
    top = round(startLine*image_height/original_height, 1)
    bottom = round(lastLine*image_height/original_height, 1)
    # ... and the transposed image gives the horizontal one.
    startLine, lastLine = get_start_and_end_index(thresh.T)
    left = round(startLine*image_width/original_width, 1)
    right = round(lastLine*image_width/original_width, 1)
    return TextField(id=id, x=left, y=top, width=right - left, height=bottom - top)
def get_start_and_end_index_gray(image, thresholdSum) -> (int, int):
    """Scan image line by line for the lines whose sum is below thresholdSum.

    Matching lines that follow the previous match at a distance of less than
    THRESHOLD only move the end index forward. The first matching line that is
    at least THRESHOLD indices away from the previous match ends the scan: the
    previous match becomes the start index, the current line the end index.

    :return: (start index, end index); the start index is -1 if no such
        distance has been found, the end index is -1 if no line matched.
    """
    first, last = -1, -1
    for position, row in enumerate(image):
        if first != -1 or not (np.sum(row) < thresholdSum):
            continue
        if position - last >= THRESHOLD:
            first, last = last, position
            break
        last = position
    return first, last
def get_start_and_end_index(thresh) -> (int, int):
    """Scan thresh line by line for the lines whose sum is zero.

    Zero lines that follow the previous zero line at a distance of less than
    THRESHOLD only move the end index forward. The first zero line that is at
    least THRESHOLD indices away from the previous one ends the scan: the
    previous zero line becomes the start index, the current one the end index.

    :return: (start index, end index); the start index is -1 if no such
        distance has been found, the end index is -1 if there is no zero line.
    """
    start_index = -1
    end_index = -1
    for index, row in enumerate(thresh):
        is_candidate = start_index == -1 and np.sum(row) == 0
        if not is_candidate:
            continue
        distance = index - end_index
        if distance < THRESHOLD:
            end_index = index
            continue
        start_index, end_index = end_index, index
        break
    return start_index, end_index
def usage():
    """Print information on how to use the script.

    The usage text is the docstring of main().
    """
    print(main.__doc__)


def main(argv):
    """This program can be used to find the text field on a faksimile image and print it.

    fixes/get_text_field.py [OPTIONS] <faksimile-image> <width> <height>

        <faksimile-image>   the image file on which the text field should be found
        <width>             the width to which the horizontal positions are scaled (number)
        <height>            the height to which the vertical positions are scaled (number)

        OPTIONS:
        -h|--help           show help

        :return: exit code (int)
    """
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        usage()
        return 2

    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0

    if len(args) < 3:
        usage()
        return 2

    image_file = args[0]
    try:
        image_width = float(args[1])
        image_height = float(args[2])
    except ValueError:
        # Treat non-numeric dimensions like any other command line error.
        usage()
        return 2

    if not isfile(image_file):
        raise FileNotFoundError(f'File {image_file} does not exist!')
    textfield = get_text_field_on_image(image_file, image_width, image_height)
    print(textfield)
    return 0
if __name__ == "__main__":
    # Run the command line interface and hand its exit code to the shell.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment