Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86467726
transkriptionField.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 6, 16:19
Size
8 KB
Mime Type
text/x-python
Expires
Tue, Oct 8, 16:19 (2 d)
Engine
blob
Format
Raw Data
Handle
21426264
Attached To
rNIETZSCHEPYTHON nietzsche-python
transkriptionField.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to transform an SVG file according to the dimensions of its transkription field.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: author, maintainer, copyright holder, contact address,
# development status and version string.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__version__ = "0.0.1"
import
sys
from
os.path
import
exists
from
svgpathtools
import
svg_to_paths
import
xml.etree.ElementTree
as
ET
from
xml.parsers.expat
import
ExpatError
from
.matrix
import
Matrix
from
.text_field
import
TextField
# Tuning constants for the detection of the transkription field.

# Not referenced by the code visible in this file.
MAX_SMALLER_PATH_WIDTH = 50.0
# Multiplied by documentWidth / 4 in TranskriptionField.sort_according_to_area_desc
# to obtain the minimum bbox area a path needs in order to be kept.
MAX_SMALLER_PATH_HEIGHT = 50.0
# TranskriptionField.get_path_area reports an area of 0.0 for a path whose width
# is within this distance of the document width ...
MAX_DIFF_DOC_SELF_WIDTH = 100.0
# ... or whose height is within this distance of the document height.
MAX_DIFF_DOC_SELF_HEIGHT = 100.0
# Not referenced by the code visible in this file.
MIN_AREA = 2500.0
class TranskriptionField:
    """
    A class containing the dimensions of the transkription field.

    The constructor reads the svg file and takes the document size from the
    root ``viewBox``. If the file is already shrunk (see :meth:`is_shrunk`)
    the viewBox itself describes the field; otherwise the field is the
    bounding box of the largest suitable path of the file.

    Args:
        filename (str): name of the svg file
        multipage_index (int): a negative value (default) selects the largest
            path found. Otherwise the two largest paths are ordered by the
            first value of their bbox (xmin) and ``multipage_index`` is used
            as index into that pair; for index 0 a second TranskriptionField
            with index 1 is created and stored in ``second_field``.

    Raises:
        ExpatError: if the svg file cannot be parsed.
        Exception: if the svg root has no ``viewBox`` attribute.
    """

    def __init__(self, filename, multipage_index=-1):
        self.width = 0.0
        self.height = 0.0
        self.xmin = 0.0
        self.xmax = 0.0
        self.ymin = 0.0
        self.ymin_without_title = 0.0
        self.ymax = 0.0
        self.documentWidth = 0.0
        self.documentHeight = 0.0
        self.path = None
        self.second_field = None
        self.filename = filename
        self.line_number_area_width = 0.0
        try:
            paths, attributes, self.svg_attributes = svg_to_paths.svg2paths(
                filename, return_svg_attributes=True)
        except ExpatError as err:
            # Keep the exception type and message, but chain the parser error explicitly.
            raise ExpatError('File {} is empty!'.format(filename)) from err
        viewBox = self._get_view_box_values()
        if not viewBox:
            raise Exception('File "{}" does not have an attribute "viewBox"'.format(filename))
        self.documentWidth = float(viewBox[2])
        self.documentHeight = float(viewBox[3])
        if self.is_shrunk():
            # The viewBox already is the transkription field.
            # xmax/ymax are left at 0.0 here, exactly as before.
            self.xmin = float(viewBox[0])
            self.ymin = float(viewBox[1])
            self.width = self.documentWidth
            self.height = self.documentHeight
        else:
            sorted_paths = self.sort_according_to_area_desc(paths, attributes)
            if multipage_index < 0 and len(sorted_paths) > 0:
                self.path = sorted_paths[0]
            elif len(sorted_paths) > 1:
                # Order the two largest paths by bbox()[0] (xmin) and pick by index.
                self.path = sorted(sorted_paths[:2], key=lambda path: path.bbox()[0])[multipage_index]
                # NOTE(review): nesting of this condition was reconstructed from a
                # listing without indentation -- confirm against the repository.
                if multipage_index == 0:
                    self.second_field = TranskriptionField(filename, multipage_index=1)
            if self.path is not None:
                self.xmin, self.xmax, self.ymin, self.ymax = self.path.bbox()
                self.width = self.xmax - self.xmin
                self.height = self.ymax - self.ymin
                # NOTE(review): this line is the last statement of __init__; whether it
                # belongs to this branch or to the method level could not be determined
                # from the whitespace-stripped listing -- confirm against the repository.
                self.ymin_without_title = self.ymin - 10

    def _get_view_box_values(self):
        """Return the tokens of the root viewBox as a list of strings.

        Values may be separated by whitespace and/or commas. Returns an empty
        list if there are no svg attributes or no (non-empty) viewBox.
        """
        if not self.svg_attributes or not self.svg_attributes.get('viewBox'):
            return []
        return self.svg_attributes['viewBox'].replace(',', ' ').split()

    def add_line_number_area_width(self, end_positionX_of_line_number_area):
        """Adds the width of the line number area.

        For a verso page (see :meth:`is_page_verso`) the width is measured from
        the given position to ``xmin``, otherwise from ``xmax`` to the given position.
        """
        if self.is_page_verso():
            self.line_number_area_width = self.xmin - end_positionX_of_line_number_area
        else:
            self.line_number_area_width = end_positionX_of_line_number_area - self.xmax

    # The annotation is quoted so that it is not evaluated when the class is defined.
    def convert_to_text_field(self) -> 'TextField':
        """Convert to TextField (width, height, x=xmin, y=ymin).
        """
        return TextField(width=self.width, height=self.height, x=self.xmin, y=self.ymin)

    def is_page_verso(self) -> bool:
        """Returns true if the area right of the TranskriptionField is less than the left area.
        """
        return self.documentWidth - self.xmax < self.xmin

    def is_shrunk(self) -> bool:
        """Returns True if viewBox[0] and viewBox[1] are both != 0.

        Returns False if there is no viewBox or if it has fewer than two values.
        """
        # NOTE(review): a viewBox with only one non-zero origin coordinate counts as
        # "not shrunk" -- confirm that "and" (rather than "or") is intended.
        viewBox = self._get_view_box_values()
        if len(viewBox) < 2:
            return False
        return float(viewBox[0]) != 0 and float(viewBox[1]) != 0

    def get_svg_attributes(self, attrib_key):
        """Returns the svg attribute for the corresponding key or None if empty.
        """
        if not self.svg_attributes:
            return None
        # Falsy values (e.g. an empty string) are reported as None.
        return self.svg_attributes.get(attrib_key) or None

    def shrink_svg_to_transkription_field(self, target_filename=None, redo=False):
        """ Changes the viewBox of the svg graphics to the size of the transkription field.
        If a target_filename is specified, the changes are saved to a new file,
        otherwise they are saved to the input file.

        Args:
            target_filename (str): name of the target svg file
            redo (bool): rewrite the viewBox even if the file is already shrunk

        Returns:
            int: 0 if the file has been written, 1 if the file is already shrunk
            and redo is False, 2 if the svg root has no viewBox.
        """
        # Register the namespaces so that ElementTree writes them with their original prefixes.
        if self.svg_attributes.get('xmlns'):
            ET.register_namespace('', self.svg_attributes['xmlns'])
        if self.svg_attributes.get('xmlns:xlink'):
            ET.register_namespace('xlink', self.svg_attributes['xmlns:xlink'])
        et = ET.parse(self.filename)
        root = et.getroot()
        if not root.attrib.get('viewBox'):
            print('ERROR: file {} does not contain a svg/@viewBox!'.format(self.filename))
            #TODO: throw error
            return 2
        if not redo and self.is_shrunk():
            return 1
        root.attrib['viewBox'] = '{} {} {} {}'.format(self.xmin, self.ymin, self.width, self.height)
        # width/height are only updated if the root already has them.
        for key, value in (('width', self.width), ('height', self.height)):
            if root.attrib.get(key):
                root.attrib[key] = '{}pt'.format(value)
        et.write(target_filename or self.filename)
        return 0

    def transkription_field_found(self) -> bool:
        """ Returns whether transkription field was found in __init__,
        i.e. whether width, height, xmin, xmax, ymin and ymax are all > 0.
        """
        dimensions = (self.width, self.height, self.xmin, self.xmax, self.ymin, self.ymax)
        return all(value > 0.0 for value in dimensions)

    def getWidth(self):
        """Returns documentWidth
        """
        return self.documentWidth

    def getHeight(self):
        """Returns documentHeight
        """
        return self.documentHeight

    def get_path_area(self, path, attribute_dict, removal_dict=None) -> float:
        """Return area of path.bbox

        Returns 0.0 for an empty, non-continuous or non-closed path, for a path
        that is nearly as wide or as high as the document (see
        MAX_DIFF_DOC_SELF_WIDTH / MAX_DIFF_DOC_SELF_HEIGHT) and if an
        AssertionError is raised while the path is examined.

        Args:
            path: the path to measure
            attribute_dict (dict): attributes of the path; a 'transform' entry is applied to the bbox
            removal_dict: not used by this method
        """
        try:
            if not path or not path.iscontinuous() or not path.isclosed():
                return 0.0
            xmin, xmax, ymin, ymax = path.bbox()
            width = xmax - xmin
            height = ymax - ymin
            if 'transform' in attribute_dict:
                matrix = Matrix(attribute_dict['transform'])
                xmin, ymax, width, height = matrix.get_transformed_positions(xmin, ymin, width, height)
                xmax = matrix.get_new_x()
                ymin = matrix.get_new_y()
                width = xmax - xmin
                height = ymax - ymin
            # A path that (almost) spans the whole document is not the transkription field.
            if self.documentWidth - width <= MAX_DIFF_DOC_SELF_WIDTH:
                return 0.0
            if self.documentHeight - height <= MAX_DIFF_DOC_SELF_HEIGHT:
                return 0.0
            return width * height
        except AssertionError:
            return 0.0

    def sort_according_to_area_desc(self, paths, attributes, removal_dict=None) -> list:
        """Return a sorted list of paths sorted according to the area of their bbox, remove smaller paths.

        Args:
            paths (list): the paths of the svg file
            attributes (list): the attribute dictionaries belonging to paths (same order)
            removal_dict: passed on to get_path_area

        Returns:
            list: the paths whose area exceeds MAX_SMALLER_PATH_HEIGHT * documentWidth / 4,
            largest area first.
        """
        min_area = MAX_SMALLER_PATH_HEIGHT * self.documentWidth / 4
        # Compute every area only once; it is needed for filtering and for sorting.
        areas_and_paths = [
            (self.get_path_area(path, attributes[index], removal_dict=removal_dict), path)
            for index, path in enumerate(paths)
        ]
        candidates = [area_and_path for area_and_path in areas_and_paths if area_and_path[0] > min_area]
        # Sort by area only (stable), so paths themselves are never compared.
        candidates.sort(key=lambda area_and_path: area_and_path[0], reverse=True)
        return [path for _, path in candidates]
Event Timeline
Log In to Comment