Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61493175
box.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 7, 00:44
Size
6 KB
Mime Type
text/x-python
Expires
Thu, May 9, 00:44 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17519442
Attached To
rNIETZSCHEPYTHON nietzsche-python
box.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent svg paths of type 'box'.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata; the maintainer is the same person as the author.
__author__ = "Christian Steiner"
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
lxml
import
etree
as
ET
from
os.path
import
isfile
from
svgpathtools.parser
import
parse_path
import
warnings
from
.matrix
import
Matrix
from
.path
import
Path
from
.transkriptionField
import
TranskriptionField
class Box(Path):
    """Represent an svg path of type 'box'.

    A box extends ``Path`` by the text that was found for it
    (``earlier_text``), the style classes of that text (``text_style_class``)
    and an ``earlier_version`` flag.

    Args:
        id: identifier, passed on to ``Path``.
        node (lxml.etree.Element): node containing information. Non-empty
            'earlier-text' and 'text-style-class' attributes of the node
            override the keyword arguments of the same meaning.
        path (svgpathtools.path.Path): svg path representation.
        d_string: passed on to ``Path``.
        style_class: passed on to ``Path``.
        earlier_text (str): text belonging to the box.
        text_style_class (str): space separated style classes of the text.
        earlier_version (bool): stored unchanged on the instance.
    """
    XML_TAG = 'box-path'

    def __init__(self, id=0, node=None, path=None, d_string=None, style_class='',
                 earlier_text='', text_style_class='', earlier_version=False):
        super(Box, self).__init__(id=id, node=node, path=path, d_string=d_string,
                                  style_class=style_class, tag=Box.XML_TAG)
        self.stringKeys += ['earlier_text', 'text_style_class']
        self.earlier_text = earlier_text
        self.text_style_class = text_style_class
        self.earlier_version = earlier_version
        if node is not None:
            # Non-empty attributes on the node win over the keyword arguments.
            node_earlier_text = node.get('earlier-text')
            if node_earlier_text:
                self.earlier_text = node_earlier_text
            node_text_style_class = node.get('text-style-class')
            if node_text_style_class:
                self.text_style_class = node_text_style_class

    @classmethod
    def create_box(cls, path, margin_boxes_on_line, svg_source=None, svg_tree=None,
                   transkription_field=None, namespaces=None, threshold=1.5):
        """Create a Box from a path and find its corresponding earlier_text outside of transkription_field.

        The first margin box whose median y differs from the median y of
        ``path`` by less than ``threshold`` is taken as the match. It is
        removed from ``margin_boxes_on_line`` (the list is modified in place)
        and the text of the svg text/tspan nodes positioned inside its
        bounding box becomes the ``earlier_text`` of the new box.

        :param path: path object providing ``id``, ``path``, ``style_class``
            and ``get_median_y()``.
        :param margin_boxes_on_line: list of candidate margin boxes.
        :param svg_source: svg file to parse; takes precedence over ``svg_tree``.
        :param svg_tree: already parsed svg tree, used if ``svg_source`` is None.
        :param transkription_field: created from the svg source if None.
        :param namespaces: prefix -> uri mapping for xpath; if empty or None
            it is derived from the root's nsmap with the default namespace
            mapped to the prefix 'ns'.
        :param threshold: maximal distance of the median y values.
        :return: box.Box, or None (after a warning) if no margin box matches.
        """
        if svg_source is not None:
            svg_tree = ET.parse(svg_source)
        if not namespaces:
            # xpath cannot use the None key of nsmap, hence the 'ns' prefix.
            namespaces = {k if k is not None else 'ns': v
                          for k, v in svg_tree.getroot().nsmap.items()}
        if transkription_field is None:
            # NOTE(review): the field is not used further in this method; it is
            # still created as before because the constructor may validate the
            # source -- confirm whether it can be dropped.
            transkription_field = TranskriptionField(svg_source) if svg_source is not None \
                else TranskriptionField(svg_tree.docinfo.URL)
        path_median_y = path.get_median_y()
        matching_box = next(
            (margin_box for margin_box in margin_boxes_on_line
             if abs(margin_box.get_median_y() - path_median_y) < threshold),
            None)
        if matching_box is None:
            warnings.warn(f'No margin box found for box with bbox: {path.path.bbox()}, {margin_boxes_on_line} {threshold}')
            return None
        margin_boxes_on_line.remove(matching_box)
        xmin, xmax, ymin, ymax = matching_box.path.bbox()
        if ymin == ymax:
            # The margin box has no vertical extent: use the one of the path.
            _, _, ymin, ymax = path.path.bbox()
        text_nodes = [text_node for text_node in svg_tree.xpath('//ns:text', namespaces=namespaces)
                      if text_node_is_inside_match_box(text_node, xmin, xmax, ymin, ymax)]
        text_parts = []
        text_styles = []
        if text_nodes:
            text_nodes.sort(key=lambda node: Matrix(transform_matrix_string=node.get('transform')).getX())
            for text_node in text_nodes:
                child_tspans = text_node.xpath('./ns:tspan', namespaces=namespaces)
                if not child_tspans:
                    text_styles.append(text_node.get('class'))
                    # lxml yields None for an element without text.
                    text_parts.append(text_node.text or '')
                else:
                    matrix = Matrix(transform_matrix_string=text_node.get('transform'))
                    for tspan_node in child_tspans:
                        if matrix.add2X(add_to_x=tspan_node.get('x')) < xmax:
                            text_styles.append(tspan_node.get('class'))
                            text_parts.append(tspan_node.text or '')
        else:
            # The tspan candidates are only needed if no text node matched.
            tspan_nodes = [tspan_node for tspan_node in svg_tree.xpath('//ns:text/ns:tspan', namespaces=namespaces)
                           if tspan_node_is_inside_match_box(tspan_node, xmin, xmax, ymin, ymax)]
            if tspan_nodes:
                for tspan_node in tspan_nodes:
                    text_styles.append(tspan_node.get('class'))
                    text_parts.append(tspan_node.text or '')
            else:
                warnings.warn('No text_node found for xmin, xmax, ymin, ymax: {0} {1} {2} {3}'.format(xmin, xmax, ymin, ymax))
        # Unique style classes in order of first appearance; nodes without a
        # 'class' attribute (None) are skipped instead of raising.
        unique_styles = dict.fromkeys(
            item for style in text_styles if style for item in style.split())
        text_style_class = ' '.join(unique_styles)
        box_text = ''.join(text_parts)
        return Box(id=path.id, path=path.path, style_class=path.style_class,
                   earlier_text=box_text.replace(' ', ''), text_style_class=text_style_class)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates and returns a semantic dictionary as specified by SemanticClass.

        The dictionary of the super class is extended by a property entry
        for 'earlier_text' of type str.
        """
        dictionary = super(Box, cls).get_semantic_dictionary()
        dictionary[cls.PROPERTIES_KEY].update(cls.create_semantic_property_dictionary('earlier_text', str))
        return cls.return_dictionary_after_updating_super_classes(dictionary)
def text_node_is_inside_match_box(text_node, xmin, xmax, ymin, ymax):
    """Tell whether text_node is positioned strictly inside a bounding box.

    The position is taken from the Matrix built from the node's 'transform'
    attribute; a node without a non-empty 'transform' is never inside.

    :return: True if ymin < y < ymax and xmin < x < xmax, False otherwise
    """
    transform = text_node.get('transform')
    if not transform:
        return False
    position = Matrix(transform_matrix_string=transform)
    y = position.getY()
    if not ymin < y < ymax:
        return False
    x = position.getX()
    return xmin < x < xmax
def tspan_node_is_inside_match_box(tspan_node, xmin, xmax, ymin, ymax):
    """Tell whether tspan_node is positioned strictly inside a bounding box.

    The y position comes from the Matrix built from the 'transform'
    attribute of the parent node, the x position from that matrix's add2X
    applied to the tspan's own 'x' attribute. A tspan whose parent has no
    non-empty 'transform' is never inside.

    :return: True if ymin < y < ymax and xmin < x < xmax, False otherwise
    """
    parent_transform = tspan_node.getparent().get('transform')
    if not parent_transform:
        return False
    parent_matrix = Matrix(transform_matrix_string=parent_transform)
    x = parent_matrix.add2X(add_to_x=tspan_node.get('x'))
    y = parent_matrix.getY()
    if not ymin < y < ymax:
        return False
    return xmin < x < xmax
Event Timeline
Log In to Comment