Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F63250816
positional_word_part.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, May 18, 19:45
Size
9 KB
Mime Type
text/x-python
Expires
Mon, May 20, 19:45 (2 d)
Engine
blob
Format
Raw Data
Handle
17750337
Attached To
rNIETZSCHEPYTHON nietzsche-python
positional_word_part.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This module provides a class that represents a positional word part, i.e. a part of a word that has a position on the transkription.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact, license and version of this file.
__author__ = "Christian Steiner"
# The maintainer is the author.
__maintainer__ = __author__
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
lxml
import
etree
as
ET
from
svgpathtools.parser
import
parse_path
import
sys
import
warnings
from
.positional_object
import
PositionalObject
sys
.
path
.
append
(
'py2ttl'
)
from
class_spec
import
UnSemanticClass
class PositionalWordPart(PositionalObject, UnSemanticClass):
    """
    This class represents a positional word part, i.e. a part of a word that has a position on the transcription.

    Args:
        node: optional xml node. If given, text, symbol_id and style_class are read from its
            attributes 'text', 'symbol-id' and 'style-class' and replace the corresponding arguments.
        id (int): object id
        height (float): height of object
        width (float): width of object
        x (float): x position of object
        y (float): y position of object
        matrix (datatypes.Matrix): matrix containing information about conversion.
        text (str): text
        symbol_id (str): id of corresponding symbol
        style_class (str): style class id
    """
    WARN_NO_USE_NODE_FOUND = 'No use_node found'
    XML_TAG = 'word-part'
    # Keys this class adds to the string keys of its super class.
    extraStringKeys = [ 'text', 'symbol_id', 'style_class' ]

    def __init__(self, node=None, id=0, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None, text=None, symbol_id=None, style_class=None):
        super(PositionalWordPart, self).__init__(id=id, node=node, height=height, width=width, x=x, y=y,
                                                 matrix=matrix, tag=PositionalWordPart.XML_TAG)
        # Rebind to a new list instead of extending in place: if the super class ever keeps
        # stringKeys on the class rather than on the instance, "+=" would grow that shared list
        # on every instantiation.
        self.stringKeys = self.stringKeys + list(PositionalWordPart.extraStringKeys)
        self.text = text
        self.symbol_id = symbol_id
        self.style_class = style_class
        if node is not None:
            # A node always wins over the keyword arguments, even for attributes it lacks
            # (node.get then yields None).
            self.text = node.get('text')
            self.symbol_id = node.get('symbol-id')
            self.style_class = node.get('style-class')

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        Adds one property entry (type str, cardinality 1) for each key in extraStringKeys
        to the dictionary of the super class.

        [:return:] the dictionary returned by return_dictionary_after_updating_super_classes
        """
        # super(cls, cls) would resolve back to this very method for any subclass that inherits it
        # and recurse forever; anchoring the lookup on PositionalWordPart avoids that.
        dictionary = super(PositionalWordPart, cls).get_semantic_dictionary()
        properties = dictionary[cls.PROPERTIES_KEY]
        for extraStringKey in cls.extraStringKeys:
            properties.update(cls.create_semantic_property_dictionary(extraStringKey, str, cardinality=1))
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    @staticmethod
    def CREATE_POSITIONAL_WORD_PART(text, use_node, namespaces, start_id=0, xmin=0.0, ymin=0.0, matrix=None, style_class=None, original_x=0.0, original_y=0.0):
        """Creates a PositionalWordPart from a svg use node.

        Args:
            text (str): text of the new word part
            use_node: svg use node; its xlink:href names the symbol, its x/y attributes the position
            namespaces (dict): namespaces for xpath queries, must contain the keys 'xlink' and 'ns'
            start_id (int): id of the new word part
            xmin (float): value subtracted from the x attribute of use_node
            ymin (float): value subtracted from the y attribute of use_node
            matrix (datatypes.Matrix): matrix of the word part, may be None
            style_class (str): style class id
            original_x (float): currently unused, kept for backward compatibility
            original_y (float): if matrix is a rotation matrix and this value is not 0, it is used as y

        [:return:] a PositionalWordPart
        """
        symbol_id = use_node.get('{%s}href' % namespaces['xlink']).replace('#', '')
        x = float(use_node.get('x')) - xmin if bool(use_node.get('x')) else 0.0
        y = float(use_node.get('y')) - ymin if bool(use_node.get('y')) else 0.0
        if matrix is not None and matrix.isRotationMatrix():
            x = matrix.get_old_x(x=x, y=y)
            y = original_y if original_y != 0 else y
        d_strings = use_node.xpath('//ns:symbol[@id="{0}"]/ns:path/@d'.format(symbol_id), namespaces=namespaces)
        if not d_strings or d_strings[0] == '':
            # No path data for the symbol: size stays at the defaults of PositionalWordPart.
            return PositionalWordPart(id=start_id, text=text, x=x, y=y, matrix=matrix,
                                      symbol_id=symbol_id, style_class=style_class)
        # Distinct names for the bounding box, so the xmin/ymin parameters are not overwritten.
        path_xmin, path_xmax, path_ymin, path_ymax = parse_path(d_strings[0]).bbox()
        width = path_xmax - path_xmin
        # The height never drops below 3.
        height = path_ymax - path_ymin if path_ymax - path_ymin > 3 else 3
        if path_ymin < 0 and path_ymax < 0:
            y += path_ymin
        # y is shifted by the height of the part.
        return PositionalWordPart(id=start_id, text=text, height=height, width=width, x=x, y=y-height,
                                  matrix=matrix, symbol_id=symbol_id, style_class=style_class)

    @staticmethod
    def CREATE_POSITIONAL_WORD_PART_LIST(word_part_obj, svg_path_tree, namespaces, page=None, start_id=0, xmin=0.0, ymin=0.0, threshold=0.4, throw_error_if_not_found=False):
        """Creates a list of PositionalWordPart from a word_part_obj (a dictionary with the keys: text, x, y, matrix, class),
        using a (lxml.ElementTree) svg_path_tree and the corresponding namespaces.

        The first use node is searched within +/- threshold around (x + xmin, y + ymin); each following
        character of the text is matched with the next sibling of the previous node. If the siblings
        run out before the text does, the remaining text is processed by a recursive call that starts
        to the right of the last created word part.

        Args:
            word_part_obj (dict): dictionary with the keys text, x, y, matrix, class
            svg_path_tree: lxml tree that is searched for use nodes
            namespaces (dict): namespaces for xpath queries, must contain the keys 'xlink' and 'ns'
            page: page used for the fallback CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST, may be None
            start_id (int): id of the first created word part, following parts count up from it
            xmin (float): offset added to x in order to get the svg x position
            ymin (float): offset added to y in order to get the svg y position
            threshold (float): tolerance of the position lookup
            throw_error_if_not_found (bool): raise an Exception instead of using the fallback

        Raises:
            Exception: if no use node is found and page is None or throw_error_if_not_found is set.

        [:return:] a list of PositionalWordPart
        """
        original_x, original_y = 0.0, 0.0
        x = float(word_part_obj['x']) if bool(word_part_obj.get('x')) else 0.0
        y = float(word_part_obj['y']) if bool(word_part_obj.get('y')) else 0.0
        text = word_part_obj.get('text')
        matrix = word_part_obj.get('matrix')
        style_class = word_part_obj.get('class')
        if matrix is not None and matrix.isRotationMatrix():
            original_x, original_y = x, y
            x = matrix.get_new_x(x=original_x, y=original_y)
            y = matrix.get_new_y(x=original_x, y=original_y)
        if text is None or text == '':
            return [ ]
        svg_x = x + xmin
        svg_y = y + ymin
        use_nodes = svg_path_tree.xpath('//ns:use[@x>="{0}" and @x<="{1}" and @y>="{2}" and @y<="{3}"]'
                                        .format(svg_x-threshold, svg_x+threshold, svg_y-threshold, svg_y+threshold),
                                        namespaces=namespaces)
        if not use_nodes:
            message = '{} for text {} svg_x {}, svg_y {}'.format(PositionalWordPart.WARN_NO_USE_NODE_FOUND, text, svg_x, svg_y)
            if page is None or throw_error_if_not_found:
                raise Exception(message)
            warnings.warn(message)
            return PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, [ word_part_obj ])
        word_part_list = []
        current_use_node = use_nodes[0]
        index = 0
        while True:
            word_part_list.append(PositionalWordPart.CREATE_POSITIONAL_WORD_PART(
                text[index], current_use_node, namespaces, start_id=start_id, xmin=xmin, ymin=ymin,
                matrix=matrix, style_class=style_class, original_x=original_x, original_y=original_y))
            index, start_id = index+1, start_id+1
            next_use_node = current_use_node.getnext()
            if index >= len(text) or next_use_node is None:
                break
            current_use_node = next_use_node
        if index < len(text):
            # The siblings ran out before the text did: continue with the rest of the text,
            # starting 0.5 to the right of the last word part.
            last_pwp = word_part_list[-1]
            new_word_part_obj = word_part_obj.copy()
            new_word_part_obj['x'] = last_pwp.left + last_pwp.width + 0.5
            new_word_part_obj['y'] = last_pwp.bottom
            new_word_part_obj['text'] = word_part_obj['text'][index:]
            # NOTE(review): threshold and throw_error_if_not_found are not handed down, so the
            # recursive call uses their defaults -- confirm whether this is intended.
            word_part_list += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(
                new_word_part_obj, svg_path_tree, namespaces, page, start_id=start_id, xmin=xmin, ymin=ymin)
        return word_part_list

    @staticmethod
    def CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs):
        """Creates a list of PositionalWordPart from word_part_objs (i.e. a list of dictionaries
        with the keys: text, x, y, matrix, class).

        Height and width are estimated from the biggest font size of the style classes. The width is
        instead the distance to the next word part minus a spacing, if that part has a x position.
        The ids of the created word parts are their indices in word_part_objs.

        Args:
            page: object providing get_biggest_fontSize4styles(style_set=...)
            word_part_objs (list): list of dictionaries with the keys text, x, y, matrix, class

        [:return:] a list of (datatypes.positional_word_part) PositionalWordPart
        """
        positional_word_parts = []
        HEIGHT_FACTOR = 1.1 # factor that multiplies font_size -> height
        FONTWIDTHFACTOR = 0.7 # factor that multiplies lastCharFontSize
        SPACING = 0.2
        for index, part_obj in enumerate(word_part_objs):
            text = part_obj.get('text')
            matrix = part_obj.get('matrix')
            style_class = part_obj.get('class')
            x = float(part_obj['x']) if bool(part_obj.get('x')) else 0.0
            y = float(part_obj['y']) if bool(part_obj.get('y')) else 0.0
            # 'class' is read with get and may be missing: use an empty style set instead of
            # failing on None.split (assumes the page copes with an empty set -- TODO confirm).
            style_set = set(style_class.split(' ')) if style_class is not None else set()
            font_size = page.get_biggest_fontSize4styles(style_set=style_set)
            height = round(font_size * HEIGHT_FACTOR + HEIGHT_FACTOR / font_size, 3)
            width = round(font_size * FONTWIDTHFACTOR, 3)
            if index+1 < len(word_part_objs) and bool(word_part_objs[index+1].get('x')):
                width = float(word_part_objs[index+1]['x']) - x - SPACING
            positional_word_parts.append(PositionalWordPart(id=index, text=text, height=height, width=width,
                                                            x=x, y=y, matrix=matrix, style_class=style_class))
        return positional_word_parts
Event Timeline
Log In to Comment