Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F84257039
transkription_position.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Sep 21, 16:36
Size
8 KB
Mime Type
text/x-python
Expires
Mon, Sep 23, 16:36 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
20971958
Attached To
rNIETZSCHEPYTHON nietzsche-python
transkription_position.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a transkription word position.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact, release status, license and version.
__author__ = "Christian Steiner"
__maintainer__ = __author__  # maintainer is the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
lxml
import
etree
as
ET
from
os.path
import
isfile
from
.class_spec
import
SemanticClass
from
.debug_message
import
DebugMessage
from
.positional_word_part
import
PositionalWordPart
from
.word_position
import
WordPosition
from
.matrix
import
Matrix
class TranskriptionPosition(WordPosition):
    """
    This class represents a transkription word position.

    Args:
        id (int): word id
        node: XML node to initialize from. If given, ``debug_message`` and
            ``positional_word_parts`` are read from the node's descendants and
            replace the values passed as arguments.
        height (float): height of word
        width (float): width of word
        x (float): x position of word
        y (float): y position of word
        matrix (datatypes.Matrix): matrix containing information about transformation.
        positional_word_parts: a list of (datatypes.positional_word_part) PositionalWordPart.
            If omitted (None), a new empty list is created for this instance.
        debug_message: a (datatypes.debug_message) DebugMessage
    """
    # Horizontal padding: subtracted from the first part's left edge and added to the width.
    ADD2X = 0.15
    # NOTE(review): ADD2TOP and ADD2BOTTOM are not referenced inside this class;
    # presumably used by other modules -- confirm before removing.
    ADD2TOP = 1.0
    ADD2BOTTOM = 0.2
    HEIGHT_FACTOR = 1.1  # factor that multiplies biggest_font_size -> height
    XML_TAG = WordPosition.TRANSKRIPTION

    def __init__(self, id=0, node=None, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None,
                 positional_word_parts=None, debug_message=None):
        super(TranskriptionPosition, self).__init__(id=id, node=node, height=height, width=width,
                                                    x=x, y=y, matrix=matrix,
                                                    tag=WordPosition.TRANSKRIPTION)
        # A None default (instead of a literal []) prevents all instances created
        # without explicit parts from sharing one and the same list object.
        self.positional_word_parts = positional_word_parts if positional_word_parts is not None else []
        self.debug_message = debug_message
        if node is not None:
            # Evaluate the debug message query once and reuse the result.
            debug_nodes = node.xpath('.//' + DebugMessage.XML_TAG)
            self.debug_message = DebugMessage(node=debug_nodes[0]) if len(debug_nodes) > 0 else None
            self.positional_word_parts = [PositionalWordPart(node=pwp_node)
                                          for pwp_node in node.xpath('.//' + PositionalWordPart.XML_TAG)]
        # Register the parts (and the debug message, if any) as attachable objects.
        self.attachable_objects += self.positional_word_parts
        if self.debug_message is not None:
            self.attachable_objects.append(self.debug_message)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.

        [:return:] the parent's semantic dictionary, with a 'positional_word_parts'
                   entry added to its 'properties'.
        """
        # Anchor super() on this class rather than on cls: with super(cls, cls) a
        # subclass inheriting this method would resolve back to this very method
        # and recurse without end.
        dictionary = super(TranskriptionPosition, cls).get_semantic_dictionary()
        dictionary['properties'].update({
            'positional_word_parts': (PositionalWordPart, SemanticClass.LIST,
                                      '{}/@id'.format(WordPosition.TRANSKRIPTION)),
        })
        return dictionary

    @staticmethod
    def CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(page, positional_word_parts, debug_message=None,
                                                     debug_msg_string=None, transkription_position_id=0):
        """Creates a list of TranskriptionPosition from a list of (datatypes.positional_word_part) PositionalWordPart.

        The parts are scanned from the start; at the first part whose transform
        (according to Matrix.DO_CONVERSION_FACTORS_DIFFER) or style_class differs
        from the first part's, the list is split. The leading run becomes one
        TranskriptionPosition, and the remainder is processed recursively with
        an incremented id and a debug message naming the reason for the split.

        Side effect: the id of every positional word part is reset to its index
        within the run it ends up in.

        Args:
            page: page object; its get_line_number and get_biggest_fontSize4styles are used.
            positional_word_parts: list of PositionalWordPart.
            debug_message: DebugMessage attached to the first created position.
            debug_msg_string: if not None, a new DebugMessage with this text replaces debug_message.
            transkription_position_id: id of the first created position.

        [:return:] a list of (datatypes.transkription_position) TranskriptionPosition
                   (empty if positional_word_parts is empty)
        """
        top_correction = 1
        if debug_msg_string is not None:
            debug_message = DebugMessage(message=debug_msg_string)
        if len(positional_word_parts) < 1:
            return []
        transkription_positions = []
        matrix = positional_word_parts[0].transform
        style_class = positional_word_parts[0].style_class
        # Advance index until a part differs from the first one in matrix or style.
        index = 0
        matrices_differ = False
        styles_differ = False
        while index < len(positional_word_parts) and not matrices_differ and not styles_differ:
            if Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix, positional_word_parts[index].transform):
                matrices_differ = True
            elif style_class != positional_word_parts[index].style_class:
                styles_differ = True
            else:
                index += 1
        if (matrices_differ or styles_differ) and index < len(positional_word_parts):
            # Handle the differing tail first; the position for the leading run is
            # inserted in front of these results below, which keeps the original order.
            split_reason = 'matrices differ' if matrices_differ else 'styles differ'
            transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                page, positional_word_parts[index:], debug_msg_string=split_reason,
                transkription_position_id=transkription_position_id + 1)
            positional_word_parts = positional_word_parts[:index]
        first_pwp = positional_word_parts[0]
        last_pwp = positional_word_parts[-1]
        # The line number is looked up at the vertical center of the first part.
        if page.get_line_number((first_pwp.top + first_pwp.bottom) / 2) % 2 == 0:
            # Even line number: derive height and top correction from the biggest
            # font size among all style classes used by the parts.
            style_set = {style for pwp in positional_word_parts for style in pwp.style_class.split(' ')}
            biggest_font_size = page.get_biggest_fontSize4styles(style_set=style_set)
            height = round(biggest_font_size * TranskriptionPosition.HEIGHT_FACTOR
                           + TranskriptionPosition.HEIGHT_FACTOR / biggest_font_size, 3)
            top_correction = 2 + TranskriptionPosition.HEIGHT_FACTOR / biggest_font_size
        else:
            # take greatest value for height
            height = max(pwp.height for pwp in positional_word_parts) + 2 * top_correction
        x = first_pwp.left - TranskriptionPosition.ADD2X
        # The topmost part determines y.
        y = min(pwp.top for pwp in positional_word_parts) - top_correction
        # Span from the padded left edge to the right edge of the last part, plus padding.
        width = last_pwp.left - x + last_pwp.width + TranskriptionPosition.ADD2X
        for pwp_index, pwp in enumerate(positional_word_parts):
            pwp.id = pwp_index
        transkription_positions.insert(0, TranskriptionPosition(
            id=transkription_position_id, height=height, width=width, x=x, y=y, matrix=matrix,
            positional_word_parts=positional_word_parts, debug_message=debug_message))
        return transkription_positions

    @staticmethod
    def CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, matrix=None, debug_msg_string=None,
                                           transkription_field=None):
        """Creates a list of TranskriptionPosition from word_part_objs (i.e. a list of dictionaries
        with the keys: text, x, y, matrix, class).

        If page.svg_file is set and exists as a file, it is parsed and the
        positional word parts are created from it via
        PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST; otherwise
        PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST is used.

        Args:
            page: page object; its svg_file attribute is read.
            word_part_objs: list of dictionaries as described above.
            matrix: only used for the fallback position created when no
                positional word parts could be produced.
            debug_msg_string: if not None, text of the DebugMessage attached to the first position.
            transkription_field: if not None, its xmin and ymin are passed on as offsets (0.0 otherwise).

        [:return:] a list of (datatypes.transkription_position) TranskriptionPosition
        """
        debug_message = DebugMessage(message=debug_msg_string) if debug_msg_string is not None else None
        if page.svg_file is not None and isfile(page.svg_file):
            svg_path_tree = ET.parse(page.svg_file)
            # The default namespace has key None in nsmap; it is exposed under the prefix 'ns'.
            namespaces = {k if k is not None else 'ns': v
                          for k, v in svg_path_tree.getroot().nsmap.items()}
            xmin = 0.0
            ymin = 0.0
            if transkription_field is not None:
                xmin = transkription_field.xmin
                ymin = transkription_field.ymin
            positional_word_parts = []
            for part_obj in word_part_objs:
                # start_id continues after the parts created so far.
                positional_word_parts += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(
                    part_obj, svg_path_tree, namespaces, page,
                    start_id=len(positional_word_parts), xmin=xmin, ymin=ymin)
        else:
            positional_word_parts = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        if len(positional_word_parts) > 0:
            return TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                page, positional_word_parts, debug_message=debug_message)
        # No parts available: return a single default position carrying matrix and debug message.
        return [TranskriptionPosition(matrix=matrix, debug_message=debug_message)]
Event Timeline
Log In to Comment