Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F59594058
transkription_position.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Apr 23, 18:31
Size
10 KB
Mime Type
text/x-python
Expires
Thu, Apr 25, 18:31 (2 d)
Engine
blob
Format
Raw Data
Handle
17216867
Attached To
rNIETZSCHEPYTHON nietzsche-python
transkription_position.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a transkription word position.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata: authorship, contact, development status, license and version.
__author__ = "Christian Steiner"
__maintainer__ = __author__  # the maintainer is the same person as the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
lxml
import
etree
as
ET
from
os.path
import
isfile
import
sys
from
.debug_message
import
DebugMessage
from
.image
import
SVGImage
from
.positional_word_part
import
PositionalWordPart
from
.word_position
import
WordPosition
from
.matrix
import
Matrix
sys
.
path
.
append
(
'py2ttl'
)
from
class_spec
import
SemanticClass
class TranskriptionPosition(WordPosition):
    """
    This class represents the position of a word on the transkription as it is displayed by a svg image.

    @label position of a word on the topological transkription

    Args:
        id (int): word id
        matrix (datatypes.Matrix): matrix containing information about transformation.
        height (float): height of word
        width (float): width of word
        x (float): x position of word
        y (float): y position of word
        positional_word_parts a list of (datatypes.positional_word_part) PositionalWordPart
        debug_message a (datatypes.debug_message) DebugMessage
    """
    # NOTE(review): the class docstring (including the '@label' line) is presumably
    # consumed by the py2ttl semantic tooling -- keep its wording stable; confirm.
    ADD2X = 0.15           # horizontal padding applied left and right of the word parts
    ADD2TOP = 1.0          # not used inside this class
    ADD2BOTTOM = 0.2       # not used inside this class
    HEIGHT_FACTOR = 1.1    # factor that multiplies biggest_font_size -> height
    XML_TAG = WordPosition.TRANSKRIPTION

    def __init__(self, id=0, node=None, height=0.0, width=0.0, x=0.0, y=0.0, matrix=None,
                 positional_word_parts=None, debug_message=None):
        """Initialize the position from explicit values or from a xml node.

        If ``node`` is given, ``debug_message`` and ``positional_word_parts`` are
        read from the node's descendants and the corresponding arguments are ignored.
        """
        super(TranskriptionPosition, self).__init__(id=id, node=node, height=height, width=width,
                                                    x=x, y=y, matrix=matrix, tag=WordPosition.TRANSKRIPTION)
        # A fresh list per instance avoids sharing a mutable default between instances.
        self.positional_word_parts = positional_word_parts if positional_word_parts is not None else []
        self.debug_message = debug_message
        self.deleted = False
        self.has_box = None
        self.style = None
        self.svg_image = None
        if node is not None:
            # Query the debug message nodes once; the first match (if any) is used.
            debug_nodes = node.xpath('.//' + DebugMessage.XML_TAG)
            self.debug_message = DebugMessage(node=debug_nodes[0]) if len(debug_nodes) > 0 else None
            self.positional_word_parts = [PositionalWordPart(node=pwp_node)
                                          for pwp_node in node.xpath('.//' + PositionalWordPart.XML_TAG)]
        # Word parts (and the debug message, if present) are registered individually
        # as attachable objects.
        self.attachable_objects += self.positional_word_parts
        if self.debug_message is not None:
            self.attachable_objects.append(self.debug_message)

    @classmethod
    def get_semantic_dictionary(cls):
        """ Creates a semantic dictionary as specified by SemanticClass.
        """
        dictionary = super(TranskriptionPosition, cls).get_semantic_dictionary()
        # Extend the inherited properties with the relation to the svg image.
        dictionary[cls.PROPERTIES_KEY].update(
            cls.create_semantic_property_dictionary('svg_image', SVGImage, cardinality=1,
                                                    name='isOnSvgImage',
                                                    label='transkription position is on svg image'))
        return cls.return_dictionary_after_updating_super_classes(dictionary)

    def get_text(self):
        """Returns the concatenated text of all positional_word_parts.
        """
        return ''.join([pwp.text for pwp in self.positional_word_parts])

    def is_mergebale_with(self, other) -> bool:
        """Return whether self and other have same writing_process_id or style.

        Positions with equal ``writing_process_id`` are always mergeable. If at
        least one of the two ids is -1, the style classes of the first positional
        word parts are compared instead, provided both positions have word parts.
        In every other case the positions are not mergeable.
        """
        if self.writing_process_id == other.writing_process_id:
            return True
        # The parentheses around the 'or' matter: without them 'and' binds tighter,
        # so the non-empty check was skipped whenever self.writing_process_id == -1
        # and indexing an empty list raised an IndexError.
        if (self.writing_process_id == -1 or other.writing_process_id == -1)\
                and (len(self.positional_word_parts) > 0 and len(other.positional_word_parts) > 0):
            return self.positional_word_parts[0].style_class == other.positional_word_parts[0].style_class
        return False

    def split(self, split_position, second_split=-1) -> list:
        """Split a transkription_position in two (or three) at split_position.

        Word parts whose right edge (``left + width``) lies left of
        ``split_position`` form the first part. If ``second_split`` is given
        (i.e. is not -1), the remaining word parts are divided once more at
        ``second_split`` into a middle and a right part.

        :return: a list of the new transkription_positions
        """
        transkription_positions = []
        left_pwp = [pwp for pwp in self.positional_word_parts if pwp.left + pwp.width < split_position]
        transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
            left_pwp, transkription_position_id=self.id)
        # NOTE(review): each call below may itself return several positions (when
        # matrices or styles differ) whose ids are numbered consecutively, so ids
        # could still overlap in that case -- confirm whether callers renumber.
        if second_split == -1:
            right_pwp = [pwp for pwp in self.positional_word_parts if pwp not in left_pwp]
            next_id = int(self.id) + 1
            transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                right_pwp, transkription_position_id=str(next_id))
        else:
            middle_pwp = [pwp for pwp in self.positional_word_parts
                          if pwp not in left_pwp and pwp.left + pwp.width < second_split]
            next_id = int(self.id) + 1
            transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                middle_pwp, transkription_position_id=str(next_id))
            right_pwp = [pwp for pwp in self.positional_word_parts
                         if pwp not in left_pwp and pwp not in middle_pwp]
            # The right part gets its own id; it previously reused the middle part's id.
            next_id = int(self.id) + 2
            transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                right_pwp, transkription_position_id=str(next_id))
        return transkription_positions

    def update_positional_word_parts(self, positional_word_parts):
        """Update positional_word_parts.

        The previous word parts are detached from ``attachable_objects`` and the
        new ones are attached instead.
        """
        # The word parts were added to attachable_objects one by one (via '+='),
        # so they have to be removed one by one; testing whether the list itself
        # is an element of attachable_objects never matched.
        old_ids = {id(pwp) for pwp in self.positional_word_parts}
        if old_ids:
            # Slice assignment keeps the identity of the attachable_objects list.
            self.attachable_objects[:] = [obj for obj in self.attachable_objects if id(obj) not in old_ids]
        self.positional_word_parts = positional_word_parts
        self.attachable_objects += self.positional_word_parts

    @staticmethod
    def CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(positional_word_parts, debug_message=None,
                                                     debug_msg_string=None, transkription_position_id=0):
        """Creates a list of TranskriptionPosition from a list of (datatypes.positional_word_part) PositionalWordPart.

        The word parts are cut at the first element whose transform or style class
        differs from the first word part; the remainder is processed recursively
        with the next id and a debug message naming the reason for the cut.

        Args:
            positional_word_parts: list of PositionalWordPart.
            debug_message: DebugMessage attached to the first position (used if
                ``debug_msg_string`` is None).
            debug_msg_string: text for a new DebugMessage; takes precedence over
                ``debug_message``.
            transkription_position_id: id of the first created position.

        [:return:] a list of (datatypes.transkription_position) TranskriptionPosition
        """
        TOPCORRECTION = 1
        debug_message = DebugMessage(message=debug_msg_string)\
            if debug_msg_string is not None else debug_message
        transkription_positions = []
        if len(positional_word_parts) < 1:
            return []
        matrix = positional_word_parts[0].transform
        style_class = positional_word_parts[0].style_class
        matrices_differ = False
        styles_differ = False
        # Element 0 is the reference, so the scan starts at 1. This guarantees a
        # non-empty head and a strictly shrinking tail for the recursion below.
        index = 1
        while index < len(positional_word_parts) and not matrices_differ and not styles_differ:
            if Matrix.DO_CONVERSION_FACTORS_DIFFER(matrix, positional_word_parts[index].transform):
                matrices_differ = True
            elif style_class != positional_word_parts[index].style_class:
                styles_differ = True
            else:
                index += 1
        if (matrices_differ or styles_differ) and index < len(positional_word_parts):
            split_reason = 'matrices differ' if matrices_differ else 'styles differ'
            transkription_positions += TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                positional_word_parts[index:], debug_msg_string=split_reason,
                transkription_position_id=int(transkription_position_id) + 1)
            positional_word_parts = positional_word_parts[:index]
        # Bounding box: tallest part plus a correction above and below, leftmost
        # edge of the first part and right edge of the last part padded by ADD2X.
        height = max(pwp.height for pwp in positional_word_parts) + 2 * TOPCORRECTION
        x = positional_word_parts[0].left - TranskriptionPosition.ADD2X
        y = min(pwp.top for pwp in positional_word_parts) - TOPCORRECTION
        last_pwp = positional_word_parts[-1]
        width = last_pwp.left - x + last_pwp.width + TranskriptionPosition.ADD2X
        # Renumber the word parts relative to their new position.
        for pwp_index, pwp in enumerate(positional_word_parts):
            pwp.id = pwp_index
        # The head position goes in front of the positions created from the tail.
        transkription_positions.insert(0, TranskriptionPosition(
            id=transkription_position_id, height=height, width=width, x=x, y=y, matrix=matrix,
            positional_word_parts=positional_word_parts, debug_message=debug_message))
        return transkription_positions

    @staticmethod
    def CREATE_TRANSKRIPTION_POSITION_LIST(page, word_part_objs, matrix=None, debug_msg_string=None,
                                           transkription_field=None):
        """Creates a list of TranskriptionPosition from word_part_objs (i.e. a list of dictionaries
        with the keys: text, x, y, matrix, class).

        If ``page.svg_file`` exists, the positional word parts are created from the
        parsed svg file (offset by ``transkription_field.xmin``/``ymin`` if a
        transkription_field is given); otherwise a simple positional word part list
        is created. If no positional word parts result, a single position with the
        given ``matrix`` is returned.

        [:return:] a list of (datatypes.transkription_position) TranskriptionPosition
        """
        positional_word_parts = []
        debug_message = DebugMessage(message=debug_msg_string)\
            if debug_msg_string is not None else None
        if page.svg_file is not None and isfile(page.svg_file):
            svg_path_tree = ET.parse(page.svg_file)
            # xpath cannot handle the default namespace key None, hence it is mapped to 'ns'.
            namespaces = {k if k is not None else 'ns': v for k, v in svg_path_tree.getroot().nsmap.items()}
            xmin = 0.0
            ymin = 0.0
            if transkription_field is not None:
                xmin = transkription_field.xmin
                ymin = transkription_field.ymin
            for part_obj in word_part_objs:
                # start_id continues the numbering of the word parts collected so far.
                positional_word_parts += PositionalWordPart.CREATE_POSITIONAL_WORD_PART_LIST(
                    part_obj, svg_path_tree, namespaces, page, start_id=len(positional_word_parts),
                    xmin=xmin, ymin=ymin)
        else:
            positional_word_parts = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        if len(positional_word_parts) > 0:
            return TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(
                positional_word_parts, debug_message=debug_message)
        else:
            return [TranskriptionPosition(matrix=matrix, debug_message=debug_message)]
Event Timeline
Log In to Comment