Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86463847
lineNumber.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 6, 15:48
Size
6 KB
Mime Type
text/x-python
Expires
Tue, Oct 8, 15:48 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
21427681
Attached To
rNIETZSCHEPYTHON nietzsche-python
lineNumber.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This class can be used to represent a line number.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
# Module metadata (authorship, licence and version of this file).
__author__ = "Christian Steiner"
__maintainer__ = __author__  # the maintainer is the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
import
re
from
lxml
import
etree
as
ET
from
os.path
import
isfile
import
sys
from
.matrix
import
Matrix
# Import-time side effect: add the relative entry 'py2ttl' to the module
# search path.
# NOTE(review): presumably this makes modules from a sibling 'py2ttl'
# directory importable; a relative entry depends on the current working
# directory -- confirm against how the scripts are launched.
sys.path.append('py2ttl')
class LineNumber:
    """This class represents a line number.

    An instance holds the number itself (``id``), its ``bottom`` and ``top``
    positions and four ``faksimile_*`` positions that default to 0.0.

    Args:
        id (int): the line number; used when no node is passed.
        bottom (float): bottom position; used when no node is passed.
        top (float): top position; used when no node is passed.
        raw_text_node: svg text node containing the number, either as its
            own text or spread over ``tspan`` descendants. It is only
            evaluated together with ``transkription_field``; ``id`` and
            ``bottom`` are then taken from this node.
        transkription_field: handed to ``Matrix`` together with the
            ``transform`` attribute of ``raw_text_node``.
        xml_text_node: xml node whose attributes 'id', 'bottom', 'top' and,
            if present and non-empty, 'faksimile-inner-bottom',
            'faksimile-inner-top', 'faksimile-outer-bottom' and
            'faksimile-outer-top' initialise the instance.
    """
    XML_TAG = 'line-number'
    WARN_NO_LINE_NUMBER = 'No line number found'
    MIN_LINE_HIGHT = 5
    # Optional xml attributes; each maps to the instance attribute of the
    # same name with '-' replaced by '_'.
    _FAKSIMILE_KEYS = ('faksimile-inner-bottom', 'faksimile-inner-top',
                       'faksimile-outer-bottom', 'faksimile-outer-top')

    def __init__(self, id=0, bottom=0.0, top=0.0, raw_text_node=None, transkription_field=None, xml_text_node=None):
        # The assignment order below fixes the attribute order written by
        # attach_object_to_tree, so keep it stable.
        self.id = id
        self.bottom = bottom
        self.top = top
        self.faksimile_inner_bottom = 0.0
        self.faksimile_inner_top = 0.0
        self.faksimile_outer_bottom = 0.0
        self.faksimile_outer_top = 0.0
        if xml_text_node is not None:
            self.id = int(xml_text_node.get('id'))
            self.bottom = float(xml_text_node.get('bottom'))
            self.top = float(xml_text_node.get('top'))
            for key in LineNumber._FAKSIMILE_KEYS:
                value = xml_text_node.get(key)
                # A missing or empty attribute falls back to 0.0.
                setattr(self, key.replace('-', '_'), float(value) if value else 0.0)
        if raw_text_node is not None and transkription_field is not None:
            matrix = Matrix(raw_text_node.get('transform'), transkription_field=transkription_field)
            self.bottom = matrix.getY()
            self.id = int(LineNumber._get_number_text(raw_text_node))

    @staticmethod
    def _get_number_text(raw_text_node) -> str:
        """Return the text of a svg text node.

        The node's own text wins; otherwise the texts of all ``tspan``
        descendants are concatenated. A ``tspan`` without text contributes
        an empty string instead of breaking the join.
        """
        if raw_text_node.text is not None:
            return raw_text_node.text
        tspans = raw_text_node.findall('.//tspan', raw_text_node.nsmap)
        return ''.join(tspan.text or '' for tspan in tspans)

    @classmethod
    def extract_line_numbers(cls, svg_tree, transkription_field) -> list:
        """Extracts line numbers.

        Collects the svg text nodes for which
        ``Matrix.IS_NEARX_TRANSKRIPTION_FIELD`` holds, creates an instance
        for each of them that ``IS_A_LINE_NUMBER`` accepts and lets
        ``set_line_number_top`` determine the top of every line, starting
        at ``transkription_field.ymin``.

        Returns:
            list: the instances created, in document order (may be empty).
        """
        root = svg_tree.getroot()
        nodes_near_tf = [item for item in root.iterfind('.//text', root.nsmap)
                         if Matrix.IS_NEARX_TRANSKRIPTION_FIELD(item.get('transform'), transkription_field)]
        line_numbers = [cls(raw_text_node=item, transkription_field=transkription_field)
                        for item in nodes_near_tf if cls.IS_A_LINE_NUMBER(item)]
        if line_numbers:
            last_to_position = transkription_field.ymin
            for line_number in line_numbers:
                # Each call returns the start position for the next search.
                last_to_position = set_line_number_top(root, transkription_field, line_number, last_to_position)
        return line_numbers

    @staticmethod
    def IS_A_LINE_NUMBER(raw_text_node):
        """Returns whether svg node contains a line number.

        The node counts as a line number if its text (or, when it has no
        own text, the joined text of its ``tspan`` descendants) consists of
        digits only.
        """
        return bool(re.search(r'^[0-9]+$', LineNumber._get_number_text(raw_text_node)))

    def setTop(self, top):
        """Sets top position of line number.
        """
        self.top = top

    def attach_object_to_tree(self, target_tree):
        """Attach object to tree.

        Reuses the first ``line-number`` node with a matching ``id``, or
        appends a new one to the root, and writes every instance attribute
        to it ('_' becomes '-', values are rounded to 3 digits).
        """
        root = target_tree.getroot()
        # Evaluate the query once and reuse the result.
        matches = root.xpath('//' + LineNumber.XML_TAG + '[@id="%s"]' % self.id)
        obj_node = matches[0] if len(matches) > 0 else ET.SubElement(root, LineNumber.XML_TAG)
        for key, value in self.__dict__.items():
            obj_node.set(key.replace('_', '-'), str(round(value, 3)))
def set_line_number_top(tree_root, transkription_field, line_number, last_to_position, minabove=3) -> float:
    """Set top position of line_number and return next last_to_position.

    Looks for text bottoms between ``last_to_position`` and a limit lying
    ``minabove`` above the bottom of ``line_number`` and uses the last
    (i.e. largest, since get_bottoms returns a sorted list) of them to
    derive the top of the line. If the resulting line would be lower than
    ``LineNumber.MIN_LINE_HIGHT`` the search is repeated with ``minabove``
    increased by one.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened listing. It is assumed here that the height check belongs to
    the "bottoms found" branch, so that no top is set when no bottom is
    found -- confirm against the repository.

    Args:
        tree_root: root node that is searched by get_bottoms.
        transkription_field: object providing ``ymin``.
        line_number: the LineNumber whose top is set.
        last_to_position: lower limit for the bottoms that are considered.
        minabove: distance subtracted from the line's bottom to get the
            upper limit of the search.

    Returns:
        float: the top that was computed or, if no bottom was found, the
        upper limit of the search.
    """
    # line_number.bottom plus ymin, minus the margin, is the upper search limit.
    above_current_line_bottom = line_number.bottom + transkription_field.ymin - minabove
    bottoms = get_bottoms(tree_root, from_position=last_to_position, to_position=above_current_line_bottom)
    current_line_top = above_current_line_bottom
    if len(bottoms) > 0:
        # Last bottom inside the range, minus ymin, plus the margin.
        current_line_top = bottoms[-1] - transkription_field.ymin + minabove
        if line_number.bottom - current_line_top >= LineNumber.MIN_LINE_HIGHT:
            line_number.setTop(current_line_top)
        else:
            # Line too low: retry with a larger margin, which lowers the
            # upper search limit by one.
            return set_line_number_top(tree_root, transkription_field, line_number, last_to_position, minabove=minabove+1)
    return current_line_top
def get_bottoms(tree_root, from_position=-1.0, to_position=-1.0, transkription_field=None) -> list:
    """Return the distinct Y values of all text nodes, in ascending order.

    For every ``text`` node below ``tree_root`` the Y value of the matrix
    built from its ``transform`` attribute is collected.

    Args:
        tree_root: node that is searched for ``text`` nodes.
        from_position: exclusive lower limit of the values returned.
        to_position: exclusive upper limit of the values returned.
        transkription_field: if given, its ``ymin`` and ``ymax`` replace
            ``from_position`` and ``to_position``.

    Returns:
        list: the values strictly between both limits if both limits are
        greater than 0.0, otherwise all values.
    """
    text_nodes = tree_root.findall(".//text", tree_root.nsmap)
    unique_y_values = {Matrix(transform_matrix_string=node.get('transform')).getY() for node in text_nodes}
    ordered_bottoms = sorted(unique_y_values)
    if transkription_field is not None:
        from_position = transkription_field.ymin
        to_position = transkription_field.ymax
    limits_are_set = from_position > 0.0 and to_position > 0.0
    if not limits_are_set:
        return ordered_bottoms
    return [bottom for bottom in ordered_bottoms if from_position < bottom < to_position]
Event Timeline
Log In to Comment