Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F62635578
footnotes.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 14, 12:18
Size
3 KB
Mime Type
text/x-python
Expires
Thu, May 16, 12:18 (2 d)
Engine
blob
Format
Raw Data
Handle
17670833
Attached To
rNIETZSCHEPYTHON nietzsche-python
footnotes.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to extract footnotes from a svg file.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import
getopt
import
re
import
sys
from
os
import
listdir
,
sep
,
path
from
os.path
import
isfile
,
isdir
,
dirname
import
lxml.etree
as
ET
# Module metadata: authorship, contact, licence and release information.
__author__ = "Christian Steiner"
__maintainer__ = __author__  # the maintainer is the same person as the author
__copyright__ = 'University of Basel'
__email__ = "christian.steiner@unibas.ch"
__status__ = "Development"
__license__ = "GPL v3"
__version__ = "0.0.1"
from
.matrix
import
Matrix
from
.transkriptionField
import
TranskriptionField
# NOTE(review): this flag is not read anywhere in the visible part of this
# file; presumably it is switched on by the unit tests -- confirm against
# the test suite before relying on it.
UNITTESTING = False
def extract_footnotes_as_strings(transkription_field=None, svg_tree=None, svg_file=None, contains_string=''):
    """Returns all footnotes as a list of strings.

    All ``text`` nodes of the svg for which ``Matrix.IS_IN_FOOTNOTE_AREA``
    is true are grouped into lines by the Y value of their transform matrix
    (lines are processed in ascending Y order) and ordered within a line by
    the X value of their transform matrix. The texts of a line are
    concatenated; whenever some text has already been collected and the next
    text matches ``.*[0-9]+:``, the collected text is stored as a footnote
    and a new footnote starts with that text. At the end of every line the
    text collected so far is stored as a footnote.

    :param transkription_field: object passed to ``Matrix.IS_IN_FOOTNOTE_AREA``;
        if None and ``svg_file`` is given, it is created with
        ``TranskriptionField(svg_file)``.
    :param svg_tree: parsed svg tree; if None and ``svg_file`` is given, it is
        created with ``ET.parse(svg_file)``.
    :param svg_file: svg file used to create the two arguments above when
        they are missing.
    :param contains_string: if not empty, only footnotes that contain this
        substring are returned.
    :return: list of footnote strings
    :raises AttributeError: if neither ``svg_tree`` nor ``svg_file`` is given.
    """
    if transkription_field is None and svg_file is not None:
        transkription_field = TranskriptionField(svg_file)
    if svg_tree is None and svg_file is not None:
        svg_tree = ET.parse(svg_file)
    root = svg_tree.getroot()
    namespaces = root.nsmap
    starts_new_footnote = re.compile(r'.*[0-9]+:').match

    # Parse every transform only once and group the nodes by their Y value,
    # instead of re-creating a Matrix per node for each line and each sort.
    nodes_by_y = {}
    for node in root.iterfind('.//text', namespaces):
        transform = node.get('transform')
        if Matrix.IS_IN_FOOTNOTE_AREA(transform, transkription_field):
            matrix = Matrix(transform_matrix_string=transform)
            nodes_by_y.setdefault(matrix.getY(), []).append((matrix.getX(), node))

    footnotes = []
    for y_value in sorted(nodes_by_y):
        footnote_string = ''
        # sorted() is stable: nodes with equal X keep their document order.
        for _, node in sorted(nodes_by_y[y_value], key=lambda x_and_node: x_and_node[0]):
            if len(node) == 0:
                # An empty element has text None: treat it as an empty string.
                node_text = node.text or ''
            else:
                node_text = ''.join(tspan.text or '' for tspan in node.findall('tspan', namespaces))
            if footnote_string != '' and starts_new_footnote(node_text):
                footnotes.append(footnote_string)
                footnote_string = node_text
            else:
                footnote_string += node_text
        # The remaining text of a line is always stored, even if it is empty.
        footnotes.append(footnote_string)
    if contains_string:
        footnotes = [footnote for footnote in footnotes if contains_string in footnote]
    return footnotes
def main(argv):
    """This program can be used to extract footnotes from a svg file.

    footnotes.py [OPTIONS] <svg_file>

        OPTIONS:
        -h|--help               show help
        -c|--contains=STRING    print only footnotes that contain STRING

    :return: exit code (int)
    """
    try:
        opts, args = getopt.getopt(argv, "hc:", ["help", "contains="])
    except getopt.GetoptError:
        print(main.__doc__)
        return 2
    contains_string = ''
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(main.__doc__)
            return 0
        if opt in ('-c', '--contains'):
            contains_string = arg
    # Exactly one svg file is expected after the options.
    if len(args) != 1:
        print(main.__doc__)
        return 2
    svg_file = args[0]
    if not isfile(svg_file):
        print("'{}' does not exist!".format(svg_file), file=sys.stderr)
        return 2
    for footnote in extract_footnotes_as_strings(svg_file=svg_file, contains_string=contains_string):
        print(footnote)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
Event Timeline
Log In to Comment