Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F62629619
convert_wordPositions.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 14, 11:21
Size
12 KB
Mime Type
text/x-python
Expires
Thu, May 16, 11:21 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17670078
Attached To
rNIETZSCHEPYTHON nietzsche-python
convert_wordPositions.py
View Options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" This program can be used to convert the word positions to HTML, SVG or plain text for testing purposes.
"""
# Copyright (C) University of Basel 2019 {{{1
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/> 1}}}
import
re
import
getopt
import
sys
from
os
import
sep
,
listdir
,
mkdir
,
path
from
os.path
import
exists
,
isfile
,
isdir
from
lxml.html
import
builder
as
E
from
lxml.html
import
open_in_browser
import
lxml
import
xml.etree.ElementTree
as
ET
from
svgpathtools
import
svg_to_paths
from
myxmlwriter
import
write_pretty
from
datatypes.matrix
import
Matrix
from
datatypes.page
import
Page
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.word
import
Word
# Module metadata: release information first, then authorship.
__version__ = "0.0.1"
__status__ = "Development"
__license__ = "GPL v3"
__copyright__ = 'University of Basel'
__author__ = "Christian Steiner"
__email__ = "christian.steiner@unibas.ch"
# The author also maintains the module.
__maintainer__ = __author__
class Converter:
    """The converter super class.

    Writes the words of a page as plain text; subclasses override
    :meth:`convert` to produce other output formats.
    """

    def __init__(self, page, non_testing=True):
        """Store the page to convert and the testing flag.

        :param page: object whose ``words`` are iterated by :meth:`convert`.
        :param non_testing: kept on the instance; not read by this class itself.
        """
        self.page = page
        self.non_testing = non_testing

    def convert(self, output_file=None):
        """Prints all words.

        Words are written in page order; a newline is emitted whenever the
        line number changes.

        :param output_file: path of the file to write; ``None`` writes to
            ``sys.stdout``.
        """
        if output_file is None:
            # Do not close sys.stdout: it does not belong to this method.
            self._write_words(sys.stdout)
        else:
            # The context manager closes the file even if a write fails.
            with open(output_file, 'w') as out:
                self._write_words(out)

    def _write_words(self, out):
        """Write every word of the page to the open text stream ``out``."""
        first_word_of_line = None
        for word in self.page.words:
            if first_word_of_line is None or first_word_of_line.line_number != word.line_number:
                out.write('\n')
                first_word_of_line = word
                # Only even line numbers are printed; odd lines get a blank prefix.
                if word.line_number % 2 == 0:
                    out.write(str(word.line_number).zfill(2) + ' ')
                else:
                    out.write(' ')
            out.write(word.text + ' ')

    @staticmethod
    def CREATE_CONVERTER(page, non_testing=True, converter_type=None):
        """Returns a converter of type converter_type.

        [:return:] SVGConverter for 'SVG', Converter for None or any type
            containing 'TXT'/'TEXT', HTMLConverter for everything else
        """
        if converter_type is None or re.search(r'T[E]*XT', converter_type):
            return Converter(page, non_testing)
        if converter_type == 'SVG':
            return SVGConverter(page, non_testing)
        return HTMLConverter(page, non_testing)
class SVGConverter(Converter):
    """This class can be used to convert a 'svgWordPositions' xml file to a svg file that combines text as path and text-as-text.
    """
    BG_COLOR = 'yellow'
    OPACITY = '0.2'

    def __init__(self, page, non_testing=True, bg_color=BG_COLOR, opacity=OPACITY):
        """Store the page plus the fill colour and opacity of the word rectangles.

        :param page: page providing ``svg_file`` and ``words``.
        :param non_testing: passed on to :class:`Converter`.
        :param bg_color: fill of every other word's rectangles (alternates with 'orange').
        :param opacity: value of the ``opacity`` attribute of each rectangle.
        """
        Converter.__init__(self, page, non_testing)
        self.bg_color = bg_color
        self.opacity = opacity

    def convert(self, output_file=None):
        """Converts Page to SVG

        Parses the page's svg file, appends a ``<g id="Transkription">`` group
        holding one ``<rect>`` per transkription position, and writes the
        tree to ``output_file``.

        :param output_file: target path; when ``None`` nothing is written.
        """
        transkription_field = TranskriptionField(self.page.svg_file)
        # Register the namespaces of the source file so that they are kept on output.
        # NOTE(review): each attribute is now looked up once instead of twice;
        # assumes get_svg_attributes is a plain getter -- confirm.
        for prefix, attribute_name in (('', 'xmlns'), ('xlink', 'xmlns:xlink')):
            namespace = transkription_field.get_svg_attributes(attribute_name)
            if bool(namespace):
                ET.register_namespace(prefix, namespace)
        svg_tree = ET.parse(self.page.svg_file)
        transkription_node = ET.SubElement(svg_tree.getroot(), 'g', attrib={'id': 'Transkription'})
        # Honour the configured background colour (previously hard-coded 'yellow').
        colors = [self.bg_color, 'orange']
        for word_index, word in enumerate(self.page.words):
            # The colour alternates per word, so all parts of a word share one fill.
            fill = colors[word_index % len(colors)]
            for transkription_position in word.transkription_positions:
                self._append_rect(transkription_node, word, transkription_position, transkription_field, fill)
        if output_file is not None:
            svg_tree.write(output_file)

    def _append_rect(self, parent_node, word, transkription_position, transkription_field, fill):
        """Append the titled rectangle for one transkription position of ``word``."""
        rect_node = ET.SubElement(parent_node, 'rect', attrib={
            'id': str(transkription_position.id),
            'x': str(transkription_position.left + transkription_field.xmin),
            'y': str(transkription_position.top + transkription_field.ymin),
            'width': str(transkription_position.width),
            'height': str(transkription_position.height),
            'fill': fill,
            'opacity': self.opacity,
        })
        transform = transkription_position.transform
        if transform is not None:
            # Shift a copy of the transformation by the field offset and make
            # the rectangle's x/y relative to that transformation.
            matrix = transform.clone_transformation_matrix()
            matrix.matrix[Matrix.XINDEX] = round(transform.matrix[Matrix.XINDEX] + transkription_field.xmin, 3)
            matrix.matrix[Matrix.YINDEX] = round(transform.matrix[Matrix.YINDEX] + transkription_field.ymin, 3)
            rect_node.set('transform', matrix.toString())
            rect_node.set('x', str(round(transkription_position.left - transform.matrix[Matrix.XINDEX], 3)))
            rect_node.set('y', str(round((transkription_position.height - 1.5) * -1, 3)))
        ET.SubElement(rect_node, 'title').text = word.text
class HTMLConverter(Converter):
    """This class can be used to convert a 'svgWordPositions' xml file to a test HTML file.
    """
    CSS = """ .highlight0 { background-color: yellow; opacity: 0.2; }
        .highlight1 { background-color: pink; opacity: 0.2; }
        .previous { background-color: blue; opacity: 0.2; }
        .next { background-color: cyan; opacity: 0.2; }
        .inserted { background-color: green; opacity: 0.2; }
        .inserted-head { background-color: orange; opacity: 0.2; }
        .inserted-tail { background-color: red; opacity: 0.2; }
        .inserted-head-tail { background-color: purple; opacity: 0.2; }
    """

    def __init__(self, page, non_testing=True):
        """Store the page and the testing flag (see :class:`Converter`)."""
        Converter.__init__(self, page, non_testing)

    def convert(self, output_file=None):
        """Converts Page to HTML

        Builds a document whose ``#transkription`` div uses the page's svg
        file as background and contains one absolutely positioned, titled
        anchor per transkription position.

        :param output_file: target path; when ``None`` no file is written.
            The document is opened in a browser when ``self.non_testing``
            is truthy.
        """
        title = self.page.title if self.page.title is not None else 'Test Page'
        if self.page.number is not None:
            title = '{}, S. {}'.format(title, self.page.number)
        width = self.page.width
        height = self.page.height
        style_content = ' position: relative; width: {}px; height: {}px; background-image: url({}); background-size: {}px {}px '\
                .format(width, height, path.abspath(self.page.svg_file), width, height)
        style = E.STYLE('#transkription {' + style_content + '}', HTMLConverter.CSS)
        head = E.HEAD(E.TITLE(title), E.META(charset='UTF-8'), style)
        transkription = E.DIV(id="transkription")
        counter = 0
        for word in self.page.words:
            highlight_class = self._highlight_class(word, counter)
            word_title = '{}: {}'.format(str(word.id), word.text)
            for transkription_position in word.transkription_positions:
                # NOTE(review): an earlier, commented-out variant built the anchor
                # content with lxml.html.fromstring; the plain ' ' text is kept.
                link = E.A(' ', E.CLASS(highlight_class), title=word_title,
                           style=self._position_style(transkription_position))
                transkription.append(link)
            # Alternate highlight0/highlight1 per word, not per position.
            counter = (counter + 1) % 2
        html = E.HTML(head, E.BODY(transkription))
        if self.non_testing:
            open_in_browser(html)
        if output_file is not None:
            with open(output_file, 'wb') as f:
                f.write(lxml.html.tostring(html, pretty_print=True, include_meta_content_type=True, encoding='utf-8'))

    @staticmethod
    def _highlight_class(word, counter):
        """Return the CSS class for ``word``; insertion states win over the alternating default."""
        if word.is_before_inserted_words:
            return 'previous'
        if word.is_after_inserted_words:
            return 'next'
        if word.is_head_of_inserted_words and word.is_tail_of_inserted_words:
            return 'inserted-head-tail'
        if word.is_head_of_inserted_words:
            return 'inserted-head'
        if word.is_tail_of_inserted_words:
            return 'inserted-tail'
        if word.word_insertion_mark is not None:
            return 'inserted'
        return 'highlight' + str(counter)

    @staticmethod
    def _position_style(transkription_position):
        """Return the inline CSS that places one transkription position."""
        style_content = 'position:absolute; top:{0}px; left:{1}px; width:{2}px; height:{3}px;'.format(
            transkription_position.top, transkription_position.left,
            transkription_position.width, transkription_position.height)
        if transkription_position.transform is not None:
            style_content = style_content + ' transform: {}; '.format(transkription_position.transform.toCSSTransformString())
            # Only a negative horizontal offset is used as origin; otherwise 0.
            offset_x = (transkription_position.left - round(transkription_position.transform.getX(), 1)) * -1
            transform_origin_x = offset_x if offset_x < 0 else 0
            style_content = style_content + ' transform-origin: {}px {}px; '.format(transform_origin_x, transkription_position.height)
        return style_content
def usage():
    """Print the command line help, i.e. the docstring of main."""
    help_text = main.__doc__
    print(help_text)
def main(argv):
    """This program can be used to convert the word positions to HTML for testing purposes.

    svgscripts/convert_wordPositions.py [-h|--help, -H|--HTML, -o|--output=outputFile, -S|--SVG, -s|--svg=svgFile, -T|--TEXT, -t|--testing] <file>

        -h|--help: show help
        -H|--HTML [default] convert to HTML test file
        -o|--output=outputFile save output to file outputFile
        -S|--SVG convert to SVG test file
        -s|--svg=svgFile: svg web file
        -T|--TEXT convert to TEXT output
        -t|--testing execute in test mode, do not write to file or open browser

        :return: exit code (int)
    """
    convert_to_type = None
    svg_file = None
    output_file = None
    non_testing = True
    page = None
    try:
        opts, args = getopt.getopt(argv, "htHSTs:o:", ["help", "testing", "HTML", "SVG", "TEXT", "svg=", "output="])
    except getopt.GetoptError:
        usage()
        return 2
    for opt, arg in opts:
        # Options without any positional file argument also just show the help.
        if opt in ('-h', '--help') or not args:
            usage()
            return 0
        elif opt in ('-S', '--SVG'):
            convert_to_type = 'SVG'
        elif opt in ('-T', '--TEXT'):
            convert_to_type = 'TEXT'
        elif opt in ('-H', '--HTML'):
            convert_to_type = 'HTML'
        elif opt in ('-t', '--testing'):
            non_testing = False
        elif opt in ('-s', '--svg'):
            svg_file = arg
        elif opt in ('-o', '--output'):
            output_file = arg
    if len(args) < 1:
        usage()
        return 2
    if convert_to_type is None:
        # Without an explicit type, derive it from the text after the last
        # dot of the output file name; fall back to HTML.
        if output_file is not None and '.' in output_file:
            convert_to_type = output_file.rpartition('.')[2].upper()
        else:
            convert_to_type = 'HTML'
    for word_position_file in args:
        if not isfile(word_position_file):
            print("'{}' does not exist!".format(word_position_file))
            return 2
        if svg_file is not None:
            if not isfile(svg_file):
                # Report the svg file, which is the one that is missing here.
                print("'{}' does not exist!".format(svg_file))
                return 2
            page = Page(xml_source_file=word_position_file, svg_file=svg_file)
        else:
            page = Page(xml_source_file=word_position_file)
            if page.svg_file is None:
                print('Please specify a svg file!')
                usage()
                return 2
        converter = Converter.CREATE_CONVERTER(page, non_testing=non_testing, converter_type=convert_to_type)
        converter.convert(output_file=output_file)
    return 0
# Script entry point: hand the command line arguments (without the program
# name) to main and use its return value as the process exit status.
if __name__ == "__main__":
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
Event Timeline
Log In to Comment