Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61684119
test_process_words_post_merging.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, May 8, 07:47
Size
4 KB
Mime Type
text/x-python
Expires
Fri, May 10, 07:47 (2 d)
Engine
blob
Format
Raw Data
Handle
17544668
Attached To
rNIETZSCHEPYTHON nietzsche-python
test_process_words_post_merging.py
View Options
import
unittest
from
os
import
sep
,
path
,
remove
from
os.path
import
isdir
,
isfile
,
dirname
import
shutil
import
sys
import
lxml.etree
as
ET
import
warnings
import
sys
sys
.
path
.
append
(
'svgscripts'
)
import
process_words_post_merging
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.mark_foreign_hands
import
MarkForeignHands
from
datatypes.page
import
Page
from
datatypes.path
import
Path
from
datatypes.positional_word_part
import
PositionalWordPart
from
datatypes.text_connection_mark
import
TextConnectionMark
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.word_position
import
WordPosition
class TestPostMerge(unittest.TestCase):
    """Tests for ``process_words_post_merging`` run against the fixtures in ``test_data``."""

    def setUp(self):
        """Set the module's ``UNITTESTING`` flag and build the fixture paths used by the tests."""
        # NOTE(review): presumably switches the module into a test mode --
        # confirm in process_words_post_merging what UNITTESTING controls.
        process_words_post_merging.UNITTESTING = True
        # path.join (instead of '' + sep + ...) keeps the directory relative
        # when dirname(__file__) is empty, rather than producing '/test_data'.
        data_dir = path.join(path.dirname(__file__), 'test_data')
        self.faksimile_dir = path.join(data_dir, 'faksimile_svg')
        self.manuscript = path.join(data_dir, 'N_VII_1.xml')
        # Insert '_copy' only in front of the file extension; a plain
        # str.replace('.', ...) would also rewrite dots in directory names.
        manuscript_root, manuscript_ext = path.splitext(self.manuscript)
        self.manuscript_copy = manuscript_root + '_copy' + manuscript_ext
        self.faksimile_file = path.join(self.faksimile_dir, 'N-VII-1,5et6.svg')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.Mp_XIV_1_mytest_421 = path.join(data_dir, 'Mp_XIV_1_mytest_421.xml')
        self.test_tcm_xml = path.join(data_dir, 'N_VII_1_page001.xml')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')

    def test_main(self):
        """Run ``main`` on the manuscript fixture; passes if no exception is raised."""
        process_words_post_merging.main([self.manuscript])

    def test_categorize_paths(self):
        """Check ``categorize_paths`` on the words of line 33 of the W_I_8 page fixture."""
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        tr = TranskriptionField(page.source)
        # Restrict the page to a single line before categorizing.
        page.words = [word for word in page.words if word.line_number == 33]
        path_dict = process_words_post_merging.categorize_paths(page, tr)
        # Word 269 must not be flagged as deleted.
        self.assertNotIn(True, [word.deleted for word in page.words if word.id == 269])
        self.assertGreater(len(path_dict.get('deletion_or_underline_paths')), 0)
        self.assertEqual(len(path_dict.get('box_paths')), 5)
        words = [word for word in page.words if len(word.box_paths) > 0]
        self.assertEqual(len(words), 1)
        self.assertIsNotNone(words[0].word_parts[0].earlier_version)
        self.assertEqual(words[0].word_parts[0].earlier_version.text, ')')

    def test_find_special_words(self):
        """Check that ``find_special_words`` fills the page's special-word lists."""
        page = Page(xml_source_file=self.xml_file)
        process_words_post_merging.find_special_words(page)
        self.assertEqual(len(page.mark_foreign_hands), 1)
        self.assertEqual(page.mark_foreign_hands[0].foreign_hands_text, 'x')
        page.update_and_attach_words2tree()
        # NOTE(review): the query result was never asserted on; the call is
        # kept so the xpath still runs after attaching -- consider asserting
        # on the returned nodes.
        page.page_tree.xpath('//' + MarkForeignHands.XML_TAG)
        page = Page(xml_source_file=self.test_tcm_xml)
        process_words_post_merging.find_special_words(page)
        self.assertEqual(len(page.text_connection_marks), 1)
        self.assertEqual(page.text_connection_marks[0].text_source.first_line, 2)
        # Debugging aid (disabled):
        #   page.update_and_attach_words2tree()
        #   nodes = page.page_tree.xpath('//' + TextConnectionMark.XML_TAG)
        #   print(ET.dump(nodes[0]))

    def test_process_word_boxes(self):
        """Check that ``process_word_boxes`` attaches the five box paths to five words."""
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        for word in page.words:
            word.partition_according_to_writing_process_id()
        tr = TranskriptionField(page.source)
        # SVG path data of the five boxes handed to process_word_boxes.
        box_path_d = [
            'M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',
            'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',
            'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',
            'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',
            'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315',
        ]
        box_paths = [Path(d_string=d_string) for d_string in box_path_d]
        process_words_post_merging.process_word_boxes(page, box_paths, tr)
        words_with_boxes = [word for word in page.words if len(word.box_paths) > 0]
        self.assertEqual(len(words_with_boxes), 5)

    def test_update_writing_process_ids(self):
        """Check the word parts of 'Aber' on line 2 after ``update_writing_process_ids``."""
        page = Page(xml_source_file=self.pdf_xml)
        page.words = [word for word in page.words if word.text == 'Aber' and word.line_number == 2]
        process_words_post_merging.update_writing_process_ids(page)
        self.assertEqual(len(page.words[0].word_parts), 2)
        self.assertEqual(page.words[0].word_parts[0].writing_process_id, 1)
        self.assertEqual(page.words[0].word_parts[1].writing_process_id, 0)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Event Timeline
Log In to Comment