Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F74438684
test_compare_faksimile_words_line_wise.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Jul 27, 20:23
Size
4 KB
Mime Type
text/x-python
Expires
Mon, Jul 29, 20:23 (2 d)
Engine
blob
Format
Raw Data
Handle
19394042
Attached To
rNIETZSCHEPYTHON nietzsche-python
test_compare_faksimile_words_line_wise.py
View Options
import
unittest
from
os
import
sep
,
path
,
remove
from
os.path
import
isdir
,
isfile
,
dirname
import
shutil
import
sys
import
lxml.etree
as
ET
import
warnings
import
sys
sys
.
path
.
append
(
'svgscripts'
)
import
compare_faksimile_words_line_wise
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.page
import
Page
from
datatypes.positional_word_part
import
PositionalWordPart
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.word_position
import
WordPosition
class TestCompareLineWise(unittest.TestCase):
    """Tests for the line-wise comparison of transcription words and faksimile positions.

    All tests read fixture files from the ``test_data`` directory that sits
    next to this test module.
    """

    def setUp(self):
        """Switch the module under test into unittest mode and build fixture paths."""
        compare_faksimile_words_line_wise.UNITTESTING = True
        # path.join copes with an empty dirname (script started from its own
        # directory), where plain string concatenation would yield '/test_data'.
        data_dir = path.join(path.dirname(__file__), 'test_data')
        self.faksimile_dir = path.join(data_dir, 'faksimile_svg')
        self.manuscript = path.join(data_dir, 'N_VII_1.xml')
        # Insert '_copy' in front of the extension only; replacing every '.'
        # would also rewrite dots that occur in directory names.
        manuscript_root, manuscript_ext = path.splitext(self.manuscript)
        self.manuscript_copy = manuscript_root + '_copy' + manuscript_ext
        self.faksimile_file = path.join(self.faksimile_dir, 'N-VII-1,5et6.svg')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.Mp_XIV_1_mytest_421 = path.join(data_dir, 'Mp_XIV_1_mytest_421.xml')
        self.correction_dir = path.join(data_dir, 'correction_dir')
        self.page138 = path.join(data_dir, 'N_VII_1_page138.xml')

    def _load_page_and_faksimile(self):
        """Return ``(page, faksimile_page)`` for the default xml and svg fixtures.

        The faksimile page is the first one returned for the page's number.
        """
        page = Page(self.xml_file)
        faksimile_page = FaksimilePage.get_faksimile_pages(
            self.faksimile_file, page_number=page.number)[0]
        return page, faksimile_page

    def test_mark_unmergeable_words_and_faksimile_positions(self):
        """After marking, exactly one position has the text 'gar' and it is not mergeable."""
        page, faksimile_page = self._load_page_and_faksimile()
        compare_faksimile_words_line_wise.mark_unmergeable_words_and_faksimile_positions(
            page.words, faksimile_page.word_positions)
        word_gar = [fp for fp in faksimile_page.word_positions if fp.text == 'gar']
        self.assertEqual(len(word_gar), 1)
        self.assertFalse(word_gar[0].mergeable)
        # Debugging aids:
        #print([word.text for word in page.words if not word.mergeable])
        #print([fp.text for fp in faksimile_page.word_positions if not fp.mergeable])

    def test_get_line(self):
        """``get_line`` returns faksimile positions matching the words of a given line."""
        page, faksimile_page = self._load_page_and_faksimile()
        # The composer is built before the positions are marked, as in the
        # original test; the order is kept in case it matters.
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions)
        compare_faksimile_words_line_wise.mark_unmergeable_words_and_faksimile_positions(
            page.words, faksimile_page.word_positions)

        # Line 1: the first word must be among the matched positions.
        words_on_line = [word for word in page.words if word.line_number == 1]
        matched_line = flc.get_line(words_on_line, 0)
        self.assertIn(words_on_line[0].text, [fp.text for fp in matched_line])
        words_on_line[0].joined = True
        matched_line[0].joined = True

        # Line 2: every word must be among the matched positions.
        words_on_line = [word for word in page.words if word.line_number == 2]
        matched_line = flc.get_line(words_on_line, 1)
        matched_texts = [fp.text for fp in matched_line]
        for word in words_on_line:
            self.assertIn(word.text, matched_texts)

        # Line 42 at index 22: a non-empty match is expected.
        words_on_line = [word for word in page.words if word.line_number == 42]
        matched_line = flc.get_line(words_on_line, 22)
        self.assertGreater(len(matched_line), 0)

    def test_merge_line(self):
        """``merge_lines`` joins all words of line 42 and gives new words faksimile positions."""
        page, faksimile_page = self._load_page_and_faksimile()
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions)
        compare_faksimile_words_line_wise.mark_unmergeable_words_and_faksimile_positions(
            page.words, faksimile_page.word_positions)
        words_on_line = [word for word in page.words if word.line_number == 42]
        new_words = []
        flc.merge_lines(words_on_line, new_words, index=22)
        for word in new_words:
            self.assertGreater(len(word.faksimile_positions), 0)
        # No word of the line may remain unjoined.
        self.assertEqual(len([word for word in words_on_line if not word.joined]), 0)

    def test_merge_faksimile_positions_and_words(self):
        """``merge_faksimile_positions_and_words`` returns exit code 0 for the fixtures."""
        page, faksimile_page = self._load_page_and_faksimile()
        # NOTE(review): the composer instance is never used here; the call is
        # kept in case construction has side effects on word_positions -- confirm.
        compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions)
        exit_code = compare_faksimile_words_line_wise.merge_faksimile_positions_and_words(
            page, faksimile_page.word_positions)
        self.assertEqual(exit_code, 0)

    def test_faksimile_line_composer(self):
        """The composer yields 23 lines for both parameter sets and keeps returning positions."""
        faksimile_page = FaksimilePage.get_faksimile_pages(self.faksimile_file)[0]
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions, threshold=10)
        self.assertEqual(len(flc.lines_of_faksimile_positions), 23)
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions, threshold=20, num_lines_with_words=23)
        self.assertEqual(len(flc.lines_of_faksimile_positions), 23)
        # 200 consecutive calls must each return a position.
        for _ in range(200):
            self.assertIsNotNone(flc.get_next_faksimile())
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
Event Timeline
Log In to Comment