Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86468005
test_join_faksimileAndTranskription.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 6, 16:22
Size
7 KB
Mime Type
text/x-python
Expires
Tue, Oct 8, 16:22 (2 d)
Engine
blob
Format
Raw Data
Handle
21426442
Attached To
rNIETZSCHEPYTHON nietzsche-python
test_join_faksimileAndTranskription.py
View Options
import
unittest
from
os
import
sep
,
path
,
remove
from
os.path
import
isdir
,
isfile
,
dirname
import
shutil
import
sys
import
lxml.etree
as
ET
import
warnings
import
sys
# Add the relative directory 'svgscripts' to the module search path. Being a
# relative entry, it is resolved against the current working directory.
# NOTE(review): presumably the project imports that follow
# (join_faksimileAndTranskription, datatypes.*) live there -- confirm.
sys.path.append('svgscripts')
import
join_faksimileAndTranskription
from
datatypes.faksimile
import
FaksimilePage
from
datatypes.page
import
Page
from
datatypes.positional_word_part
import
PositionalWordPart
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.word_position
import
WordPosition
class TestJoin(unittest.TestCase):
    """Unit tests for the ``join_faksimileAndTranskription`` module.

    All fixture paths are built in :meth:`setUp` below a ``test_data``
    directory located next to this test file. Several tests are skipped,
    either unconditionally or unless this file is run as a script; the
    reason is given in each ``skip`` decorator.
    """

    def setUp(self):
        """Enable the module's UNITTESTING flag and build the fixture paths."""
        join_faksimileAndTranskription.UNITTESTING = True
        # path.join instead of "dirname + sep + name": when dirname(__file__)
        # is empty, plain concatenation would yield the absolute path
        # "/test_data" instead of the relative "test_data".
        data_dir = path.join(path.dirname(__file__), 'test_data')
        self.faksimile_dir = path.join(data_dir, 'faksimile_svg')
        self.manuscript = path.join(data_dir, 'N_VII_1.xml')
        # Insert "_copy" in front of the file extension only; a blanket
        # str.replace('.', ...) would also rewrite dots in directory names.
        manuscript_root, manuscript_ext = path.splitext(self.manuscript)
        self.manuscript_copy = manuscript_root + '_copy' + manuscript_ext
        self.faksimile_file = path.join(self.faksimile_dir, 'N-VII-1,5et6.svg')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.Mp_XIV_1_mytest_421 = path.join(data_dir, 'Mp_XIV_1_mytest_421.xml')
        self.correction_dir = path.join(data_dir, 'correction_dir')
        self.page138 = path.join(data_dir, 'N_VII_1_page138.xml')

    def test_sort_words(self):
        """sort_words returns the words of line 7 in their original order."""
        page = Page(self.Mp_XIV_1_mytest_421)
        words_line7 = [word for word in page.words if word.line_number == 7]
        page.words = words_line7
        sorted_words = join_faksimileAndTranskription.sort_words(page)
        self.assertEqual(len(sorted_words), len(words_line7))
        for index, word in enumerate(words_line7):
            self.assertEqual(sorted_words[index], word)

    def test_sort_faksimile_positions(self):
        """The first ten sorted faksimile positions carry the text of the
        page words at the same index (checked where the SVG has a title)."""
        faksimile_tree = ET.parse(self.faksimile_file)
        # XPath needs a prefix for the default namespace, which lxml's nsmap
        # stores under the key None; map it to 'ns'.
        namespaces = {k if k is not None else 'ns': v
                      for k, v in faksimile_tree.getroot().nsmap.items()}
        faksimile_pages = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree, namespaces=namespaces)
        self.assertEqual(len(faksimile_pages), 2)
        # Only the svg position file is needed here; the returned manuscript
        # file is ignored.
        svg_pos_file, _ = join_faksimileAndTranskription.get_svgPosFile_and_manuscriptFile(
            faksimile_pages[0], manuscript_file=self.manuscript, redo_ok=True)
        sorted_positions = join_faksimileAndTranskription.sort_faksimile_positions(
            faksimile_pages[0].word_positions)
        page = Page(svg_pos_file)
        #print(max(sorted_positions).text)
        # Loop invariants: the query template and the document root.
        title_xpath = '//ns:rect[@id="{0}"]/ns:title/text()|//ns:path[@id="{0}"]/ns:title/text()'
        root = faksimile_tree.getroot()
        for index in range(0, 10):
            # "position_id" rather than "id", which would shadow the builtin.
            position_id = sorted_positions[index].id
            # Evaluate the query once and reuse the result.
            titles = root.xpath(title_xpath.format(position_id), namespaces=namespaces)
            # NOTE(review): the assertion is kept inside the guard, i.e.
            # positions without a title element are not checked -- confirm
            # against the upstream file, whose indentation is not visible here.
            if titles:
                word_text = titles[0]
                #print(sorted_positions[index].left, sorted_positions[index].top, word_text, page.words[index].text)
                self.assertEqual(word_text, page.words[index].text)

    @unittest.skipUnless(__name__ == "__main__", 'test uses path from within dir')
    def test_get_filelist_and_manuscript_file(self):
        """get_filelist_and_manuscript_file is checked for three argument
        combinations: (directory, manuscript), (manuscript, svg file) and
        (manuscript, correction_dir=...)."""
        file_list, manuscript_file = join_faksimileAndTranskription.get_filelist_and_manuscript_file(
            self.faksimile_dir, self.manuscript)
        self.assertEqual(len(file_list), 2)
        self.assertEqual(file_list[0], self.faksimile_file)
        self.assertEqual(manuscript_file, self.manuscript)
        # Same call with the manuscript given first and a single svg file.
        file_list, manuscript_file = join_faksimileAndTranskription.get_filelist_and_manuscript_file(
            self.manuscript, self.faksimile_file)
        self.assertEqual(len(file_list), 1)
        self.assertEqual(file_list[0], self.faksimile_file)
        self.assertEqual(manuscript_file, self.manuscript)
        # With a correction directory the single expected entry is page138.
        file_list, manuscript_file = join_faksimileAndTranskription.get_filelist_and_manuscript_file(
            self.manuscript, correction_dir=self.correction_dir)
        self.assertEqual(len(file_list), 1)
        self.assertEqual(file_list[0], self.page138)

    @unittest.skipUnless(__name__ == "__main__", 'test uses path from within dir')
    def test_get_svgPosFile_and_manuscriptFile(self):
        """get_svgPosFile_and_manuscriptFile returns the page file derived
        from the manuscript name and the manuscript file itself."""
        faksimile_tree = ET.parse(self.faksimile_file)
        faksimile_pages = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)
        self.assertEqual(len(faksimile_pages), 2)
        svg_pos_file, manuscript_file = join_faksimileAndTranskription.get_svgPosFile_and_manuscriptFile(
            faksimile_pages[0], manuscript_file=self.manuscript, redo_ok=True)
        # NOTE(review): replace('.') rewrites every dot in the path and the
        # literal "00" only pads single-digit page numbers; the expectation is
        # left unchanged because it presumably mirrors how the module under
        # test derives the name -- confirm before changing.
        expected_svg_pos_file = self.manuscript.replace(
            '.', '_page00{}.'.format(faksimile_pages[0].page_number))
        self.assertEqual(svg_pos_file, expected_svg_pos_file)
        self.assertEqual(manuscript_file, self.manuscript)

    @unittest.skip('join changed ... fix me')
    def test_join_faksimileAndTranskription(self):
        """join_faksimileAndTranskription returns 0 for the test fixtures."""
        self.assertEqual(
            join_faksimileAndTranskription.join_faksimileAndTranskription(
                self.faksimile_file, self.manuscript),
            0)
        #self.assertEqual(join_faksimileAndTranskription.join_faksimileAndTranskription(self.faksimile_file, self.manuscript, test_word_text='gar'), 0)

    @unittest.skip('function update_writing_process is deprecated')
    def testupdate_writing_process(self):
        """update_writing_process changes the writing_process_id of the
        word's single faksimile position from -1 to 0."""
        page = Page(self.xml_file)
        word = page.words[12]
        self.assertEqual(len(word.faksimile_positions), 1)
        self.assertEqual(word.faksimile_positions[0].writing_process_id, -1)
        join_faksimileAndTranskription.update_writing_process(word)
        self.assertEqual(word.faksimile_positions[0].writing_process_id, 0)

    #@unittest.skipUnless(__name__ == "__main__", 'test takes too long, we do not run it with unittest discover')
    @unittest.skip('test takes too long, has been tested')
    def test_fix_errors(self):
        """fix_errors returns exit status 0 for one mismatching position/word pair."""
        page = Page(self.xml_file)
        word_position = WordPosition(id='rect945', text='Lenken')
        exit_status = join_faksimileAndTranskription.fix_errors(
            self.faksimile_file, [word_position], [page.words[12]],
            xml_source_file=self.xml_file, manuscript_file=self.manuscript)
        self.assertEqual(exit_status, 0)

    @unittest.skip('tested with local file')
    def test_join_single_chars(self):
        """After join_single_char_words there is exactly one word 'selber'
        and no remaining word 's'."""
        page = Page('xml/N_VII_1_page016.xml')
        words = join_faksimileAndTranskription.sort_words(page)
        join_faksimileAndTranskription.join_single_char_words(words)
        new_words = [word for word in words if word.text == 'selber']
        self.assertEqual(len(new_words), 1)
        new_words = [word for word in words if word.text == 's']
        self.assertEqual(len(new_words), 0)

    def test_get_mismatching_ids(self):
        """get_mismatching_ids reports the word 'Denken' and the faksimile
        position 'Lenken' as mismatching."""
        page = Page(self.xml_file)
        word_position = WordPosition(id='rect945', text='Lenken')
        mwords, mfps = join_faksimileAndTranskription.get_mismatching_ids(
            [page.words[12]], [word_position])
        self.assertEqual(mwords[0].text, 'Denken')
        self.assertEqual(mfps[0].text, 'Lenken')

    @unittest.skip('tested with local file')
    def test_a_file(self):
        """Run the join on a file from a developer's machine (always skipped)."""
        #join_faksimileAndTranskription.UNITTESTING = False
        join_faksimileAndTranskription.join_faksimileAndTranskription(
            '/home/knister0/ownCloud/nietzscheDE/Bearbeitung_Faksimile/Eric/Mp_XV/Kontrolle_und_Beschriftung_der_Wortrahmen/Fertig/Mp-XV-2d,3.svg',
            'xml/Mp_XV.xml')
# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Event Timeline
Log In to Comment