Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F62606865
test_page.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, May 14, 07:35
Size
8 KB
Mime Type
text/x-python
Expires
Thu, May 16, 07:35 (2 d)
Engine
blob
Format
Raw Data
Handle
17667625
Attached To
rNIETZSCHEPYTHON nietzsche-python
test_page.py
View Options
import
unittest
from
os
import
sep
,
path
from
os.path
import
isdir
,
isfile
,
dirname
,
basename
import
lxml.etree
as
ET
import
sys
import
sys
# Extend the module search path before the project imports that follow:
# 'svgscripts' is always appended (relative to the current working directory).
sys.path.append('svgscripts')

# When no 'datatypes' directory is visible from the current working directory,
# additionally append the parent of sys.path[0] and record that in dir_changed.
dir_changed = not isdir('datatypes')
if dir_changed:
    sys.path.append(dirname(sys.path[0]))
from
datatypes.lineNumber
import
LineNumber
from
datatypes.mark_foreign_hands
import
MarkForeignHands
from
datatypes.page
import
Page
from
datatypes.path
import
Path
from
datatypes.text_connection_mark
import
TextConnectionMark
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.writing_process
import
WritingProcess
from
datatypes.word
import
Word
class TestPage(unittest.TestCase):
    """Tests for ``datatypes.page.Page``, run against the files in ``test_data``."""

    def setUp(self):
        """Resolve the paths of the fixture files used by the tests.

        ``test_data`` is looked up next to this file first and, failing that,
        one directory further up.  ``__file__`` is made absolute first: if it
        is a bare file name, ``dirname`` returns ``''`` and a plain
        ``'' + sep + 'test_data'`` would point at the file-system root
        instead of the directory containing this file.
        """
        here = dirname(path.abspath(__file__))
        data_dir = path.join(here, 'test_data')
        if not isdir(data_dir):
            data_dir = path.join(dirname(here), 'test_data')
        self.test_file = path.join(data_dir, 'test.xml')
        self.test_svg_file = path.join(data_dir, 'test421.svg')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.xml_fileB = path.join(data_dir, 'N_VII_1_page006.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')
        self.test_tcm_xml = path.join(data_dir, 'N_VII_1_page001.xml')
        self.test_manuscript = path.join(data_dir, 'N_VII_1.xml')

    @staticmethod
    def _font_size_of_stage(page, stage):
        """Return the font size (float, 'px' stripped) of the first style key mapped to *stage*."""
        style_keys = [
            key for key, value in page.fontsizekey2stage_mapping.items()
            if value == stage
        ]
        font_size = page.style_dict.get(style_keys[0]).get('font-size')
        return float(font_size.replace('px', ''))

    @staticmethod
    def _vertical_middle(word):
        """Return the mean of ``top`` and ``bottom`` of the word's first transkription position."""
        position = word.transkription_positions[0]
        return (position.bottom + position.top) / 2

    def test_Page(self):
        """Check title, number, styles, width and font-size stages of a page built from xml + svg."""
        page = Page(xml_source_file=self.test_file, svg_file=self.test_svg_file)
        self.assertEqual(page.title, 'Mp XIV 1')
        self.assertEqual(page.number, '421')
        self.assertEqual(len(page.sonderzeichen_list), 2)
        self.assertIn('st21', page.sonderzeichen_list)
        self.assertIn('st23', page.sonderzeichen_list)
        self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
        self.assertEqual(page.width, 493.23)
        # The font size has to shrink from stage 0 to stage 1 to stage 2.
        font_stage0 = self._font_size_of_stage(page, 0)
        font_stage1 = self._font_size_of_stage(page, 1)
        font_stage2 = self._font_size_of_stage(page, 2)
        self.assertGreater(font_stage0, font_stage1)
        self.assertGreater(font_stage1, font_stage2)

    def test_get_biggest_fontSize4styles(self):
        """The biggest font size among the given style keys of the test page is 10."""
        page = Page(xml_source_file=self.test_file)
        style_set = {'st12', 'st2', 'st14', 'st13'}
        self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)

    def test_get_words(self):
        """The test page holds 440 words; check the text of the first and the last one."""
        page = Page(xml_source_file=self.test_file)
        words = page.words
        self.assertEqual(len(words), 440)
        self.assertEqual(words[0].text, '$')
        self.assertEqual(words[439].text, 'mußte!')

    def test_create_writing_process(self):
        """Words 97 and 129 are assigned to the LATER_INSERTION_AND_ADDITION writing process."""
        page = Page(xml_source_file=self.test_file)
        page.create_writing_processes_and_attach2tree()
        for index in (97, 129):
            self.assertEqual(
                page.words[index].transkription_positions[0].writing_process_id,
                WritingProcess.LATER_INSERTION_AND_ADDITION,
            )

    def test_init_line_numbers(self):
        """Three even-numbered lines plus a page bottom result in seven line numbers."""
        page = Page(xml_source_file=self.test_file)
        line_numbers = [
            LineNumber(id=2, top=20, bottom=40),
            LineNumber(id=4, top=50, bottom=60),
            LineNumber(id=6, top=70, bottom=90),
        ]
        page.init_line_numbers(line_numbers, 122.345)
        self.assertEqual(len(page.line_numbers), 7)
        self.assertEqual(page.line_numbers[0].id, 1)
        self.assertEqual(page.line_numbers[6].id, 7)
        self.assertEqual(page.line_numbers[6].top, 91)
        self.assertEqual(page.line_numbers[6].bottom, 122.345)
        self.assertEqual(page.get_line_number(122), 7)
        self.assertEqual(page.get_line_number(92), 7)
        self.assertEqual(page.get_line_number(22), 2)

    def test_get_line_number(self):
        """The vertical middle of words 0, 27 and 105 falls on lines 1, 2 and 7."""
        page = Page(xml_source_file=self.test_file)
        self.assertEqual(page.get_line_number(self._vertical_middle(page.words[0])), 1)
        self.assertEqual(page.get_line_number(self._vertical_middle(page.words[27])), 2)
        self.assertEqual(page.get_line_number(self._vertical_middle(page.words[105])), 7)

    def test_categorize_paths(self):
        """Categorize the paths of line 33 and check the deletion and box results."""
        # NOTE(review): this sets the flag on the Page class and never resets it,
        # so it stays True for every test that runs afterwards -- confirm intended.
        Page.UNITTESTING = True
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        tr = TranskriptionField(page.source)
        page.words = [word for word in page.words if word.line_number == 33]
        path_dict = page.categorize_paths(tr)
        # No word with id 269 may have been flagged as deleted.
        self.assertNotIn(True, [word.deleted for word in page.words if word.id == 269])
        self.assertGreater(len(path_dict.get('deletion_or_underline_paths')), 0)
        self.assertEqual(len(path_dict.get('box_paths')), 5)

    def test_find_special_words(self):
        """find_special_words yields one foreign-hand mark and one text connection mark."""
        page = Page(xml_source_file=self.xml_file)
        page.find_special_words()
        self.assertEqual(len(page.mark_foreign_hands), 1)
        self.assertEqual(page.mark_foreign_hands[0].foreign_hands_text, 'x')
        page.update_and_attach_words2tree()
        # Smoke check only: the query must not raise; its result is not inspected.
        page.page_tree.xpath('//' + MarkForeignHands.XML_TAG)

        page = Page(xml_source_file=self.test_tcm_xml)
        page.find_special_words()
        self.assertEqual(len(page.text_connection_marks), 1)
        self.assertEqual(page.text_connection_marks[0].text_source.first_line, 2)

    def test_update_page_type(self):
        """A page updated with the transkription field of its svg source is a verso page."""
        page = Page(xml_source_file=self.pdf_xml)
        tf = TranskriptionField(self.pdf_xml_source)
        page.update_page_type(transkription_field=tf)
        self.assertEqual(page.page_type, Page.PAGE_VERSO)
        # TODO: the recto case below is disabled -- re-enable or drop it.
        #page = Page(xml_source_file=self.xml_fileB)
        #page.update_page_type()
        #self.assertEqual(page.page_type, Page.PAGE_RECTO)

    def test_update_line_number_area(self):
        """The line number area width ends up strictly between 0 and 15 for both pages."""
        for xml_file in (self.xml_file, self.xml_fileB):
            with self.subTest(xml_file=xml_file):
                page = Page(xml_source_file=xml_file)
                transkription_field = TranskriptionField(page.source)
                page.update_line_number_area(transkription_field)
                self.assertGreater(transkription_field.line_number_area_width, 0)
                self.assertLess(transkription_field.line_number_area_width, 15)

    def test_get_pages_from_xml_file(self):
        """The manuscript file lists pages 5 and 6; only page 5 matches 'faksimile merged'."""
        pages = Page.get_pages_from_xml_file(self.test_manuscript)
        self.assertEqual(len(pages), 2)
        self.assertEqual(pages[0].number, '5')
        self.assertEqual(pages[1].number, '6')
        pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains='faksimile merged')
        self.assertEqual(len(pages), 1)
        self.assertEqual(pages[0].number, '5')

    def test_process_word_boxes(self):
        """Smoke test: process_word_boxes must run without raising (no result is asserted)."""
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        for word in page.words:
            word.partition_according_to_writing_process_id()
        tr = TranskriptionField(page.source)
        box_path_d = [
            'M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',
            'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',
            'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',
            'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',
            'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315',
        ]
        box_paths = [Path(d_string=d_string) for d_string in box_path_d]
        page.process_word_boxes(box_paths, tr)
if __name__ == "__main__":
    # Executed as a script: let unittest discover and run the tests of this module.
    unittest.main()
Event Timeline
Log In to Comment