Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F62963898
test_word.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, May 16, 19:53
Size
27 KB
Mime Type
text/x-python
Expires
Sat, May 18, 19:53 (2 d)
Engine
blob
Format
Raw Data
Handle
17713434
Attached To
rNIETZSCHEPYTHON nietzsche-python
test_word.py
View Options
import
unittest
from
os
import
sep
,
path
import
lxml.etree
as
ET
import
sys
sys
.
path
.
append
(
'svgscripts'
)
from
process_words_post_merging
import
reset_page
,
update_writing_process_ids
from
datatypes.box
import
Box
from
datatypes.manuscript
import
ArchivalManuscriptUnity
from
datatypes.matrix
import
Matrix
import
datatypes.page
from
datatypes.path
import
Path
from
datatypes.positional_word_part
import
PositionalWordPart
from
datatypes.style
import
Style
from
datatypes.transkriptionField
import
TranskriptionField
from
datatypes.transkription_position
import
TranskriptionPosition
from
datatypes.word
import
Word
,
execute_function_on_parts
,
update_transkription_position_ids
from
datatypes.word_deletion_path
import
WordDeletionPath
from
datatypes.word_position
import
WordPosition
sys
.
path
.
append
(
'py2ttl'
)
from
class_spec
import
SemanticClass
class Page:
    """Lightweight stub page object for tests that must not load a real page.

    Within this module it is instantiated by ``TestWord.test_split`` and handed
    to ``PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST``.  It is
    unrelated to ``datatypes.page.Page``, which the other tests reach through
    the fully qualified module path.  Every method returns a fixed value.
    """

    def __init__(self):
        # The stub is not backed by any svg file.
        self.svg_file = None

    def get_line_number(self, input=0):
        """Return the constant ``-1`` regardless of ``input``.

        NOTE: the parameter name shadows the builtin ``input``; it is kept
        unchanged so that keyword callers keep working.
        """
        return -1

    def get_biggest_fontSize4styles(self, style_set=None):
        """Return the constant font size ``7`` regardless of ``style_set``.

        The default used to be ``{}``, i.e. a shared mutable object (and an
        empty dict rather than a set).  The argument is ignored, so a ``None``
        sentinel is equivalent for callers and avoids the mutable default.
        """
        return 7
class TestWord(unittest.TestCase):
    """Unit tests for ``datatypes.word.Word`` and its module-level helpers.

    Several tests load xml/svg fixtures from the ``test_data`` directory next
    to this file; ``test_create_correction_history_case_full`` additionally
    reads ``xml/N_VII_1_page138.xml`` relative to the working directory.
    """

    # Selector for the ``test_create_correction_history_case*`` tests:
    # ``None`` runs all of them, an int runs only the case whose decorator
    # compares against that number.
    TESTCASE = None

    # SVG path data of the boxes shared by the two ``process_boxes`` tests.
    _BOX_PATH_D = (
        'M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',
        'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',
        'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',
        'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',
        'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315',
    )
    # Indices into ``page.words`` that are checked against the boxes above.
    _BOX_WORD_INDICES = (30, 277, 288, 297, 321)

    def setUp(self):
        """Prepare fixture paths, word part dictionaries and a ``word`` node."""
        # ``path.join`` keeps the directory relative when ``__file__`` has no
        # directory component; plain concatenation with ``sep`` would yield
        # '/test_data' at the filesystem root in that situation.
        data_dir = path.join(path.dirname(__file__), 'test_data')
        self.test_file = path.join(data_dir, 'N_VII_1_page009.xml')
        self.word_deletion_path_file = path.join(data_dir, 'N_VII_1_page138.xml')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')

        # One dictionary per character of 'abc', placed on consecutive x.
        self.word_part_objs = [{'text': 'a'}, {'text': 'b'}, {'text': 'c'}]
        for x, part_obj in enumerate(self.word_part_objs):
            part_obj['class'] = 'st22'
            part_obj['x'] = x
            part_obj['y'] = 11

        word_attributes = {'text': 'abc', 'id': '0', 'line-number': '2', 'deleted': 'true'}
        word_position = TranskriptionPosition(
            x=0, y=1, height=10, width=10,
            matrix=Matrix('matrix(0.94 0.342 -0.342 0.94 0 0)'))
        self.transkription_positions = [word_position]
        self.word_node = ET.Element('word', attrib=word_attributes)
        word_position.attach_object_to_tree(self.word_node)
        for x, char in enumerate(word_attributes['text']):
            ET.SubElement(
                self.word_node, 'part',
                attrib={'text': char, 'x': str(x), 'y': '11', 'class': 'st22'})

    def test_add_deletion_paths(self):
        """``add_deletion_paths`` attaches deletion paths to a deleted part."""
        page = datatypes.page.Page(self.word_deletion_path_file, add_deletion_paths_to_words=False)
        word = [word for word in page.words if word.text == 'AufBau'][0]
        #self.assertTrue(word.deleted)
        self.assertGreater(len(word.word_parts), 0)
        self.assertTrue(word.word_parts[0].deleted)
        word.add_deletion_paths(page.word_deletion_paths, tr_xmin=28.347656, tr_ymin=49.921875)
        self.assertGreater(len(word.word_parts[0].deletion_paths), 0)
        #print(word.deletion_paths)
        # Disabled manual check against a local file:
        # page = datatypes.page.Page('xml/Mp_XIV_page420.xml')
        # words = [ word for word in page.words if word.deleted or True in [ part.deleted for part in word.word_parts ]]
        # words[0].add_deletion_paths(page.word_deletion_paths)
        # print(words[0].deletion_paths)

    def test_join_words(self):
        """``Word.join_words`` merges a list of words into a single word."""
        words = [
            Word(id=4, text='asdf-', line_number=1, deleted=True),
            Word(id=5, text='bsdf', line_number=2, deleted=False),
        ]
        new_word = Word.join_words(words)
        self.assertEqual(new_word.id, 4)
        self.assertEqual(new_word.text, 'asdf-bsdf')
        self.assertEqual(new_word.edited_text, 'asdfbsdf')
        self.assertEqual(new_word.deleted, False)
        self.assertEqual(new_word.line_number, -1)

        # The first word already consists of parts; the rest are plain words.
        words = [
            Word(id=1, word_parts=[
                Word(id=4, text='asdf-', line_number=1, deleted=True),
                Word(id=5, text='bsdf', line_number=2, deleted=False),
            ]),
            Word(id=4, text='.', line_number=2, deleted=True),
            Word(id=5, text='.', line_number=2, deleted=False),
        ]
        new_word = Word.join_words(words)
        self.assertEqual(new_word.text, 'asdf-bsdf..')
        new_word = Word.join_words(words, add_white_space_between_words=True)
        self.assertEqual(new_word.text, 'asdf- bsdf . .')

    def test_Word_with_word_part_objs(self):
        """``Word.CREATE_WORD`` builds a word from word part dictionaries."""
        word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        position = word.transkription_positions[0]
        self.assertEqual(word.id, 0)
        self.assertEqual(position.bottom, 13)
        self.assertEqual(position.height, 10)
        self.assertEqual(position.top, 3)
        self.assertEqual(position.left, 0)
        self.assertEqual(position.width, 10)
        self.assertEqual(word.text, 'abc')

    def test_Word_with_word_node(self):
        """``Word.create_cls`` rebuilds a word from the node made in setUp."""
        word = Word.create_cls(self.word_node)
        position = word.transkription_positions[0]
        self.assertEqual(word.id, 0)
        self.assertEqual(word.deleted, True)
        self.assertEqual(position.bottom, 11)
        self.assertEqual(position.height, 10)
        self.assertEqual(position.top, 1)
        self.assertEqual(position.left, 0)
        self.assertEqual(position.width, 10)
        self.assertEqual(word.text, 'abc')
        self.assertEqual(word.line_number, 2)
        self.assertEqual(position.transform.isRotationMatrix(), True)

    def test_attach_word_to_tree(self):
        """A word attached to a tree can be read back from its ``word`` node."""
        newWord = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        empty_tree = ET.ElementTree(ET.Element('page'))
        newWord.attach_word_to_tree(empty_tree)
        word_nodes = empty_tree.getroot().xpath('//word')
        # Guard against a vacuous pass: without any node the loop below would
        # not execute a single assertion.
        self.assertGreater(len(word_nodes), 0)
        for word_node in word_nodes:
            word = Word.CREATE_WORD(word_node=word_node)
            position = word.transkription_positions[0]
            self.assertEqual(word.id, 0)
            self.assertEqual(word.deleted, False)
            self.assertEqual(position.bottom, 13)
            self.assertEqual(position.height, 10)
            self.assertEqual(position.top, 3)
            self.assertEqual(position.left, 0)
            self.assertEqual(position.width, 10)
            self.assertEqual(word.text, 'abc')

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 0, 'Not testing this case')
    def test_create_correction_history_case0(self):
        """Whole word over a box: no earlier version, but an overwritten word.

        The original inline comment calls this "Case 1"; it is selected with
        ``TESTCASE == 0``.
        """
        box = Box(earlier_text='XYX')
        word = Word(text='ASDF', transkription_positions=[TranskriptionPosition()])
        word.word_box = box
        word.create_correction_history()
        self.assertIsNone(word.earlier_version)
        self.assertIsNotNone(word.overwrites_word)

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 1, 'Not testing this case')
    def test_create_correction_history_case1(self):
        """Part of a word over a box: only that part overwrites a word.

        The original inline comment calls this "Case 2"; it is selected with
        ``TESTCASE == 1``.
        """
        box = Box(earlier_text='XYX')
        partA = Word(text='A', transkription_positions=[TranskriptionPosition()])
        partA.word_box = box
        partB = Word(text='SDF', transkription_positions=[TranskriptionPosition()])
        word = Word(text='ASDF', word_parts=[partA, partB])
        word.create_correction_history()
        self.assertIsNone(word.earlier_version)
        self.assertIsNotNone(word.word_parts[0].overwrites_word)

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 2, 'Not testing this case')
    def test_create_correction_history_case3(self):
        """Part over a box where the boxed text belongs to an earlier version.

        NOTE(review): the method is named ``case3`` but is selected with
        ``TESTCASE == 2``; there is no ``case2`` method in this class.
        """
        box = Box(earlier_text='XYX')
        tp0 = TranskriptionPosition()
        tp0.style = Style(writing_process_id=0)
        tp1 = TranskriptionPosition()
        tp1.style = Style(writing_process_id=1)
        partA = Word(id=0, text='Test', transkription_positions=[tp0])
        partB = Word(id=1, text='er', transkription_positions=[tp1])
        partB.word_box = box
        word = Word(text='Tester', writing_process_id=1, word_parts=[partA, partB])
        word.create_correction_history(box_style=tp0.style)
        self.assertEqual(word.text, 'Tester')
        self.assertIsNotNone(word.earlier_version)
        self.assertEqual(word.earlier_version.text, 'TestXYX')
        self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 3, 'Not testing this case')
    def test_create_correction_history_case4(self):
        """Part of a word is deleted (selected with ``TESTCASE == 3``)."""
        partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
        partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
        word = Word(text='ASDF', word_parts=[partA, partB])
        word.create_correction_history()
        self.assertIsNotNone(word.earlier_version)
        self.assertIsNotNone(word.word_parts[0].isDeletionOfWord)
        self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
        self.assertEqual(word.edited_text, 'SDF')

    @unittest.skipUnless(TESTCASE is None or TESTCASE == 4, 'Not testing this case')
    def test_create_correction_history_case5(self):
        """A later part extends the earlier version (``TESTCASE == 4``)."""
        tp0 = TranskriptionPosition()
        tp0.style = Style(writing_process_id=0)
        tp1 = TranskriptionPosition()
        tp1.style = Style(writing_process_id=1)
        partA = Word(id=0, text='Test', transkription_positions=[tp0])
        partB = Word(id=1, text='er', transkription_positions=[tp1])
        word = Word(text='Tester', word_parts=[partA, partB])
        word.create_correction_history()
        self.assertIsNotNone(word.earlier_version)
        self.assertEqual(word.word_parts[1].extendsEarlierVersion, True)
        self.assertEqual(word.word_parts[1].isExtensionOfWord, word.earlier_version)

    #@unittest.skipUnless(TESTCASE is None or TESTCASE == 5, 'Not testing this case')
    #@unittest.skip('case tested, relies on a local xml file')
    def test_create_correction_history_case_full(self):
        """Run the correction history on two words of a real page.

        NOTE(review): the page is read from 'xml/N_VII_1_page138.xml' relative
        to the working directory, not from the ``test_data`` fixture prepared
        in ``setUp`` -- confirm that this is intended.
        """
        page = datatypes.page.Page('xml/N_VII_1_page138.xml')
        manuscript = ArchivalManuscriptUnity()
        reset_page(page)
        update_writing_process_ids(page)
        word = [word for word in page.words if word.text == 'Verschiedenes' and word.line_number == 4][0]
        wordAufBau = [word for word in page.words if word.text == 'AufBau'][0]
        #page.words = [ word ]
        page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
        word.word_parts[0].transkription_positions[0].has_box = Box(earlier_text='v')
        self.assertEqual(len(word.word_parts), 2)
        # Called for its effect on the word; the return value is not needed.
        word._get_partial_word_over_box()
        update_transkription_position_ids(word)
        word.create_correction_history(page)
        self.assertEqual(word.writing_process_id, 1)
        self.assertIsNotNone(word.earlier_version)
        self.assertEqual(word.earlier_version.text, 'verschiedenes')
        #print(word.earlier_version.id, [ (w.id, w.text) for w in word.earlier_version.word_parts ])
        empty_tree = ET.ElementTree(ET.Element('page'))
        # Smoke test: attaching the word must not raise.
        word.attach_word_to_tree(empty_tree)
        #print(ET.dump(word_node))
        # Disabled assertions:
        # self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
        # self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
        # self.assertEqual(word.word_parts[1].overwrites_word is not None, True)

        # Second word: first part deleted, second part over a box.
        word = wordAufBau
        page.words = [word]
        page.update_styles(manuscript=manuscript, partition_according_to_styles=True)
        word.word_parts[0].deleted = True
        word.word_parts[1].transkription_positions[0].has_box = Box(earlier_text='b')
        self.assertEqual(len(word.word_parts), 3)
        word._get_partial_word_over_box()
        self.assertEqual(len(word.word_parts), 3)
        update_transkription_position_ids(word)
        word.create_correction_history(page)
        self.assertEqual(word.writing_process_id, 2)
        self.assertIsNotNone(word.earlier_version)
        self.assertEqual(word.text, 'AufBau')
        self.assertEqual(word.edited_text, 'Bau')
        self.assertEqual(word.earlier_version.text, 'Aufbau')
        self.assertEqual(word.word_parts[0].isDeletionOfWord, word.earlier_version.word_parts[0])
        self.assertEqual(word.word_parts[1].isTransformationOfWord, word.earlier_version.word_parts[1])
        self.assertIsNotNone(word.word_parts[1].overwrites_word)
        empty_tree = ET.ElementTree(ET.Element('page'))
        word_node = word.attach_word_to_tree(empty_tree)
        #print(ET.dump(word_node))
        # Smoke test: the attached node must be readable again.
        Word.create_cls(word_node)

    #@unittest.skip('')
    def test_earlier_version(self):
        """``create_earlier_version`` links a deleted part to its earlier part."""
        partA = Word(id=0, text='A', deleted=True, transkription_positions=[TranskriptionPosition()])
        partB = Word(id=1, text='SDF', transkription_positions=[TranskriptionPosition()])
        word = Word(text='ASDF', word_parts=[partA, partB])
        earlier_version = word.create_earlier_version()
        self.assertIsNotNone(earlier_version)
        self.assertIsNotNone(word.word_parts[0].isDeletionOfWord)
        self.assertEqual(word.word_parts[0].isDeletionOfWord, earlier_version.word_parts[0])

    def test_undo_partitioning(self):
        """``undo_partitioning`` moves the parts' positions back to the word."""
        tps = [
            TranskriptionPosition(id=i, x=xy, y=xy, height=10, width=10)
            for i, xy in enumerate([3, 4, 5])
        ]
        partA = Word(id=0, text='Auf', writing_process_id=1, deleted=True, transkription_positions=[tps[0]])
        partB = Word(id=1, text='B', writing_process_id=2, transkription_positions=[tps[1]])
        partC = Word(id=2, text='au', writing_process_id=1, transkription_positions=[tps[2]])
        word = Word(text='Aufbau', writing_process_id=2, word_parts=[partA, partB, partC])
        word.undo_partitioning()
        self.assertEqual(len(word.transkription_positions), len(tps))
        self.assertEqual(len(word.word_parts), 0)
        # Disabled manual check against a local file:
        # page = datatypes.page.Page('xml/N_VII_1_page138.xml')
        # word = page.words[77]
        # word.undo_partitioning()
        # self.assertEqual(len(word.word_parts), 0)
        # self.assertEqual(len(word.transkription_positions), 3)
        # update_transkription_position_ids(word)
        # empty_tree = ET.ElementTree(ET.Element('page'))
        # word_node = word.attach_word_to_tree(empty_tree)
        # print(ET.dump(word_node))

    def test_split(self):
        """``Word.split`` cuts a word around a given substring."""
        # ``Page`` is the stub class defined at the top of this module.
        page = Page()
        pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, self.word_part_objs)
        transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
        full_text = ''.join(pwp.text for pwp in pwps)

        # The three words below deliberately share the same position list,
        # exactly as the assertions on ids and widths expect.
        word = Word(text=full_text, transkription_positions=transkription_positions)
        previousWord, currentWord, nextWord = word.split('b')
        self.assertEqual(previousWord.id, 0)
        self.assertEqual(previousWord.text, 'a')
        self.assertEqual(currentWord.id, 1)
        self.assertEqual(nextWord.id, 2)

        word = Word(text=full_text, transkription_positions=transkription_positions)
        previousWord, currentWord, nextWord = word.split('bc')
        self.assertEqual(previousWord.id, 0)
        self.assertEqual(previousWord.text, 'a')
        self.assertEqual(currentWord.id, 1)

        word = Word(text=full_text, transkription_positions=transkription_positions)
        previousWord, currentWord, nextWord = word.split('ab', start_id=10)
        self.assertEqual(currentWord.id, 10)
        self.assertEqual(currentWord.text, 'ab')
        self.assertEqual(currentWord.transkription_positions[0].width, 2.1)
        self.assertEqual(nextWord.id, 11)
        self.assertEqual(nextWord.transkription_positions[0].width, 5.2)

        # Splitting at a string that does not match whole parts must warn.
        word_part_objs = [
            {'text': 'x', 'class': 'st22', 'x': 0, 'y': 0},
            {'text': 'Insofern', 'class': 'st22', 'x': 1, 'y': 0},
            {'text': 'x', 'class': 'st22', 'x': 10, 'y': 0},
        ]
        pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
        word = Word(text=''.join(pwp.text for pwp in pwps), transkription_positions=transkription_positions)
        with self.assertWarns(Warning):
            previousWord, currentWord, nextWord = word.split('Insofer')

        word_part_objs = [{'text': 'xInsofern', 'class': 'st22', 'x': 0, 'y': 0}]
        pwps = PositionalWordPart.CREATE_SIMPLE_POSITIONAL_WORD_PART_LIST(page, word_part_objs)
        transkription_positions = TranskriptionPosition.CREATE_TRANSKRIPTION_POSITION_LIST_FROM_PWPS(pwps)
        word = Word(text=''.join(pwp.text for pwp in pwps), transkription_positions=transkription_positions)
        with self.assertWarns(Warning):
            previousWord, currentWord, nextWord = word.split('Insofern')

    def test_join(self):
        """``Word.join`` appends or prepends another word in place."""
        def create_dot_word():
            # A fresh one-part word '.' for every join.
            return Word.CREATE_WORD(word_part_objs=[{'text': '.', 'class': 'st22', 'x': 3, 'y': 11}])

        word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        word.join(create_dot_word(), add_white_space_between_words=True)
        self.assertEqual(word.text, 'abc .')

        word = Word.CREATE_WORD(word_part_objs=self.word_part_objs, height=10, endX=10)
        word.join(create_dot_word())
        self.assertEqual(word.text, 'abc.')

        # Joining again on the same word, this time in front of it.
        word.join(create_dot_word(), append_at_end_of_new_word=False)
        self.assertEqual(word.text, '.abc.')
        # Disabled debug output:
        # tree = ET.ElementTree(ET.Element('page'))
        # word.attach_word_to_tree(tree)
        # print(ET.dump(tree.getroot()))

    def test_get_semanticAndDataDict(self):
        """The semantic dictionary declares a super property for deletions."""
        dictionary = Word.get_semantic_dictionary()
        #print(dictionary)
        info_dict = dictionary['properties'].get('isDeletionOfWord')
        # Fail with a clear message instead of an error on ``None``.
        self.assertIsNotNone(info_dict)
        self.assertIn(SemanticClass.SUPER_PROPERTY, info_dict)
        #print(info_dict[SemanticClass.SUPER_PROPERTY].get(SemanticClass.PROPERTY_NAME))

    def test_simplify_transkription_positions(self):
        """``simplify_transkription_positions`` merges two positions into one."""
        node_a_xml = """<transkription-position bottom="234.0" height="7.328" id="0" left="144.925" top="225.672" width="4.703" writing-process-id="0">
        <word-part bottom="234.0" height="7.328" id="0" left="144.925" style-class="st11 st12" symbol-id="glyph6-7" text="S" top="226.672" width="4.703"/>
        </transkription-position> """
        nodeA = ET.fromstring(node_a_xml)
        node_b_xml = """<transkription-position bottom="234.0" height="7.078" id="1" left="150.586" top="232.438" width="0.844" writing-process-id="0">
        <word-part bottom="234.0" height="7.078" id="1" left="150.586" style-class="st11 st12" symbol-id="glyph6-2" text="i" top="226.922" width="0.844"/>
        </transkription-position>
        """
        nodeB = ET.fromstring(node_b_xml)

        word = Word(text="Si", transkription_positions=[
            TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB)])
        self.assertEqual(len(word.transkription_positions), 2)
        word.simplify_transkription_positions()
        self.assertEqual(len(word.transkription_positions), 1)

        # Second run: the second position carries writing process id -1.
        word = Word(text="Si", transkription_positions=[
            TranskriptionPosition(node=nodeA), TranskriptionPosition(node=nodeB)])
        word.transkription_positions[1].writing_process_id = -1
        word.simplify_transkription_positions()
        self.assertEqual(len(word.transkription_positions), 1)
        self.assertEqual(word.transkription_positions[0].writing_process_id, 0)
        # Disabled debug output:
        # tree = ET.ElementTree(ET.Element('page'))
        # word.attach_word_to_tree(tree)
        # print(ET.dump(tree.getroot()))

    def test_partition(self):
        """Partitioning by writing process id yields three word parts."""
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        self.assertEqual(word.belongs_to_multiple_writing_processes(), True)
        word.partition_according_to_writing_process_id()
        self.assertEqual(len(word.word_parts), 3)
        self.assertEqual(word.belongs_to_multiple_writing_processes(), False)
        self.assertEqual(word.belongs_to_multiple_writing_processes(include_parts=True), True)
        # The partition must survive a round trip through the xml tree.
        empty_tree = ET.ElementTree(ET.Element('page'))
        word_node = word.attach_word_to_tree(empty_tree)
        newWord = Word.create_cls(word_node)
        self.assertEqual(len(newWord.word_parts), 3)
        #print(ET.dump(empty_tree.getroot()))

    def test_partition_deletion(self):
        """Partitioning by deletion status splits words of mixed status."""
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        # Mark exactly the positions of writing process 1 as deleted.
        for transkription_position in word.transkription_positions:
            transkription_position.deleted = transkription_position.writing_process_id == 1
        self.assertEqual(word.has_mixed_status('deleted'), True)
        word.partition_according_to_deletion()
        self.assertEqual(len(word.word_parts), 3)
        self.assertEqual(word.has_mixed_status('deleted'), False)
        self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)

        # Reload the page: deletion inside an already partitioned part.
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        word.partition_according_to_writing_process_id()
        #print([(word.text, word.deleted) for word in word.word_parts])
        word.word_parts[1].transkription_positions[1].deleted = True
        word.partition_according_to_deletion()
        self.assertEqual(len(word.word_parts), 4)
        #print([(word.text, word.deleted) for word in word.word_parts])

        partA = Word(text='A', deleted=True)
        partB = Word(text='SDF', deleted=False)
        word = Word(text='ASDF', word_parts=[partA, partB])
        self.assertEqual(word.has_mixed_status('deleted', include_parts=True), True)

    def test_execute_function_on_parts(self):
        """``execute_function_on_parts`` returns the partitioned word parts."""
        page = datatypes.page.Page(self.test_file)
        word_parts = [page.words[67], page.words[68]]
        word_parts, _ = execute_function_on_parts(word_parts, 'partition_according_to_writing_process_id')
        self.assertEqual(len(word_parts), 4)

    def test_process_word_boxes(self):
        """``process_boxes`` returns the word, or one of its parts, over a box."""
        page = datatypes.page.Page(self.pdf_xml)
        page.source = self.pdf_xml_source
        page.update_styles(partition_according_to_styles=True)
        tr = TranskriptionField(page.source)
        box_paths = [Box(d_string=d_string, earlier_text='test') for d_string in self._BOX_PATH_D]
        for index in self._BOX_WORD_INDICES:
            word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
            self.assertIsNotNone(word_over_box)
            self.assertTrue(word_over_box == page.words[index] or word_over_box in page.words[index].word_parts)
            #self.assertEqual(word_over_box in page.words[index].word_parts, True)

    def test_process_word_several_boxesOn1LIne(self):
        """``process_boxes`` finds a word for each box after partitioning."""
        page = datatypes.page.Page(self.pdf_xml)
        page.source = self.pdf_xml_source
        for word in page.words:
            word.set_writing_process_id_to_transkription_positions(page)
            word.partition_according_to_writing_process_id()
        tr = TranskriptionField(page.source)
        box_paths = [Box(d_string=d_string, earlier_text='test') for d_string in self._BOX_PATH_D]
        for index in self._BOX_WORD_INDICES:
            word_over_box = page.words[index].process_boxes(box_paths, tr_xmin=tr.xmin, tr_ymin=tr.ymin)
            self.assertIsNotNone(word_over_box)

    def test_split_according_to_status(self):
        """``split_according_to_status`` splits a word by a position attribute."""
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        # Give the positions of writing process 1 a different text.
        for transkription_position in word.transkription_positions:
            if transkription_position.writing_process_id == 1:
                transkription_position.text = 'asdf'
            else:
                transkription_position.text = word.text
        self.assertEqual(word.has_mixed_status('text'), True)
        new_words = word.split_according_to_status('text')
        #print([word.text for word in new_words ])
        self.assertGreater(len(new_words), 1)
        self.assertEqual(new_words[0].id, word.id)
        self.assertEqual(new_words[0].deleted, word.deleted)
        self.assertEqual(new_words[1].id, word.id + 1)

        # With ``splits_are_parts`` the splits become parts of the word.
        manuscript = ArchivalManuscriptUnity()
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        page.words = [word]
        page.update_styles(manuscript=manuscript)
        word.split_according_to_status('style', splits_are_parts=True)
        self.assertEqual(len(word.word_parts), 3)

    def test__create_new_word(self):
        """``_create_new_word`` copies every key in ``Word.COPY_PROPERTY_KEY``."""
        manuscript = ArchivalManuscriptUnity()
        page = datatypes.page.Page(self.test_file)
        word = page.words[67]
        page.words = [word]
        page.update_styles(manuscript=manuscript)
        newWord = word._create_new_word([word.transkription_positions[0]], 'style')
        for key in Word.COPY_PROPERTY_KEY:
            self.assertEqual(newWord.__dict__[key], word.__dict__[key])
        self.assertEqual(len(newWord.styles), 1)

    def test__get_partial_word_over_box(self):
        """``_get_partial_word_over_box`` leaves the word with two parts."""
        # A word without parts: one of its two positions has a box.
        word = Word(text='test', transkription_positions=[
            TranskriptionPosition(id=0), TranskriptionPosition(id=1)])
        word.transkription_positions[0].has_box = Box(earlier_text='asdf')
        word._get_partial_word_over_box()
        self.assertEqual(len(word.word_parts), 2)

        # A word with parts: the box sits on a position of the second part.
        partA = Word(id=0, text='A', transkription_positions=[TranskriptionPosition()])
        partB = Word(id=1, text='SDF', transkription_positions=[
            TranskriptionPosition(), TranskriptionPosition(id=1)])
        partB.transkription_positions[0].has_box = Box(earlier_text='asdf')
        word = Word(text='ASDF', word_parts=[partA, partB])
        word._get_partial_word_over_box()
        self.assertEqual(len(word.word_parts), 2)
# Run all tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Event Timeline
Log In to Comment