Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F62220997
test_text.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, May 11, 17:46
Size
4 KB
Mime Type
text/x-python
Expires
Mon, May 13, 17:46 (2 d)
Engine
blob
Format
Raw Data
Handle
17621052
Attached To
rNIETZSCHEPYTHON nietzsche-python
test_text.py
View Options
import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys

# Extend the module search path with the relative directory 'svgscripts'
# before importing the project modules below.
# NOTE(review): presumably the 'datatypes' package lives inside 'svgscripts',
# which would make the order of these statements significant -- confirm.
sys.path.append('svgscripts')
from datatypes.page import Page
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text
class TestText(unittest.TestCase):
    """Unit tests for ``datatypes.text.Text`` and its standoff markups."""

    def setUp(self):
        """Locate the ``test_data`` directory and record fixture file paths.

        The directory is looked up next to this module first and, if it does
        not exist there, one directory level higher.  Paths are assembled
        with ``path.join`` so that an empty ``dirname(__file__)`` (module run
        by relative path from its own directory) yields the relative path
        ``test_data`` instead of ``/test_data`` at the filesystem root.
        """
        data_dir = path.join(dirname(__file__), 'test_data')
        if not isdir(data_dir):
            data_dir = path.join(dirname(dirname(__file__)), 'test_data')
        self.test_file = path.join(data_dir, 'test.xml')
        self.test_svg_file = path.join(data_dir, 'test421.svg')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.xml_fileB = path.join(data_dir, 'N_VII_1_page006.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')
        self.test_page = path.join(data_dir, 'N_VII_1_page001.xml')
        self.test_manuscript = path.join(data_dir, 'N_VII_1.xml')

    @staticmethod
    def _create_marked_text(content):
        """Return a ``Text`` for *content* carrying a bold and an italic tag.

        The bold tag is created with the indices ``0`` and
        ``len(content) - 2``; the italic tag (id ``'1'``) with
        ``len(content) // 2`` and ``len(content) - 1``.  Fresh tag objects
        are created on every call, so two returned texts never share tags.
        """
        bold_tag = StandoffTag('bold', 0, len(content) - 2)
        italic_tag = StandoffTag('italic', len(content) // 2, len(content) - 1, id='1')
        return Text(content, standoff_markups=[bold_tag, italic_tag])

    def test_semantic(self):
        """Placeholder: currently exercises nothing."""
        # NOTE(review): the original only held a disabled print of
        # Text.get_semantic_dictionary(); no expectation is known yet.

    def test_attach_to_tree(self):
        """Attach a Text to an XML tree and rebuild it from the written node."""
        empty_tree = ET.ElementTree(ET.Element('page'))
        content = 'asdf'
        standoff_tag = StandoffTag('bold', 0, len(content) - 1)
        standoff_tag2 = StandoffTag('italic', len(content) // 2, len(content), id='1')
        text = Text(content, standoff_markups=[standoff_tag, standoff_tag2])
        text.attach_object_to_tree(empty_tree)
        # Re-create the object from the first node carrying Text.XML_TAG.
        text = Text.create_cls_from_node(empty_tree.xpath('//' + Text.XML_TAG)[0])
        self.assertEqual(text.content, content)
        self.assertEqual(text.id, '0')
        self.assertEqual(len(text.standoff_markups), 2)

    def test_extract(self):
        """Extract a substring with and without a css filter."""
        textA = self._create_marked_text('asdfa')
        textB = textA.extract_part('sdf')
        self.assertEqual(len(textB.standoff_markups), 2)
        textB = textA.extract_part('sdf', css_filter='bold')
        self.assertEqual(len(textB.standoff_markups), 1)
        # TODO(review): the original kept a disabled, assertion-free scenario
        # here: content '26: von „Regel]¿' with StandoffTag('bold', 6, 9),
        # extracting 'von' and '„Regel' with css_filter='bold'.  Turn it into
        # a real check once the expected results are known.

    def test_markup_contains_css_filter(self):
        """Check css filter detection before and after removing a markup."""
        textA = self._create_marked_text('asdfa')
        self.assertTrue(textA.markup_contains_css_filter('bold'))
        self.assertTrue(textA.markup_contains_css_filter('italic'))
        # Drop the first markup; presumably this is the bold tag, assuming
        # Text keeps the markups in the order they were passed in.
        textA.standoff_markups.pop(0)
        self.assertFalse(textA.markup_contains_css_filter('bold'))

    def test_join(self):
        """Join two equal texts and expect their contents separated by a space."""
        content = 'asdfa'
        textA = self._create_marked_text(content)
        textB = self._create_marked_text(content)
        textA.join(textB)
        self.assertEqual(textA.content, content + ' ' + content)

    def test_create_from_html(self):
        """Build Text objects from html snippets and check the markup count."""
        html = 'asdf <b><i>test</i></b> the <del>best</del>'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 3)
        # The first two markups are expected to share start and end index.
        self.assertEqual(text.standoff_markups[0].startIndex, text.standoff_markups[1].startIndex)
        self.assertEqual(text.standoff_markups[0].endIndex, text.standoff_markups[1].endIndex)
        html = 'asdf <i>test</i>'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 1)
        html = 'Quart-, Oktav- und Folioblätter verschiedenen Formats (z. T. von Albert Brenners und Peter Gasts Hand); Entwürfe und Vorstufen aus dem Bereiche des <i>Menschlichen I</i> (die sogenannten <i>Sorrentiner Papiere</i>)'
        # No assertion on this result: the call only has to finish without
        # raising.  NOTE(review): add a check once the expected markups are
        # confirmed.
        text = Text.create_cls_from_html(html)
# Run all test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
Event Timeline
Log In to Comment