Page MenuHomec4science

test_text.py
No OneTemporary

File Metadata

Created
Sun, Apr 28, 12:35

test_text.py

import unittest
from os import sep, path
from os.path import dirname, basename, isfile, isdir
import lxml.etree as ET
import sys
sys.path.append('svgscripts')
from datatypes.page import Page
from datatypes.standoff_tag import StandoffTag
from datatypes.text import Text
class TestText(unittest.TestCase):
    """Unit tests for ``datatypes.text.Text`` and its standoff markups."""

    def setUp(self):
        """Resolve the paths of the fixture files used by this test case.

        The ``test_data`` directory is looked up next to this file first and,
        if it does not exist there, one directory level above.
        """
        # abspath guards against a relative __file__ (e.g. when the module is
        # run directly as ``python test_text.py``): its dirname would be ''
        # and every fixture path would then be anchored at the filesystem
        # root instead of at this file's directory.
        here = dirname(path.abspath(__file__))
        data_dir = path.join(here, 'test_data')
        if not isdir(data_dir):
            data_dir = path.join(dirname(here), 'test_data')
        self.test_file = path.join(data_dir, 'test.xml')
        self.test_svg_file = path.join(data_dir, 'test421.svg')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.xml_fileB = path.join(data_dir, 'N_VII_1_page006.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')
        self.test_page = path.join(data_dir, 'N_VII_1_page001.xml')
        self.test_manuscript = path.join(data_dir, 'N_VII_1.xml')

    def test_semantic(self):
        """Placeholder: no assertions yet.

        NOTE(review): presumably meant to exercise
        ``Text.get_semantic_dictionary()`` -- confirm and add assertions.
        """

    def test_attach_to_tree(self):
        """A Text attached to an XML tree can be read back from its node."""
        empty_tree = ET.ElementTree(ET.Element('page'))
        content = 'asdf'
        bold = StandoffTag('bold', 0, len(content) - 1)
        italic = StandoffTag('italic', len(content) // 2, len(content), id='1')
        text = Text(content, standoff_markups=[bold, italic])
        text.attach_object_to_tree(empty_tree)
        # Re-create the object from the first node written to the tree.
        node = empty_tree.xpath('//' + Text.XML_TAG)[0]
        text = Text.create_cls_from_node(node)
        self.assertEqual(text.content, content)
        self.assertEqual(text.id, '0')
        self.assertEqual(len(text.standoff_markups), 2)

    def test_extract(self):
        """extract_part returns the markups of a part, optionally filtered."""
        content = 'asdfa'
        bold = StandoffTag('bold', 0, len(content) - 2)
        italic = StandoffTag('italic', len(content) // 2, len(content) - 1, id='1')
        textA = Text(content, standoff_markups=[bold, italic])
        textB = textA.extract_part('sdf')
        self.assertEqual(len(textB.standoff_markups), 2)
        textB = textA.extract_part('sdf', css_filter='bold')
        self.assertEqual(len(textB.standoff_markups), 1)
        # Disabled manual check with non-ASCII content, kept for reference:
        #   content = '26: von „Regel]¿'
        #   textA = Text(content, standoff_markups=[ StandoffTag('bold', 6, 9)])
        #   print(textA.extract_part('von', css_filter='bold'))
        #   print(textA.extract_part('„Regel', css_filter='bold'))

    def test_markup_contains_css_filter(self):
        """markup_contains_css_filter reflects the current standoff markups."""
        content = 'asdfa'
        bold = StandoffTag('bold', 0, len(content) - 2)
        italic = StandoffTag('italic', len(content) // 2, len(content) - 1, id='1')
        textA = Text(content, standoff_markups=[bold, italic])
        self.assertTrue(textA.markup_contains_css_filter('bold'))
        self.assertTrue(textA.markup_contains_css_filter('italic'))
        # Drop the first markup (the 'bold' tag passed in above).
        textA.standoff_markups.pop(0)
        self.assertFalse(textA.markup_contains_css_filter('bold'))

    def test_join(self):
        """Joining two texts concatenates their contents with one space."""
        content = 'asdfa'
        textA = Text(content, standoff_markups=[
            StandoffTag('bold', 0, len(content) - 2),
            StandoffTag('italic', len(content) // 2, len(content) - 1, id='1'),
        ])
        # textB gets its own tag instances so nothing is shared with textA.
        textB = Text(content, standoff_markups=[
            StandoffTag('bold', 0, len(content) - 2),
            StandoffTag('italic', len(content) // 2, len(content) - 1, id='1'),
        ])
        textA.join(textB)
        self.assertEqual(textA.content, content + ' ' + content)

    def test_create_from_html(self):
        """create_cls_from_html turns inline HTML tags into standoff markups."""
        html = 'asdf <b><i>test</i></b> the <del>best</del>'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 3)
        # The first two markups must cover the same character range.
        self.assertEqual(text.standoff_markups[0].startIndex, text.standoff_markups[1].startIndex)
        self.assertEqual(text.standoff_markups[0].endIndex, text.standoff_markups[1].endIndex)
        html = 'asdf <i>test</i>'
        text = Text.create_cls_from_html(html)
        self.assertEqual(len(text.standoff_markups), 1)
        # Smoke test only: entity-escaped markup and non-ASCII characters must
        # be accepted without raising; the result is not inspected.
        html = 'Quart-, Oktav- und Folioblätter verschiedenen Formats (z. T. von Albert Brenners und Peter Gasts Hand); Entwürfe und Vorstufen aus dem Bereiche des &lt;i&gt;Menschlichen I&lt;/i&gt; (die sogenannten &lt;i&gt;Sorrentiner Papiere&lt;/i&gt;)'
        Text.create_cls_from_html(html)
# Run the whole test case when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

Event Timeline