Page MenuHomec4science

test_page.py
No OneTemporary

File Metadata

Created
Sun, May 12, 20:44

test_page.py

import unittest
from os import sep, path
from os.path import isdir, dirname, basename
import lxml.etree as ET
import sys
# Extend the module search path before the project imports below are executed.
sys.path.append('svgscripts')
# Records whether the fallback search path below had to be added.
dir_changed = False
if not isdir('datatypes'):
    # There is no 'datatypes' directory in the current working directory, so
    # additionally search the parent of the first sys.path entry.
    # NOTE(review): presumably this covers launching the tests from inside the
    # tests directory rather than from the package directory -- confirm.
    sys.path.append(dirname(sys.path[0]))
    dir_changed = True
from datatypes.lineNumber import LineNumber
from datatypes.page import Page
from datatypes.transkriptionField import TranskriptionField
from datatypes.writing_process import WritingProcess
from datatypes.word import Word
class TestPage(unittest.TestCase):
    """Unit tests for ``datatypes.page.Page`` based on the fixtures in ``test_data``."""

    def setUp(self):
        """Resolve the fixture file paths used by the individual tests.

        The ``test_data`` directory is looked up next to this file first and,
        if it does not exist there, one directory higher.
        """
        # Anchor on the absolute location of this file. With a relative
        # ``__file__`` (e.g. ``python test_page.py``), ``dirname(__file__)`` is
        # '' and concatenating it with ``sep`` would yield '/test_data' in the
        # filesystem root instead of a path relative to this file.
        here = dirname(path.abspath(__file__))
        data_dir = path.join(here, 'test_data')
        if not isdir(data_dir):
            data_dir = path.join(dirname(here), 'test_data')
        self.test_file = path.join(data_dir, 'test.xml')
        self.test_svg_file = path.join(data_dir, 'test421.svg')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')

    @staticmethod
    def _font_size_of_stage(page, stage):
        """Return the font size (as float, 'px' stripped) of the first style key mapped to ``stage``."""
        style_keys = [
            key
            for key, value in page.fontsizekey2stage_mapping.items()
            if value == stage
        ]
        font_size = page.style_dict.get(style_keys[0]).get('font-size')
        return float(font_size.replace('px', ''))

    def test_Page(self):
        """A page built from XML and SVG exposes the expected metadata, styles and font-size stages."""
        page = Page(xml_source_file=self.test_file, svg_file=self.test_svg_file)
        self.assertEqual(page.title, 'Mp XIV 1')
        self.assertEqual(page.number, '421')
        self.assertEqual(len(page.sonderzeichen_list), 2)
        self.assertIn('st21', page.sonderzeichen_list)
        self.assertIn('st23', page.sonderzeichen_list)
        self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
        self.assertEqual(page.width, 493.23)
        font_stage0 = self._font_size_of_stage(page, 0)
        font_stage1 = self._font_size_of_stage(page, 1)
        font_stage2 = self._font_size_of_stage(page, 2)
        # The font size must strictly decrease from stage 0 to stage 2.
        self.assertGreater(font_stage0, font_stage1)
        self.assertGreater(font_stage1, font_stage2)

    def test_get_biggest_fontSize4styles(self):
        """``get_biggest_fontSize4styles`` returns 10 for the given set of style keys."""
        page = Page(xml_source_file=self.test_file)
        style_set = {'st12', 'st2', 'st14', 'st13'}
        self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)

    def test_get_words(self):
        """The test page contains 440 words with the expected first and last text."""
        page = Page(xml_source_file=self.test_file)
        words = page.words
        self.assertEqual(len(words), 440)
        self.assertEqual(words[0].text, '$')
        self.assertEqual(words[439].text, 'mußte!')

    def test_create_writing_process(self):
        """After creating the writing processes, words 97 and 129 belong to the later insertion stage."""
        page = Page(xml_source_file=self.test_file)
        page.create_writing_processes_and_attach2tree()
        for word_index in (97, 129):
            position = page.words[word_index].transkription_positions[0]
            self.assertEqual(
                position.writing_process_id,
                WritingProcess.LATER_INSERTION_AND_ADDITION,
            )

    def test_init_line_numbers(self):
        """``init_line_numbers`` turns three given line numbers into seven and makes them resolvable."""
        page = Page(xml_source_file=self.test_file)
        line_numbers = [
            LineNumber(id=2, top=20, bottom=40),
            LineNumber(id=4, top=50, bottom=60),
            LineNumber(id=6, top=70, bottom=90),
        ]
        page.init_line_numbers(line_numbers, 122.345)
        self.assertEqual(len(page.line_numbers), 7)
        self.assertEqual(page.line_numbers[0].id, 1)
        self.assertEqual(page.line_numbers[6].id, 7)
        self.assertEqual(page.line_numbers[6].top, 91)
        self.assertEqual(page.line_numbers[6].bottom, 122.345)
        self.assertEqual(page.get_line_number(122), 7)
        self.assertEqual(page.get_line_number(92), 7)
        self.assertEqual(page.get_line_number(22), 2)

    def test_get_line_number(self):
        """The vertical midpoint of a word's first position resolves to the expected line number."""
        page = Page(xml_source_file=self.test_file)
        for word_index, expected_line in ((0, 1), (27, 2), (105, 7)):
            position = page.words[word_index].transkription_positions[0]
            midpoint = (position.bottom + position.top) / 2
            self.assertEqual(page.get_line_number(midpoint), expected_line)

    def test_categorize_paths(self):
        """After ``categorize_paths``, the word with id 269 on line 33 is not flagged as deleted."""
        # NOTE(review): this sets a class attribute that is never reset here, so
        # it stays set for tests that run afterwards -- confirm this is intended.
        Page.UNITTESTING = True
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        tr = TranskriptionField(page.source)
        # Restrict the page to the words of line 33 before categorizing paths.
        page.words = [word for word in page.words if word.line_number == 33]
        page.categorize_paths(tr)
        deleted_flags = [word.deleted for word in page.words if word.id == 269]
        self.assertEqual(deleted_flags[0], False)
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

Event Timeline