Page Menu | Home | c4science

test_page.py
No One | Temporary

File Metadata

Created
Sat, May 11, 22:17

test_page.py

import sys
import unittest
from os import sep, path
from os.path import isdir, isfile, dirname, basename

import lxml.etree as ET

# The project modules are imported by bare package name ('datatypes'), so the
# search path has to be extended before the imports below can succeed.
sys.path.append('svgscripts')
dir_changed = False
if not isdir('datatypes'):
    # No 'datatypes' directory in the current working directory: additionally
    # search the parent of sys.path[0].
    sys.path.append(dirname(sys.path[0]))
    dir_changed = True
from datatypes.lineNumber import LineNumber
from datatypes.mark_foreign_hands import MarkForeignHands
from datatypes.page import Page
from datatypes.path import Path
from datatypes.text_connection_mark import TextConnectionMark
from datatypes.transkriptionField import TranskriptionField
from datatypes.writing_process import WritingProcess
from datatypes.word import Word
class TestPage(unittest.TestCase):
    """Unit tests for ``datatypes.page.Page``, run against the fixtures in ``test_data``."""

    def setUp(self):
        """Locate the ``test_data`` directory and store the fixture paths on the test case.

        ``test_data`` is looked up next to this file first and, if it is not
        found there, one directory level higher.
        """
        # Anchor on the absolute location of this file: with a bare relative
        # ``__file__`` (e.g. 'test_page.py') ``dirname`` returns '' and naive
        # concatenation with ``sep`` would point at '/test_data'.
        test_dir = dirname(path.abspath(__file__))
        data_dir = path.join(test_dir, 'test_data')
        if not isdir(data_dir):
            data_dir = path.join(dirname(test_dir), 'test_data')
        self.test_file = path.join(data_dir, 'test.xml')
        self.test_svg_file = path.join(data_dir, 'test421.svg')
        self.pdf_xml = path.join(data_dir, 'W_I_8_page125.xml')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.xml_fileB = path.join(data_dir, 'N_VII_1_page006.xml')
        self.pdf_xml_source = path.join(data_dir, 'W_I_8_neu_125-01.svg')
        self.test_tcm_xml = path.join(data_dir, 'N_VII_1_page001.xml')
        self.test_manuscript = path.join(data_dir, 'N_VII_1.xml')

    def test_Page(self):
        """Check title, number, style data, width and font-size ordering of the stages."""
        page = Page(xml_source_file=self.test_file, svg_file=self.test_svg_file)
        self.assertEqual(page.title, 'Mp XIV 1')
        self.assertEqual(page.number, '421')
        self.assertEqual(len(page.sonderzeichen_list), 2)
        self.assertIn('st21', page.sonderzeichen_list)
        self.assertIn('st23', page.sonderzeichen_list)
        self.assertEqual(page.style_dict['st0']['fill'], '#F8F9F8')
        self.assertEqual(page.width, 493.23)

        def font_size_of_stage(stage):
            """Return the font size (px stripped) of the first style key mapped to ``stage``."""
            keys = [key for key, value in page.fontsizekey2stage_mapping.items() if value == stage]
            return float(page.style_dict.get(keys[0]).get('font-size').replace('px', ''))

        size_stage0, size_stage1, size_stage2 = (font_size_of_stage(stage) for stage in (0, 1, 2))
        # A lower stage number must come with a strictly bigger font size.
        self.assertGreater(size_stage0, size_stage1)
        self.assertGreater(size_stage1, size_stage2)

    def test_get_biggest_fontSize4styles(self):
        """The biggest font size among the given style keys is 10."""
        page = Page(xml_source_file=self.test_file)
        style_set = {'st12', 'st2', 'st14', 'st13'}
        self.assertEqual(page.get_biggest_fontSize4styles(style_set=style_set), 10)

    def test_get_words(self):
        """The test page has 440 words; check the text of the first and the last one."""
        page = Page(xml_source_file=self.test_file)
        words = page.words
        self.assertEqual(len(words), 440)
        self.assertEqual(words[0].text, '$')
        self.assertEqual(words[439].text, 'mußte!')

    def test_create_writing_process(self):
        """Words 97 and 129 get the LATER_INSERTION_AND_ADDITION writing process id."""
        page = Page(xml_source_file=self.test_file)
        page.create_writing_processes_and_attach2tree()
        self.assertEqual(page.words[97].transkription_positions[0].writing_process_id, WritingProcess.LATER_INSERTION_AND_ADDITION)
        self.assertEqual(page.words[129].transkription_positions[0].writing_process_id, WritingProcess.LATER_INSERTION_AND_ADDITION)

    def test_init_line_numbers(self):
        """Initialising with line numbers 2, 4 and 6 yields seven line numbers with ids 1..7."""
        page = Page(xml_source_file=self.test_file)
        line_numbers = [
            LineNumber(id=2, top=20, bottom=40),
            LineNumber(id=4, top=50, bottom=60),
            LineNumber(id=6, top=70, bottom=90),
        ]
        page.init_line_numbers(line_numbers, 122.345)
        self.assertEqual(len(page.line_numbers), 7)
        self.assertEqual(page.line_numbers[0].id, 1)
        self.assertEqual(page.line_numbers[6].id, 7)
        self.assertEqual(page.line_numbers[6].top, 91)
        self.assertEqual(page.line_numbers[6].bottom, 122.345)
        self.assertEqual(page.get_line_number(122), 7)
        self.assertEqual(page.get_line_number(92), 7)
        self.assertEqual(page.get_line_number(22), 2)

    def test_get_line_number(self):
        """The vertical centre of selected words maps to the expected line number."""
        page = Page(xml_source_file=self.test_file)

        def vertical_center(word_index):
            """Return the midpoint between top and bottom of the word's first position."""
            position = page.words[word_index].transkription_positions[0]
            return (position.bottom + position.top) / 2

        self.assertEqual(page.get_line_number(vertical_center(0)), 1)
        self.assertEqual(page.get_line_number(vertical_center(27)), 2)
        self.assertEqual(page.get_line_number(vertical_center(105)), 7)

    def test_categorize_paths(self):
        """Categorising the paths for the words of line 33 finds deletion and box paths."""
        # NOTE(review): this class-level flag is set here and never reset in
        # this file, so it stays True for every test that runs afterwards --
        # confirm that this is intended.
        Page.UNITTESTING = True
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        tr = TranskriptionField(page.source)
        page.words = [word for word in page.words if word.line_number == 33]
        path_dict = page.categorize_paths(tr)
        self.assertNotIn(True, [word.deleted for word in page.words if word.id == 269])
        self.assertGreater(len(path_dict.get('deletion_or_underline_paths')), 0)
        self.assertEqual(len(path_dict.get('box_paths')), 5)
        words = [word for word in page.words if len(word.box_paths) > 0]
        self.assertEqual(len(words), 1)
        self.assertIsNotNone(words[0].word_parts[0].earlier_version)
        self.assertEqual(words[0].word_parts[0].earlier_version.text, ')')

    def test_find_special_words(self):
        """find_special_words detects one foreign-hand mark and one text connection mark."""
        page = Page(xml_source_file=self.xml_file)
        page.find_special_words()
        self.assertEqual(len(page.mark_foreign_hands), 1)
        self.assertEqual(page.mark_foreign_hands[0].foreign_hands_text, 'x')
        page.update_and_attach_words2tree()
        # The query result is not inspected; the call is kept as in the original test.
        page.page_tree.xpath('//' + MarkForeignHands.XML_TAG)
        page = Page(xml_source_file=self.test_tcm_xml)
        page.find_special_words()
        self.assertEqual(len(page.text_connection_marks), 1)
        self.assertEqual(page.text_connection_marks[0].text_source.first_line, 2)
        # Disabled debugging aid:
        # page.update_and_attach_words2tree()
        # nodes = page.page_tree.xpath('//' + TextConnectionMark.XML_TAG)
        # print(ET.dump(nodes[0]))

    def test_update_page_type(self):
        """update_page_type classifies the W_I_8 fixture page as a verso page."""
        page = Page(xml_source_file=self.pdf_xml)
        tf = TranskriptionField(self.pdf_xml_source)
        page.update_page_type(transkription_field=tf)
        self.assertEqual(page.page_type, Page.PAGE_VERSO)
        #page = Page(xml_source_file=self.xml_fileB)
        #page.update_page_type()
        #self.assertEqual(page.page_type, Page.PAGE_RECTO)

    def test_update_line_number_area(self):
        """update_line_number_area sets a line number area width strictly between 0 and 15."""
        for xml_source_file in (self.xml_file, self.xml_fileB):
            page = Page(xml_source_file=xml_source_file)
            transkription_field = TranskriptionField(page.source)
            page.update_line_number_area(transkription_field)
            self.assertGreater(transkription_field.line_number_area_width, 0)
            self.assertLess(transkription_field.line_number_area_width, 15)

    def test_get_pages_from_xml_file(self):
        """The manuscript file yields pages 5 and 6; filtering by status keeps only page 5."""
        pages = Page.get_pages_from_xml_file(self.test_manuscript)
        self.assertEqual(len(pages), 2)
        self.assertEqual(pages[0].number, '5')
        self.assertEqual(pages[1].number, '6')
        pages = Page.get_pages_from_xml_file(self.test_manuscript, status_contains='faksimile merged')
        self.assertEqual(len(pages), 1)
        self.assertEqual(pages[0].number, '5')

    def test_process_word_boxes(self):
        """process_word_boxes runs on explicit box paths; categorize_paths attaches a box to one word."""
        page = Page(xml_source_file=self.pdf_xml)
        page.source = self.pdf_xml_source
        for word in page.words:
            word.partition_according_to_writing_process_id()
        tr = TranskriptionField(page.source)
        box_path_d = [
            'M 598.11,626.565 L 603.557,626.565 L 603.557,632.565 L 598.11,632.565 L 598.11,626.565',
            'M 557.443,683.44 L 574.182,683.44 L 574.182,694.815 L 557.443,694.815 L 557.443,683.44',
            'M 404.193,659.565 L 407.80699999999996,659.565 L 407.80699999999996,668.94 L 404.193,668.94 L 404.193,659.565',
            'M 587.932,634.065 L 598.318,634.065 L 598.318,643.19 L 587.932,643.19 L 587.932,634.065',
            'M 570.443,221.315 L 576.557,221.315 L 576.557,230.065 L 570.443,230.065 L 570.443,221.315',
        ]
        box_paths = [Path(d_string=d_string) for d_string in box_path_d]
        # No assertion follows this call; it only has to complete without raising.
        page.process_word_boxes(box_paths, tr)
        # NOTE(review): unlike the fixtures resolved in setUp, this path is
        # relative to the current working directory -- confirm it is intended.
        page = Page(xml_source_file='xml/N_VII_1_page015.xml')
        tr = TranskriptionField(page.source)
        page.categorize_paths(transkription_field=tr)
        words_with_boxes = [word for word in page.words if len(word.box_paths) > 0]
        self.assertEqual(len(words_with_boxes), 1)

    def test_lock(self):
        """lock() marks the page as locked and records the reference file; unlock() reverts it."""
        page = Page(xml_source_file=self.test_tcm_xml)
        self.assertEqual(page.is_locked(), False)
        page.lock('asdf.txt')
        self.assertEqual(page.is_locked(), True)
        self.assertEqual(page.page_tree.xpath('//lock/reference-file/text()')[0], 'asdf.txt')
        page.unlock()
        self.assertEqual(page.is_locked(), False)
# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

Event Timeline