Page MenuHomec4science

test_compare_faksimile_words_line_wise.py
No OneTemporary

File Metadata

Created
Fri, May 10, 01:29

test_compare_faksimile_words_line_wise.py

import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
import compare_faksimile_words_line_wise
from datatypes.faksimile import FaksimilePage
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkriptionField import TranskriptionField
from datatypes.word_position import WordPosition
class TestCompareLineWise(unittest.TestCase):
    """Tests for ``compare_faksimile_words_line_wise``.

    Every test loads the transcription page ``N_VII_1_page005.xml`` and the
    faksimile SVG ``N-VII-1,5et6.svg`` from the ``test_data`` directory that
    sits next to this file.
    """

    def setUp(self):
        """Enable unit-test mode in the module under test and build fixture paths."""
        compare_faksimile_words_line_wise.UNITTESTING = True
        # Anchor on the absolute location of this file: a bare
        # dirname(__file__) is '' when __file__ is a relative file name, and
        # '' + sep + 'test_data' would then point at the file-system root.
        data_dir = path.join(path.dirname(path.abspath(__file__)), 'test_data')
        self.faksimile_dir = path.join(data_dir, 'faksimile_svg')
        self.manuscript = path.join(data_dir, 'N_VII_1.xml')
        # Insert '_copy' in front of the extension only; a blanket
        # str.replace('.', ...) would also rewrite dots in directory names.
        manuscript_root, manuscript_ext = path.splitext(self.manuscript)
        self.manuscript_copy = manuscript_root + '_copy' + manuscript_ext
        self.faksimile_file = path.join(self.faksimile_dir, 'N-VII-1,5et6.svg')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.Mp_XIV_1_mytest_421 = path.join(data_dir, 'Mp_XIV_1_mytest_421.xml')
        self.correction_dir = path.join(data_dir, 'correction_dir')
        self.page138 = path.join(data_dir, 'N_VII_1_page138.xml')

    def test_mark_unmergeable_words_and_faksimile_positions(self):
        """The single faksimile position with text 'gar' is marked as not mergeable."""
        page = Page(self.xml_file)
        faksimile_page = FaksimilePage.get_faksimile_pages(
            self.faksimile_file, page_number=page.number)[0]
        compare_faksimile_words_line_wise.mark_unmergeable_words_and_faksimile_positions(
            page.words, faksimile_page.word_positions)
        word_gar = [fp for fp in faksimile_page.word_positions if fp.text == 'gar']
        self.assertEqual(len(word_gar), 1)
        self.assertFalse(word_gar[0].mergeable)

    def test_get_line(self):
        """``get_line`` returns faksimile positions matching the words of a line."""
        page = Page(self.xml_file)
        faksimile_page = FaksimilePage.get_faksimile_pages(
            self.faksimile_file, page_number=page.number)[0]
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions)
        compare_faksimile_words_line_wise.mark_unmergeable_words_and_faksimile_positions(
            page.words, faksimile_page.word_positions)

        # Line 1: the first word must appear among the matched positions.
        words_on_line = [word for word in page.words if word.line_number == 1]
        matched_line = flc.get_line(words_on_line, 0)
        self.assertIn(words_on_line[0].text, [fp.text for fp in matched_line])
        # Flag the first word and first position as joined before asking for
        # the next line.
        # NOTE(review): presumably get_line skips joined items - confirm.
        words_on_line[0].joined = True
        matched_line[0].joined = True

        # Line 2: every word must appear among the matched positions.
        words_on_line = [word for word in page.words if word.line_number == 2]
        matched_line = flc.get_line(words_on_line, 1)
        matched_texts = [fp.text for fp in matched_line]
        for word in words_on_line:
            self.assertIn(word.text, matched_texts)

        # Line 42, requested with index 22, must yield a non-empty match.
        words_on_line = [word for word in page.words if word.line_number == 42]
        matched_line = flc.get_line(words_on_line, 22)
        self.assertGreater(len(matched_line), 0)

    def test_merge_line(self):
        """``merge_lines`` joins all words of line 42 and gives each new word a position."""
        page = Page(self.xml_file)
        faksimile_page = FaksimilePage.get_faksimile_pages(
            self.faksimile_file, page_number=page.number)[0]
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions)
        compare_faksimile_words_line_wise.mark_unmergeable_words_and_faksimile_positions(
            page.words, faksimile_page.word_positions)
        words_on_line = [word for word in page.words if word.line_number == 42]
        new_words = []
        flc.merge_lines(words_on_line, new_words, index=22)
        for word in new_words:
            self.assertGreater(len(word.faksimile_positions), 0)
        # Compare against [] so that a failure names the words left unjoined.
        self.assertEqual([word.text for word in words_on_line if not word.joined], [])

    def test_merge_faksimile_positions_and_words(self):
        """``merge_faksimile_positions_and_words`` returns exit code 0 for the fixture page."""
        page = Page(self.xml_file)
        faksimile_page = FaksimilePage.get_faksimile_pages(
            self.faksimile_file, page_number=page.number)[0]
        # NOTE(review): the composer instance is not used by this test; the
        # call is kept in case construction has side effects on
        # word_positions - confirm and remove if it does not.
        compare_faksimile_words_line_wise.FaksimileLineComposer(faksimile_page.word_positions)
        exit_code = compare_faksimile_words_line_wise.merge_faksimile_positions_and_words(
            page, faksimile_page.word_positions)
        self.assertEqual(exit_code, 0)

    def test_faksimile_line_composer(self):
        """The composer builds 23 lines and keeps handing out faksimile positions."""
        faksimile_page = FaksimilePage.get_faksimile_pages(self.faksimile_file)[0]
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions, threshold=10)
        self.assertEqual(len(flc.lines_of_faksimile_positions), 23)
        flc = compare_faksimile_words_line_wise.FaksimileLineComposer(
            faksimile_page.word_positions, threshold=20, num_lines_with_words=23)
        self.assertEqual(len(flc.lines_of_faksimile_positions), 23)
        # 200 consecutive requests must each return something.
        for _ in range(200):
            self.assertIsNotNone(flc.get_next_faksimile())
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

Event Timeline