Page MenuHomec4science

test_join_faksimileAndTranskription.py
No OneTemporary

File Metadata

Created
Sat, May 4, 13:50

test_join_faksimileAndTranskription.py

import unittest
from os import sep, path, remove
from os.path import isdir, isfile, dirname
import shutil
import sys
import lxml.etree as ET
import warnings
import sys
sys.path.append('svgscripts')
import join_faksimileAndTranskription
from datatypes.faksimile import FaksimilePage
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkriptionField import TranskriptionField
from datatypes.word_position import WordPosition
class TestJoin(unittest.TestCase):
    """Tests for the functions of the module join_faksimileAndTranskription."""

    def setUp(self):
        """Flag the module as running under test and build the fixture paths."""
        join_faksimileAndTranskription.UNITTESTING = True
        # path.join keeps the data dir relative when dirname(__file__) is empty;
        # concatenating with sep would point at '/test_data' in that case.
        data_dir = path.join(path.dirname(__file__), 'test_data')
        self.faksimile_dir = path.join(data_dir, 'faksimile_svg')
        self.manuscript = path.join(data_dir, 'N_VII_1.xml')
        # Insert the suffix in front of the extension only: replacing every '.'
        # would also rewrite dots that occur in the directory part of the path.
        manuscript_root, manuscript_ext = path.splitext(self.manuscript)
        self.manuscript_copy = manuscript_root + '_copy' + manuscript_ext
        self.faksimile_file = path.join(self.faksimile_dir, 'N-VII-1,5et6.svg')
        self.xml_file = path.join(data_dir, 'N_VII_1_page005.xml')
        self.Mp_XIV_1_mytest_421 = path.join(data_dir, 'Mp_XIV_1_mytest_421.xml')
        self.correction_dir = path.join(data_dir, 'correction_dir')
        self.page138 = path.join(data_dir, 'N_VII_1_page138.xml')

    def test_sort_words(self):
        """Sorting the words of line 7 keeps all of them in their given order."""
        page = Page(self.Mp_XIV_1_mytest_421)
        words_line7 = [word for word in page.words if word.line_number == 7]
        page.words = words_line7
        sorted_words = join_faksimileAndTranskription.sort_words(page)
        self.assertEqual(len(sorted_words), len(words_line7))
        # Lengths are equal at this point, so zip pairs every word.
        for sorted_word, word in zip(sorted_words, words_line7):
            self.assertEqual(sorted_word, word)

    def test_sort_faksimile_positions(self):
        """Compare the first ten sorted faksimile positions with the page words.

        For each of the first ten sorted positions that has a title in the
        faksimile svg, the title text must equal the text of the word found at
        the same index of the transcription page.
        """
        faksimile_tree = ET.parse(self.faksimile_file)
        root = faksimile_tree.getroot()
        # XPath needs a prefix for the default namespace, so map None to 'ns'.
        namespaces = {k if k is not None else 'ns': v for k, v in root.nsmap.items()}
        faksimile_pages = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree, namespaces=namespaces)
        self.assertEqual(len(faksimile_pages), 2)
        svg_pos_file, _ = join_faksimileAndTranskription.get_svgPosFile_and_manuscriptFile(
            faksimile_pages[0], manuscript_file=self.manuscript, redo_ok=True)
        sorted_positions = join_faksimileAndTranskription.sort_faksimile_positions(
            faksimile_pages[0].word_positions)
        page = Page(svg_pos_file)
        title_query = '//ns:rect[@id="{0}"]/ns:title/text()|//ns:path[@id="{0}"]/ns:title/text()'
        for index in range(10):
            position_id = sorted_positions[index].id
            # Evaluate the query once per position; positions without a title
            # are not compared.
            titles = root.xpath(title_query.format(position_id), namespaces=namespaces)
            if titles:
                self.assertEqual(titles[0], page.words[index].text)

    @unittest.skipUnless(__name__ == "__main__", 'test uses path from within dir')
    def test_get_filelist_and_manuscript_file(self):
        """Check file list and manuscript file for three argument combinations."""
        # A directory of faksimile files followed by the manuscript file.
        file_list, manuscript_file = join_faksimileAndTranskription.get_filelist_and_manuscript_file(
            self.faksimile_dir, self.manuscript)
        self.assertEqual(len(file_list), 1)
        self.assertEqual(file_list[0], self.faksimile_file)
        self.assertEqual(manuscript_file, self.manuscript)
        # The manuscript file first, followed by a single faksimile file.
        file_list, manuscript_file = join_faksimileAndTranskription.get_filelist_and_manuscript_file(
            self.manuscript, self.faksimile_file)
        self.assertEqual(len(file_list), 1)
        self.assertEqual(file_list[0], self.faksimile_file)
        self.assertEqual(manuscript_file, self.manuscript)
        # Only the manuscript file plus a correction dir.
        file_list, manuscript_file = join_faksimileAndTranskription.get_filelist_and_manuscript_file(
            self.manuscript, correction_dir=self.correction_dir)
        self.assertEqual(len(file_list), 1)
        self.assertEqual(file_list[0], self.page138)

    @unittest.skipUnless(__name__ == "__main__", 'test uses path from within dir')
    def test_get_svgPosFile_and_manuscriptFile(self):
        """Check the svg position file and manuscript file of the first page."""
        faksimile_tree = ET.parse(self.faksimile_file)
        faksimile_pages = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)
        self.assertEqual(len(faksimile_pages), 2)
        svg_pos_file, manuscript_file = join_faksimileAndTranskription.get_svgPosFile_and_manuscriptFile(
            faksimile_pages[0], manuscript_file=self.manuscript, redo_ok=True)
        # NOTE(review): the expected name is built by replacing every '.' of the
        # manuscript path; presumably this mirrors how the file name is stored
        # for the function under test -- confirm before changing it.
        expected_svg_pos_file = self.manuscript.replace(
            '.', '_page00{}.'.format(faksimile_pages[0].page_number))
        self.assertEqual(svg_pos_file, expected_svg_pos_file)
        self.assertEqual(manuscript_file, self.manuscript)

    @unittest.skip('join changed ... fix me')
    def test_join_faksimileAndTranskription(self):
        """Joining the faksimile file with the manuscript is expected to return 0."""
        exit_status = join_faksimileAndTranskription.join_faksimileAndTranskription(
            self.faksimile_file, self.manuscript)
        self.assertEqual(exit_status, 0)
        # Former variant of this check passed test_word_text='gar' as well:
        #self.assertEqual(join_faksimileAndTranskription.join_faksimileAndTranskription(self.faksimile_file, self.manuscript, test_word_text='gar'), 0)

    @unittest.skip('function update_writing_process is deprecated')
    def testupdate_writing_process(self):
        """Updating word 12 is expected to change its writing process id from -1 to 0."""
        page = Page(self.xml_file)
        word = page.words[12]
        self.assertEqual(len(word.faksimile_positions), 1)
        self.assertEqual(word.faksimile_positions[0].writing_process_id, -1)
        join_faksimileAndTranskription.update_writing_process(word)
        self.assertEqual(word.faksimile_positions[0].writing_process_id, 0)

    # Former condition: run only when this file is executed directly.
    #@unittest.skipUnless(__name__ == "__main__", 'test takes too long, we do not run it with unittest discover')
    @unittest.skip('test takes too long, has been tested')
    def test_fix_errors(self):
        """Fixing one mismatching word/position pair is expected to return 0."""
        page = Page(self.xml_file)
        word_position = WordPosition(id='rect945', text='Lenken')
        exit_status = join_faksimileAndTranskription.fix_errors(
            self.faksimile_file, [word_position], [page.words[12]],
            xml_source_file=self.xml_file, manuscript_file=self.manuscript)
        self.assertEqual(exit_status, 0)

    @unittest.skip('tested with local file')
    def test_join_single_chars(self):
        """After joining single chars there is one word 'selber' and no word 's'."""
        page = Page('xml/N_VII_1_page016.xml')
        words = join_faksimileAndTranskription.sort_words(page)
        join_faksimileAndTranskription.join_single_char_words(words)
        self.assertEqual(len([word for word in words if word.text == 'selber']), 1)
        self.assertEqual(len([word for word in words if word.text == 's']), 0)

    def test_get_mismatching_ids(self):
        """Word 12 ('Denken') and a position with text 'Lenken' are reported as mismatching."""
        page = Page(self.xml_file)
        word_position = WordPosition(id='rect945', text='Lenken')
        mwords, mfps = join_faksimileAndTranskription.get_mismatching_ids(
            [page.words[12]], [word_position])
        self.assertEqual(mwords[0].text, 'Denken')
        self.assertEqual(mfps[0].text, 'Lenken')
if __name__ == "__main__":
    # Executed as a script: hand over to the unittest command line runner,
    # which runs the tests defined in this module.
    unittest.main()

Event Timeline