Page Menu | Home | c4science

test_create_task.py
No One | Temporary

File Metadata

Created
Sat, May 11, 17:16

test_create_task.py

import unittest
from os import sep, path, remove, listdir
from os.path import isdir, isfile, dirname, basename
import shutil
import sys
import lxml.etree as ET
import sys
import tempfile
import warnings
sys.path.append('svgscripts')
from create_task import Task, CorrectWords
from datatypes.faksimile import FaksimilePage
from datatypes.page import Page
from datatypes.positional_word_part import PositionalWordPart
from datatypes.transkriptionField import TranskriptionField
from datatypes.word_position import WordPosition
class TestTask(Task):
    """This is a test."""
    # NOTE: the class docstring above is test data, not just documentation:
    # TestCreateTask.test_task compares it with ``task.description``.

    def get_node_ids(self):
        # Gather the id of the first faksimile position of each word that
        # has at least one faksimile position; other words are skipped.
        node_ids = []
        for word in self.words:
            if len(word.faksimile_positions) > 0:
                node_ids.append(word.faksimile_positions[0].id)
        return node_ids

    def select_words(self, words):
        # Store the given words on the task and hand them back unchanged.
        self.words = words
        return words
class TestCreateTask(unittest.TestCase):
    """Exercise TestTask and CorrectWords against the files in ``test_data``."""

    def setUp(self):
        """Set the paths of the fixture files used by the tests."""
        # path.join instead of ``dirname + sep + ...``: if dirname(__file__)
        # is empty (script started from its own directory with a relative
        # __file__), plain concatenation would produce '/test_data'.
        data_dir = path.join(path.dirname(__file__), 'test_data')
        self.xml_source = path.join(data_dir, 'N_VII_1_page005_faksimile_merged.xml')
        self.xml_unmerged = path.join(data_dir, 'N_VII_1_page005.xml')
        self.faksimile_svgFile = path.join(data_dir, 'faksimile_svg', 'N-VII-1,5et6.svg')

    def _make_tmp_dir(self):
        """Return a fresh temporary directory that is removed after the test.

        The removal is registered with addCleanup so that it also happens
        when an assertion fails or the test raises.
        """
        tmp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, tmp_dir)
        return tmp_dir

    def _find_unmatched_node_ids(self, unmatched_strings):
        """Return the ids of the faksimile svg rects titled with one of the strings.

        :param unmatched_strings: texts to look up in the ``title`` children
            of the ``rect`` elements of ``self.faksimile_svgFile``.
        :return: list of the ``id`` attribute values of the matching rects.
        """
        svg_tree = ET.parse(self.faksimile_svgFile)
        # XPath cannot address the default (None) namespace prefix, so it is
        # mapped to the prefix 'ns'.
        namespaces = { k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items() }
        node_ids = []
        for string in unmatched_strings:
            title_nodes = svg_tree.xpath('//ns:rect/ns:title[text() = "{0}"]'.format(string), namespaces=namespaces)
            node_ids += [ node.getparent().get('id') for node in title_nodes ]
        return node_ids

    def _assert_created(self, task, page, file_names):
        """Assert that task reports page as created and contains all file_names."""
        # The return types of these project methods are not visible here, so
        # the original strict ``== True`` comparison is kept.
        self.assertEqual(task.has_been_created(page), True)
        for file_name in file_names:
            self.assertEqual(task.contains_file(file_name), True)

    def test_task(self):
        """Creating a TestTask fills the target directory and sets the description."""
        tmp_dir = self._make_tmp_dir()
        task = TestTask(self.xml_source, tmp_dir)
        task.create()
        dir_elements = listdir(tmp_dir)
        self.assertEqual(task.description, TestTask.__doc__)
        self.assertEqual(len(dir_elements), 3)
        self.assertIn(basename(self.xml_source).replace('.xml', '.pdf'), dir_elements)

    def test_correction_task(self):
        """CorrectWords names its files, creates them and detects existing ones."""
        tmp_dir = self._make_tmp_dir()
        unmatched_node_ids = self._find_unmatched_node_ids([ 'Das', 'Muster', 'einer' ])
        page = Page(xml_source_file=self.xml_unmerged)
        task = CorrectWords(self.xml_unmerged, self.faksimile_svgFile, tmp_dir, unmatched_node_ids=unmatched_node_ids)
        note = task.create_note_about_missing_words()
        self.assertIn('orgänge.', note)
        faksimile_svg_file = task.create_file_name(page)
        self.assertEqual(faksimile_svg_file, 'N-VII-1,5.svg')
        transkription_svg = task.create_file_name(page, is_faksimile_svg=False)
        self.assertEqual(transkription_svg, basename(self.xml_unmerged.replace('.xml', '.svg')))
        xml_file = task.create_file_name(page, is_faksimile_svg=False, suffix='.xml')
        self.assertEqual(xml_file, basename(self.xml_unmerged))
        created_files = (faksimile_svg_file, transkription_svg, xml_file)
        self.assertEqual(task.has_been_created(page), False)
        task.create()
        self._assert_created(task, page, created_files)
        # A second task pointed at the same directory must see the files
        # written by the first one.
        task2 = CorrectWords(self.xml_unmerged, self.faksimile_svgFile, tmp_dir, unmatched_node_ids=unmatched_node_ids)
        self._assert_created(task2, page, created_files)

    def test_ids(self):
        """Unmatched words keep their id and text in the xml file written by the task."""
        tmp_dir = self._make_tmp_dir()
        page = Page(xml_source_file=self.xml_unmerged)
        unmatched_node_ids = self._find_unmatched_node_ids([ 'Das', 'Muster', 'einer' ])
        task = CorrectWords(self.xml_unmerged, self.faksimile_svgFile, tmp_dir, unmatched_node_ids=unmatched_node_ids)
        task.create()
        xml_file = task.get_target_filepath(page, is_faksimile_svg=False, suffix='.xml')
        self.assertTrue(isfile(xml_file))
        new_page = Page(xml_source_file=xml_file)
        for word in task.unmatched_words:
            new_words = [ new_word for new_word in new_page.words if new_word.id == word.id ]
            self.assertEqual(len(new_words), 1)
            self.assertEqual(new_words[0].text, word.text)
# Run the test cases of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

Event Timeline