Page MenuHomec4science

test_util.py
No OneTemporary

File Metadata

Created
Sat, May 11, 18:50

test_util.py

import unittest
from os import sep, path, remove, listdir
from os.path import isdir, isfile, dirname, basename
import shutil
import sys
import lxml.etree as ET
import sys
import tempfile
import warnings
sys.path.append('svgscripts')
import util
from local_config import FAKSIMILE_LOCATION, PDF_READER, SVG_EDITOR, USER_ROOT_LOCATION_DICT
from datatypes.faksimile import FaksimilePage
from datatypes.page import Page
from datatypes.page_creator import PageCreator
from datatypes.positional_word_part import PositionalWordPart
from datatypes.text_field import TextField
from datatypes.transkriptionField import TranskriptionField
from datatypes.word_position import WordPosition
from datatypes.word import Word
sys.path.append('shared_util')
from myxmlwriter import write_pretty, FILE_TYPE_SVG_WORD_POSITION, FILE_TYPE_XML_MANUSCRIPT
sys.path.append('fixes')
from fix_old_data import save_page
class TestCopy(unittest.TestCase):
    """Tests for the helper functions of the ``util`` module.

    The fixtures are read from the ``test_data`` directory next to this file;
    everything the tests write goes to a temporary directory that is created
    in ``setUp`` and removed again in ``tearDown``.
    """

    def setUp(self):
        """Resolve the fixture paths and create a scratch directory."""
        util.UNITTESTING = True
        # path.join keeps the path relative when dirname(__file__) is empty;
        # plain concatenation with sep would yield '/test_data' in that case.
        DATADIR = path.join(path.dirname(__file__), 'test_data')
        self.test_dir = DATADIR
        self.faksimile_dir = path.join(DATADIR, 'faksimile_svg')
        self.faksimile_file = path.join(self.faksimile_dir, 'N-VII-1,5et6.svg')
        self.image = path.join(DATADIR, 'image.jpg')
        self.svg_testrecord = path.join(DATADIR, 'TESTRECORD.svg')
        self.xml_file = path.join(DATADIR, 'N_VII_1_page005.xml')
        self.Mp_XIV_page420 = path.join(DATADIR, 'Mp_XIV_page420.xml')
        self.tmp_dir = tempfile.mkdtemp()

    @staticmethod
    def _merge_following_words(node, target_text, max_merges=5):
        """Merge following sibling words into *node* until its text matches.

        The text of each following sibling is appended to the ``text``
        attribute of *node*, the sibling's children are moved to *node* and
        the sibling is removed. Merging stops as soon as the text equals
        *target_text*, after *max_merges* merges, or when no sibling is left,
        so that unexpected fixture data cannot cause an endless loop.
        """
        merges = 0
        while node.get('text') != target_text and merges < max_merges:
            nextnode = node.getnext()
            if nextnode is None:
                break
            merges += 1
            node.set('text', node.get('text') + nextnode.get('text'))
            # Iterate over a copy: appending moves the child away from nextnode.
            for element in list(nextnode):
                node.append(element)
            nextnode.getparent().remove(nextnode)

    def test_copy(self):
        """copy_faksimile_svg_file creates the target file and rejects missing arguments."""
        tmp_image = self.tmp_dir + sep + basename(self.image)
        target_file = 'asdf.svg'
        shutil.copy(self.image, self.tmp_dir)
        util.copy_faksimile_svg_file(target_file, faksimile_source_file=self.faksimile_file,
                                     target_directory=self.tmp_dir, local_image_path=tmp_image)
        self.assertTrue(isfile(self.tmp_dir + sep + target_file))
        # Without a target_file the copy is expected under the source's basename.
        util.copy_faksimile_svg_file(faksimile_source_file=self.faksimile_file,
                                     target_directory=self.tmp_dir, local_image_path=tmp_image)
        self.assertTrue(isfile(self.tmp_dir + sep + basename(self.faksimile_file)))
        with self.assertRaises(Exception):
            util.copy_faksimile_svg_file()
        # Pass the real fixture attribute here: a misspelled attribute name would
        # raise AttributeError before util is even called and satisfy
        # assertRaises for the wrong reason.
        # NOTE(review): presumably util raises because neither target_file nor
        # target_directory is given -- confirm against util.copy_faksimile_svg_file.
        with self.assertRaises(Exception):
            util.copy_faksimile_svg_file(faksimile_source_file=self.faksimile_file)

    def test_copy_xml(self):
        """copy_xml_file_word_pos_only keeps all words but no line numbers."""
        old_page = Page(self.xml_file)
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        self.assertTrue(isfile(xml_file))
        page = Page(xml_file)
        self.assertEqual(len(page.words), len(old_page.words))
        self.assertEqual(len(page.line_numbers), 0)

    def test_create_highlighted_svg_file(self):
        """Exactly the requested nodes carry the highlight color in the written file."""
        target_file = self.tmp_dir + sep + basename(self.faksimile_file)
        faksimile_tree = ET.parse(self.faksimile_file)
        # The default namespace has the key None, which xpath cannot use; map it to 'ns'.
        namespaces = {k if k is not None else 'ns': v for k, v in faksimile_tree.getroot().nsmap.items()}
        node_ids = ['rect947', 'rect951', 'rect953', 'rect955', 'rect959', 'rect961', 'rect963']
        highlight_color = 'blue'
        util.create_highlighted_svg_file(faksimile_tree, node_ids, target_directory=self.tmp_dir,
                                         highlight_color=highlight_color, namespaces=namespaces)
        self.assertTrue(isfile(target_file))
        new_tree = ET.parse(target_file)
        highlighted_ids = [
            node.get('id')
            for node in new_tree.xpath(
                '//ns:rect[@fill="{0}"]|//ns:path[@fill="{0}"]'.format(highlight_color),
                namespaces=namespaces)
        ]
        # Same ids, same multiplicity, any order.
        self.assertCountEqual(highlighted_ids, node_ids)

    def test_get_empty_node_ids(self):
        """The node 'rect1085' of the fixture is reported as empty."""
        faksimile_tree = ET.parse(self.faksimile_file)
        faksimile_page = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)[0]
        empty_node_ids = util.get_empty_node_ids(faksimile_tree, faksimile_page=faksimile_page)
        self.assertIn('rect1085', empty_node_ids)

    def test_record_changes(self):
        """record_changes transfers a title added in the new file to the old file."""
        new_tree = ET.parse(self.faksimile_file)
        old_tree = ET.parse(self.faksimile_file)
        empty_node_id = 'rect1085'
        title_node_id = 'test001'
        namespaces = {k if k is not None else 'ns': v for k, v in new_tree.getroot().nsmap.items()}
        node = new_tree.xpath('//ns:rect[@id="{0}"]'.format(empty_node_id), namespaces=namespaces)[0]
        title = ET.SubElement(node, 'title', attrib={'id': title_node_id})
        title.text = 'test'
        new_file = self.tmp_dir + sep + 'new.svg'
        old_file = self.tmp_dir + sep + 'old.svg'
        util.copy_faksimile_svg_file(target_file=new_file, faksimile_tree=new_tree)
        util.copy_faksimile_svg_file(target_file=old_file, faksimile_tree=old_tree)
        util.record_changes(old_file, new_file, [empty_node_id], namespaces=namespaces)
        test_tree = ET.parse(old_file)
        title_nodes = test_tree.xpath(
            '//ns:rect[@id="{0}"]/ns:title[@id="{1}"]'.format(empty_node_id, title_node_id),
            namespaces=namespaces)
        self.assertEqual(len(title_nodes), 1)

    def test_replace_chars(self):
        """replace_chars maps the closing quote and the hyphen to their typographic forms."""
        page = Page(self.xml_file)
        word_position = WordPosition(id='rect1159', text='„Gedächtniß"')
        wps, texts = util.replace_chars(page.words, [word_position])
        self.assertTrue(texts[0].endswith('“'))
        self.assertTrue(wps[0].text.endswith('“'))
        word_position = WordPosition(id='rect1173', text='-')
        wps, texts = util.replace_chars(page.words, [word_position])
        self.assertTrue(wps[0].text.endswith('–'))

    def test_mismatch_words(self):
        """After replace_chars no word ending in a quote or dash is left mismatching."""
        # NOTE: unlike the fixtures from setUp, these two paths are relative to
        # the current working directory.
        page = Page('xml/N_VII_1_page174.xml')
        faksimile_tree = ET.parse('faksimile_svg/N-VII-1,173et174.svg')
        faksimile_page = FaksimilePage.GET_FAKSIMILEPAGES(faksimile_tree)[0]
        self.assertIn('-', [tp.text for tp in faksimile_page.word_positions])
        wps, texts = util.replace_chars(page.words, faksimile_page.word_positions)
        self.assertIn('–', texts)
        dash_positions = [faksimile_position for faksimile_position in wps
                          if faksimile_position.text == '–']
        self.assertEqual(len(dash_positions), 4)
        mismatching_words, _ = util.get_mismatching_ids(page.words, faksimile_page.word_positions)
        self.assertEqual(len([word for word in mismatching_words if word.text.endswith('“')]), 0)
        self.assertEqual(len([word for word in mismatching_words if word.text.endswith('–')]), 0)

    def test_process_warnings(self):
        """process_warnings4status lists the prefixes of the recorded warnings."""
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('default')
            warnings.warn('Test1: asdf')
            warnings.warn('Test2: asdf')
            status = util.process_warnings4status(w, ['Test1', 'Test2'], 'asdf', 'OK',
                                                  status_prefix='with warnings')
            status_parts = status.split(':')
            self.assertIn('Test1', status_parts)
            self.assertIn('Test2', status_parts)

    @unittest.skip('test uses external program, has been tested')
    def test_show_files(self):
        """Show the faksimile file and up to two pdf fixtures in the external viewer."""
        list_of_files = [self.test_dir + sep + file_name
                         for file_name in listdir(self.test_dir)
                         if file_name.endswith('pdf')][0:2]
        util.ExternalViewer.show_files(single_file=self.faksimile_file, list_of_files=list_of_files)

    def test_record_changes_to_page(self):
        """Changes from the svg test record are applied for single selected words.

        NOTE(review): the list argument presumably selects word ids/indices --
        confirm against util.record_changes_on_svg_file_to_page.
        """
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord, [1])
        old_length = len(page.words)
        self.assertEqual(page.words[1].text, 'asdf')
        self.assertEqual(page.words[1].transkription_positions[0].width, 353)
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord, [13])
        self.assertEqual(page.words[13].text, 'er')
        self.assertEqual(page.words[14].text, '=')
        self.assertEqual(len(page.words), old_length + 1)
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord, [64])
        self.assertEqual(page.words[64].text, 'Simplifications-apparat')
        self.assertEqual(len(page.words[64].transkription_positions), 3)
        self.assertEqual(len(page.words), old_length - 1)

    @unittest.skipUnless(__name__ == "__main__", 'tests all words')
    def test_extended__record_changes_to_page(self):
        """Apply the svg test record without a word selection (direct runs only)."""
        page = Page(self.xml_file)
        old_length = len(page.words)
        page = util.record_changes_on_svg_file_to_page(self.xml_file, self.svg_testrecord)
        self.assertEqual(page.words[1].text, 'asdf')
        self.assertEqual(page.words[13].text, 'er')
        self.assertEqual(page.words[14].text, '=')
        self.assertEqual(page.words[65].text, 'Simplifications-apparat')
        self.assertEqual(len(page.words), old_length)

    def test_copy_faksimile_update_image_location(self):
        """Copying a second time into the same directory emits a UserWarning."""
        test_dir = self.tmp_dir
        util.copy_faksimile_update_image_location(self.faksimile_file, target_directory=test_dir)
        with self.assertWarns(UserWarning):
            util.copy_faksimile_update_image_location(self.faksimile_file, target_directory=test_dir)

    def test_record_changes_on_xml(self):
        """Joined and split words of an edited xml copy are transferred to the page."""
        # Scenario 1: word 135 and its followers are merged until they read 'gar'.
        old_page = Page(self.xml_file)
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        tree = ET.parse(xml_file)
        node = tree.xpath('//word[@id="135"]')[0]
        self._merge_following_words(node, 'gar')
        write_pretty(xml_element_tree=tree, file_name=xml_file,
                     script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
        new_page = util.record_changes_on_xml_file_to_page(self.xml_file, xml_file)
        self.assertEqual(len(new_page.words), len(old_page.words) - 2)
        self.assertEqual(len([word for word in new_page.words if word.text == 'gar']), 1)

        # Scenario 2: word 138 is merged to 'nichtvorkommt.' and marked to be
        # split again into 'nicht' and 'vorkommt.'.
        old_page = Page(self.xml_file)
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        tree = ET.parse(xml_file)
        node = tree.xpath('//word[@id="138"]')[0]
        self._merge_following_words(node, 'nichtvorkommt.')
        node.set('split', 'nicht vorkommt.')
        write_pretty(xml_element_tree=tree, file_name=xml_file,
                     script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
        joined_page = Page(xml_file)
        joined_words = [word for word in joined_page.words if word.text == 'nichtvorkommt.']
        self.assertEqual(len(joined_words), 1)
        self.assertEqual(len(joined_words[0].split_strings), 2)
        self.assertEqual(len(joined_page.words), len(old_page.words) - 1)
        new_page = util.record_changes_on_xml_file_to_page(self.xml_file, xml_file)
        self.assertEqual(len(new_page.words), len(old_page.words))
        self.assertEqual(len([word for word in new_page.words if word.text == 'vorkommt.']), 1)
        self.assertEqual(len([word for word in old_page.words if word.text == 'nicht']),
                         len([word for word in new_page.words if word.text == 'nicht']))

        # Scenario 3: the middle word of 85-87 is split and its halves are
        # joined to the neighbouring words, leaving one word less.
        xml_file = util.copy_xml_file_word_pos_only(self.xml_file, self.tmp_dir)
        tree = ET.parse(xml_file)
        old_page = Page(xml_file)
        nodes = tree.xpath('//word[@id>="85" and @id<="87"]')
        self.assertEqual(len(nodes), 3)
        prevWordText = nodes[0].get('text')
        nodes[0].set('join', prevWordText + 'z')
        nodes[1].set('split', 'z u')
        lastWordText = nodes[2].get('text')
        nodes[2].set('join', 'u' + lastWordText)
        write_pretty(xml_element_tree=tree, file_name=xml_file,
                     script_name=__file__, file_type=FILE_TYPE_SVG_WORD_POSITION)
        joined_page = util.record_changes_on_xml_file_to_page(self.xml_file, xml_file)
        self.assertEqual(len(joined_page.words), len(old_page.words) - 1)

    def test_reset_tp_with_matrix(self):
        """reset_tp_with_matrix updates left/top of plain and transformed positions."""
        page = Page(self.Mp_XIV_page420)
        util.reset_tp_with_matrix(page.words[0].transkription_positions)
        first_position = page.words[0].transkription_positions[0]
        self.assertGreater(first_position.left, 0)
        self.assertGreater(first_position.top, -5)
        transformed_words = [w for w in page.words
                             if len(w.transkription_positions) > 0
                             and w.transkription_positions[0].transform is not None]
        util.reset_tp_with_matrix(transformed_words[0].transkription_positions)
        transformed_position = transformed_words[0].transkription_positions[0]
        self.assertEqual(transformed_position.left, 0)
        self.assertLess(transformed_position.top, 0)

    def test_back_up(self):
        """back_up writes a backup file; back_up_svg_file runs without raising."""
        test_dir = self.tmp_dir
        page = Page(self.xml_file)
        target_file_name = util.back_up(page, self.xml_file, bak_dir=test_dir)
        self.assertTrue(isfile(target_file_name))
        svg_tree = ET.parse(page.svg_file)
        namespaces = {k if k is not None else 'ns': v for k, v in svg_tree.getroot().nsmap.items()}
        util.back_up_svg_file(svg_tree, namespaces)

    def tearDown(self):
        """Remove the scratch directory, ignoring errors during removal."""
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

Event Timeline