bibmatch_tests.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, Apr 25, 10:42

bibmatch_tests.py
View Options

	# -*- coding: utf-8 -*-
	##
	## This file is part of Invenio.
	## Copyright (C) 2010, 2011 CERN.
	##
	## Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	# pylint: disable=E1102

	"""Unit tests for bibmatch."""

	__revision__ = "$Id$"

	from invenio.testutils import make_test_suite, run_test_suite
	from invenio.bibmatch_validator import compare_fieldvalues_normal, \
	compare_fieldvalues_authorname, \
	compare_fieldvalues_identifier, \
	compare_fieldvalues_title, \
	compare_fieldvalues_date, \
	get_paired_comparisons
	import unittest

	class BibMatchTest(unittest.TestCase):
	"""Test functions to check the validator of Bibmatch."""

	def test_validation_get_paired_comparisons(self):
	"""bibmatch - validation: check generated paired comparisons """
	first_list = [1,2,3]
	second_list = [4,5]
	# Should return empty, as lists are not equal in length
	self.assertFalse(get_paired_comparisons(first_list, second_list, False))

	# Should return result, in un-ordered mode
	result = [((1, 4), (1, 5)), ((2, 4), (2, 5)), ((3, 4), (3, 5))]
	self.assertEqual(result, get_paired_comparisons(first_list, second_list))

	def test_validation_compare_authors(self):
	"""BibMatch comparison: compare authors"""
	original_record_instances = ['Brodsky, Stanley J.']
	matched_record_instances = ['Brodsky, S.J.', 'Not, M E']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	threshold = 0.8
	matches_needed = 1
	result, dummy = compare_fieldvalues_authorname(comparisons, threshold, matches_needed)
	self.assertTrue(result)

	original_record_instances = ['Brodsky, J.']
	matched_record_instances = ['Brodsky, S.J.']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	result, dummy = compare_fieldvalues_authorname(comparisons, threshold, matches_needed)
	self.assertFalse(result)

	def test_validation_compare_strings(self):
	"""BibMatch comparison: compare strings"""
	original_record_instances = ['This is some random text']
	matched_record_instances = ['I have some random text about nothing']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	threshold = 0.8
	matches_needed = 1
	result, dummy = compare_fieldvalues_normal(comparisons, threshold, matches_needed)
	self.assertFalse(result)

	original_record_instances = ['This is some random text']
	matched_record_instances = ['Is some random text']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	result, dummy = compare_fieldvalues_normal(comparisons, threshold, matches_needed)
	self.assertTrue(result)

	def test_validation_compare_identifiers(self):
	"""BibMatch comparison: compare identifiers"""
	original_record_instances = ['REP-NO-02123']
	matched_record_instances = ['REPNO123']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	threshold = 1.0
	matches_needed = 1
	result, dummy = compare_fieldvalues_identifier(comparisons, threshold, matches_needed)
	self.assertFalse(result)

	original_record_instances = ['REP-NO-0123']
	matched_record_instances = ['REPNO123']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	result, dummy = compare_fieldvalues_identifier(comparisons, threshold, matches_needed)
	self.assertTrue(result)

	def test_validation_compare_date(self):
	"""BibMatch comparison: compare date"""
	original_record_instances = ['2002-02']
	matched_record_instances = ['2001']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	threshold = 1.0
	matches_needed = 1
	result, dummy = compare_fieldvalues_date(comparisons, threshold, matches_needed)
	self.assertFalse(result)

	original_record_instances = ['2001-02']
	matched_record_instances = ['2001']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	result, dummy = compare_fieldvalues_date(comparisons, threshold, matches_needed)
	self.assertTrue(result)

	def test_validation_compare_title(self):
	"""BibMatch comparison: compare title"""
	original_record_instances = ['Assault frequency and preformation probability']
	matched_record_instances = ['Assault frequency and preformation probability : The alpha emission process']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	threshold = 0.9
	matches_needed = 1
	# This should fail
	result, dummy = compare_fieldvalues_normal(comparisons, threshold, matches_needed)
	self.assertFalse(result)
	# Title search however, takes separators into account
	result, dummy = compare_fieldvalues_title(comparisons, threshold, matches_needed)
	self.assertTrue(result)

	# Check longer titles
	original_record_instances = ['Buffered Electropolishing \xe2\x80\x93 A New Way for ' \
	'Achieving Extremely Smooth Surface Finish on Nb SRF ' \
	'Cavities to be Used in Particle Accelerators']
	matched_record_instances = ['Buffered Electropolishing: A New Way for Achieving ' \
	'Extremely Smooth Surface Finish on Nb SRF Cavities ' \
	'To be Used in Particle Accelerators']
	comparisons = get_paired_comparisons(original_record_instances, matched_record_instances)
	result, dummy = compare_fieldvalues_title(comparisons, threshold, matches_needed)
	self.assertTrue(result)


	# Suite object built from BibMatchTest by invenio.testutils.make_test_suite;
	# exposed at module level under the name TEST_SUITE.
	TEST_SUITE = make_test_suite(BibMatchTest)

	if __name__ == "__main__":
	    # Run the suite only when this file is executed as a script.
	    # NOTE(review): warn_user=False presumably skips an interactive
	    # warning before running -- confirm in invenio.testutils.
	    run_test_suite(TEST_SUITE, warn_user=False)

bibmatch_tests.pyNo OneTemporaryActions

File Metadata

bibmatch_tests.pyView Options

Event Timeline

bibmatch_tests.py
No OneTemporary
Actions

bibmatch_tests.py
View Options