bibrank_citation_indexer_regression_tests.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sat, Oct 5, 04:13

bibrank_citation_indexer_regression_tests.py
View Options

	# -*- coding: utf-8 -*-
	##
	## This file is part of Invenio.
	## Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011, 2012 CERN.
	##
	## Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	"""Regression tests for the citation indexer."""

	from invenio.testutils import InvenioTestCase
	import ConfigParser
	import logging
	import sys

	from invenio.testutils import make_test_suite, run_test_suite
	from invenio.config import CFG_ETCDIR
	from invenio.dbquery import run_sql


	def load_config():
	    """Load the citation indexer configuration.

	    Reads the file ``bibrank/citation.cfg`` located below ``CFG_ETCDIR``.

	    Returns:
	        A ``ConfigParser.ConfigParser`` instance populated from that file.

	    Raises:
	        IOError: if the configuration file cannot be opened.
	    """
	    config_path = CFG_ETCDIR + "/bibrank/citation.cfg"
	    config = ConfigParser.ConfigParser()
	    # Close the handle deterministically instead of leaving it to the
	    # garbage collector.
	    config_file = open(config_path)
	    try:
	        config.readfp(config_file)
	    finally:
	        config_file.close()
	    return config

	# Parsed citation configuration, loaded once when this module is imported;
	# the tests below pass it as the second argument of process_chunk().
	CONFIG = load_config()

	# Expected output of process_chunk(range(1, 100), CONFIG), as compared in
	# TestCitationIndexer.test_basic.  Keys are presumably record ids (confirm
	# against the demo site data).  The two mappings mirror each other: if B
	# is listed under 'refs'[A], then A is listed under 'cites'[B].
	EXPECTED_DICTS = {
	    'refs': {
	        77: [95],
	        79: [78],
	        80: [94],
	        82: [81],
	        83: [81],
	        85: [77, 84],
	        86: [77, 95],
	        87: [81],
	        88: [84],
	        89: [81],
	        91: [78, 79, 84],
	        92: [74, 91],
	        96: [18],
	    },
	    'cites': {
	        18: [96],
	        74: [92],
	        77: [85, 86],
	        78: [79, 91],
	        79: [91],
	        81: [82, 83, 87, 89],
	        84: [85, 88, 91],
	        91: [92],
	        94: [80],
	        95: [77, 86],
	    },
	}


	def compare_dicts(tester, dic, expected):
	    """Assert that ``dic`` equals ``expected`` after normalisation.

	    Entries of ``dic`` whose value is empty are ignored, and every
	    remaining value is turned into a sorted list before the comparison.
	    ``dic`` itself is left unmodified.

	    Args:
	        tester: object providing ``assertEqual`` (the running test case).
	        dic: mapping of key -> iterable of sortable items.
	        expected: mapping of key -> sorted list to compare against.
	    """
	    # Build the normalised copy in one pass instead of deleting entries
	    # from the caller's dict while looping over it.
	    normalised = dict((key, sorted(values))
	                      for key, values in dic.items() if values)
	    tester.assertEqual(normalised, expected)


	def remove_from_dicts(dicts, recid):
	    """Remove every occurrence of ``recid`` from the values of ``dicts``.

	    ``dicts`` is modified in place; nothing is returned.

	    Args:
	        dicts: mapping holding a 'cites' and a 'refs' entry, each a dict
	            of key -> list (or set) of ids.  An optional 'cites_weight'
	            entry (dict of key -> int) is kept in step with 'cites'.
	        recid: the id to remove from the value collections.
	    """
	    weights = dicts.get('cites_weight')

	    # Iterate over a snapshot of the keys because entries may be deleted.
	    for key in list(dicts['cites'].keys()):
	        try:
	            dicts['cites'][key].remove(recid)
	        except (ValueError, KeyError):
	            # list.remove raises ValueError, set.remove raises KeyError:
	            # either way recid was not listed under this key.
	            continue
	        if weights is not None and key in weights:
	            weights[key] -= 1
	        if not dicts['cites'][key]:
	            # Nothing left under this key: drop it and its weight.
	            del dicts['cites'][key]
	            if weights is not None:
	                weights.pop(key, None)

	    for key in list(dicts['refs'].keys()):
	        try:
	            dicts['refs'][key].remove(recid)
	        except (ValueError, KeyError):
	            continue
	        if not dicts['refs'][key]:
	            del dicts['refs'][key]


	class TestCitationIndexer(InvenioTestCase):
	    """Testing citation indexer."""

	    def setUp(self):
	        """Reset the root logger and attach quiet stdout/stderr handlers."""
	        logger = logging.getLogger()
	        # Iterate over a copy: removeHandler() mutates logger.handlers, so
	        # looping over the live list would skip every other handler.
	        for handler in logger.handlers[:]:
	            logger.removeHandler(handler)

	        formatter = logging.Formatter('%(asctime)s --> %(message)s', '%Y-%m-%d %H:%M:%S')

	        # Both handlers only emit CRITICAL records; lower their levels
	        # (e.g. logging.DEBUG for stdout, logging.WARNING for stderr) when
	        # debugging a failing test.
	        stdout_logger = logging.StreamHandler(sys.stdout)
	        stdout_logger.setFormatter(formatter)
	        stdout_logger.setLevel(logging.CRITICAL)
	        stderr_logger = logging.StreamHandler(sys.stderr)
	        stderr_logger.setFormatter(formatter)
	        stderr_logger.setLevel(logging.CRITICAL)
	        logger.addHandler(stderr_logger)
	        logger.addHandler(stdout_logger)
	        logger.setLevel(logging.INFO)

	    def test_basic(self):
	        """tests processing ids 1..99 against EXPECTED_DICTS"""
	        from invenio.bibrank_citation_indexer import process_chunk
	        cites, refs = process_chunk(range(1, 100), CONFIG)
	        compare_dicts(self, cites, EXPECTED_DICTS['cites'])
	        compare_dicts(self, refs, EXPECTED_DICTS['refs'])

	    def test_adding_record(self):
	        """tests adding a record"""
	        from invenio.bibrank_citation_indexer import process_chunk
	        cites, refs = process_chunk([92], CONFIG)
	        expected_cites = {}
	        compare_dicts(self, cites, expected_cites)
	        expected_refs = {92: [74, 91]}
	        compare_dicts(self, refs, expected_refs)

	    def test_catchup(self):
	        """tests adding a record (with catchup)"""
	        from invenio.bibrank_citation_indexer import process_chunk
	        cites, refs = process_chunk([95], CONFIG)

	        expected_cites = {95: [77, 86]}
	        compare_dicts(self, cites, expected_cites)
	        expected_refs = {}
	        compare_dicts(self, refs, expected_refs)

	    def test_db_adding_and_removing_records(self):
	        """tests storing and then clearing citation data in the database"""
	        from invenio.bibrank_citation_searcher import get_cited_by
	        from invenio.bibrank_citation_indexer import store_dicts
	        store_dicts([42222],
	                    refs={42222: set([43333])},
	                    cites={42222: set([40000, 40001])})
	        cited_by_42222 = get_cited_by(42222)
	        cited_by_43333 = get_cited_by(43333)
	        # Clear the stored data again *before* asserting, so that a failed
	        # assertion below does not leave the test rows behind.
	        store_dicts([42222],
	                    refs={42222: set()},
	                    cites={42222: set()})
	        self.assertEqual(cited_by_42222, set([40000, 40001]))
	        self.assertEqual(cited_by_43333, set([42222]))
	        self.assertEqual(get_cited_by(42222), set())
	        self.assertEqual(get_cited_by(43333), set())


	class TestCitationIndexerWarnings(InvenioTestCase):
	    """Testing the storage of citation warnings in rnkCITATIONDATAERR."""

	    def cleanup(self):
	        """Delete the rows whose citinfo starts with 'Test Ref '."""
	        run_sql("""DELETE FROM rnkCITATIONDATAERR
	WHERE citinfo LIKE 'Test Ref %'""")

	    def count(self):
	        """Return the total number of rows in rnkCITATIONDATAERR."""
	        return run_sql("SELECT COUNT(*) FROM rnkCITATIONDATAERR")[0][0]

	    def test_insert(self):
	        """tests that warnings are stored once and not duplicated"""
	        from invenio.bibrank_citation_indexer import store_citation_warning
	        self.cleanup()
	        # try/finally guarantees the test rows are removed even when one
	        # of the assertions fails.
	        try:
	            before = self.count()
	            store_citation_warning('multiple-matches', 'Test Ref 1')
	            store_citation_warning('not-well-formed', 'Test Ref 2')
	            after = self.count()
	            self.assertEqual(after - before, 2)
	            # Storing an identical warning again must not add a row.
	            store_citation_warning('not-well-formed', 'Test Ref 2')
	            after2 = self.count()
	            self.assertEqual(after2 - before, 2)
	        finally:
	            self.cleanup()

	# Suite gathering every test case class defined in this module.
	TEST_SUITE = make_test_suite(
	    TestCitationIndexer,
	    TestCitationIndexerWarnings)

	# Run the whole suite when the module is executed as a script.
	if __name__ == "__main__":
	    run_test_suite(TEST_SUITE, warn_user=True)

bibrank_citation_indexer_regression_tests.py No One Temporary Actions

File Metadata

bibrank_citation_indexer_regression_tests.py View Options

Event Timeline

bibrank_citation_indexer_regression_tests.py
No OneTemporary
Actions

bibrank_citation_indexer_regression_tests.py
View Options