Page MenuHomec4science

refextract_unit_tests.py
No OneTemporary

File Metadata

Created
Wed, Aug 21, 01:23

refextract_unit_tests.py

# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
The Refextract unit test suite
The tests will not modify the database.
"""
from invenio.testutils import InvenioTestCase
import re
from invenio.testutils import make_test_suite, run_test_suite
# Import the minimal necessary methods and variables needed to run Refextract
from invenio.docextract_utils import setup_loggers
from invenio.refextract_tag import identify_ibids, \
find_numeration, \
find_numeration_more
from invenio import refextract_re
from invenio.refextract_find import get_reference_section_beginning
from invenio.refextract_api import search_from_reference
from invenio.refextract_text import rebuild_reference_lines
class ReTest(InvenioTestCase):
def setUp(self):
setup_loggers(verbosity=1)
def test_word(self):
r = refextract_re._create_regex_pattern_add_optional_spaces_to_word_characters('ABC')
self.assertEqual(r, ur'A\s*B\s*C\s*')
def test_reference_section_title_pattern(self):
r = refextract_re.get_reference_section_title_patterns()
self.assert_(len(r) > 2)
def test_get_reference_line_numeration_marker_patterns(self):
r = refextract_re.get_reference_line_numeration_marker_patterns()
self.assert_(len(r) > 2)
def test_get_reference_line_marker_pattern(self):
r = refextract_re.get_reference_line_marker_pattern('ABC')
self.assertNotEqual(r.pattern.find('ABC'), -1)
def test_get_post_reference_section_title_patterns(self):
r = refextract_re.get_post_reference_section_title_patterns()
self.assert_(len(r) > 2)
def test_get_post_reference_section_keyword_patterns(self):
r = refextract_re.get_post_reference_section_keyword_patterns()
self.assert_(len(r) > 2)
def test_regex_match_list(self):
s = 'ABC'
m = refextract_re.regex_match_list(s, [
re.compile('C.C'),
re.compile('A.C')
])
self.assert_(m)
m = refextract_re.regex_match_list(s, [
re.compile('C.C')
])
self.assertEqual(m, None)
class IbidTest(InvenioTestCase):
    """Exercises identify_ibids on an empty line and on a line with one ibid."""

    def setUp(self):
        setup_loggers(verbosity=1)

    def test_identify_ibids_empty(self):
        # An empty line is expected to give an empty mapping and an
        # empty line back.
        self.assertEqual(identify_ibids(""), ({}, ''))

    def test_identify_ibids_simple(self):
        source_line = u"[46] E. Schrodinger, Sitzungsber. Preuss. Akad. Wiss. Phys. Math. Kl. 24, 418(1930); ibid, 3, 1(1931)"

        # 85 is the character offset of "ibid" in the line; in the returned
        # line those four characters are expected to be underscores.
        expected_ibids = {85: u'IBID'}
        expected_line = u'[46] E. SCHRODINGER, SITZUNGSBER. PREUSS. AKAD. WISS. PHYS. MATH. KL. 24, 418(1930); ____, 3, 1(1931)'

        # The function is fed the upper-cased line.
        outcome = identify_ibids(source_line.upper())
        self.assertEqual(outcome, (expected_ibids, expected_line))
class FindNumerationTest(InvenioTestCase):
    """Volume/year/page extraction through find_numeration(_more)."""

    def setUp(self):
        setup_loggers(verbosity=1)

    def _assert_numeration(self, found, volume, year, page):
        # Shared check: compares the three numeration fields, in the order
        # volume, year, page.
        self.assertEqual(found['volume'], volume)
        self.assertEqual(found['year'], year)
        self.assertEqual(found['page'], page)

    def test_vol_page_year(self):
        "<vol>, <page> (<year>)"
        found = find_numeration(u"24, 418 (1930)")
        self._assert_numeration(found, u"24", u"1930", u"418")

    def test_vol_year_page(self):
        "<vol>, (<year>) <page> "
        found = find_numeration(u"24, (1930) 418")
        self._assert_numeration(found, u"24", u"1930", u"418")

    def test_year_title_volume_page(self):
        "<year>, <title> <vol> <page> "
        # This layout goes through find_numeration_more; the journal title
        # is already wrapped in <cds.JOURNAL> tags.
        tagged_line = u"1930 <cds.JOURNAL>J.Phys.</cds.JOURNAL> 24, 418"
        found = find_numeration_more(tagged_line)
        self._assert_numeration(found, u"24", u"1930", u"418")
class FindSectionTest(InvenioTestCase):
    """Checks get_reference_section_beginning on small hand-made inputs."""

    def setUp(self):
        setup_loggers(verbosity=1)

    def _assert_untitled_section(self, lines, marker, marker_pattern,
                                 how_found_start):
        # Shared check for inputs without a section title: the section is
        # expected to start at line index 1 with no title string.
        found = get_reference_section_beginning(lines)
        expected = {
            'marker': marker,
            'marker_pattern': marker_pattern,
            'start_line': 1,
            'title_string': None,
            'title_marker_same_line': False,
            'how_found_start': how_found_start,
        }
        self.assertEqual(found, expected)

    def test_simple(self):
        # A "References" title line followed by a bracketed marker.
        document = ["Hello", "References", "[1] Ref1"]
        expected = {
            'marker': '[1]',
            'marker_pattern': u'\\s*(?P<mark>\\[\\s*(?P<marknum>\\d+)\\s*\\])',
            'start_line': 1,
            'title_string': 'References',
            'title_marker_same_line': False,
            'how_found_start': 1,
        }
        found = get_reference_section_beginning(document)
        self.assertEqual(found, expected)

    def test_no_section(self):
        # An empty string as input must give None.
        found = get_reference_section_beginning("")
        self.assertEqual(found, None)

    def test_no_title_via_brackets(self):
        self._assert_untitled_section(
            [
                "Hello",
                # NOTE(review): there is no comma between the next two
                # literals, so they are concatenated into the single line
                # "[1] Ref1[2] Ref2". Kept as in the original; confirm
                # whether two separate lines were intended.
                "[1] Ref1"
                "[2] Ref2"
            ],
            marker='[1]',
            marker_pattern=u'(?P<mark>(?P<left>\\[)\\s*(?P<marknum>\\d+)\\s*(?P<right>\\]))',
            how_found_start=2,
        )

    def test_no_title_via_dots(self):
        self._assert_untitled_section(
            [
                "Hello",
                # NOTE(review): implicit concatenation again, giving the
                # single line "1. Ref12. Ref2"; kept as in the original.
                "1. Ref1"
                "2. Ref2"
            ],
            marker='1.',
            marker_pattern=u'(?P<mark>(?P<left>)\\s*(?P<marknum>\\d+)\\s*(?P<right>\\.))',
            how_found_start=3,
        )

    def test_no_title_via_numbers(self):
        self._assert_untitled_section(
            [
                "Hello",
                # NOTE(review): implicit concatenation again, giving the
                # single line "1 Ref12 Ref2"; kept as in the original.
                "1 Ref1"
                "2 Ref2"
            ],
            marker='1',
            marker_pattern=u'(?P<mark>(?P<left>)\\s*(?P<marknum>\\d+)\\s*(?P<right>))',
            how_found_start=4,
        )

    def test_no_title_via_numbers2(self):
        # Bare numbers on their own lines, with a parenthesised "(3)" line
        # in between the two markers.
        self._assert_untitled_section(
            [
                "Hello",
                "1",
                "Ref1",
                "(3)",
                "2",
                "Ref2",
            ],
            marker='1',
            marker_pattern=u'(?P<mark>(?P<left>)\\s*(?P<marknum>\\d+)\\s*(?P<right>))',
            how_found_start=4,
        )
class SearchTest(InvenioTestCase):
    """Checks search_from_reference with the KBS override set to {}."""

    def setUp(self):
        setup_loggers(verbosity=9)
        from invenio import refextract_kbs
        # Keep the current override so tearDown can put it back, then
        # replace it with an empty dict for the duration of the test.
        self.old_override = refextract_kbs.CFG_REFEXTRACT_KBS_OVERRIDE
        refextract_kbs.CFG_REFEXTRACT_KBS_OVERRIDE = {}

    def tearDown(self):
        from invenio import refextract_kbs
        # Restore the value saved by setUp.
        refextract_kbs.CFG_REFEXTRACT_KBS_OVERRIDE = self.old_override

    def test_not_recognized(self):
        # A reference with neither report number nor journal is expected
        # to give two empty strings.
        search_field, search_pattern = search_from_reference(
            '[1] J. Mars, oh hello')
        self.assertEqual(search_field, '')
        self.assertEqual(search_pattern, '')

    def test_report(self):
        search_field, search_pattern = search_from_reference(
            '[1] J. Mars, oh hello, [hep-ph/0104088]')
        self.assertEqual(search_field, 'report')
        self.assertEqual(search_pattern, 'hep-ph/0104088')

    def test_journal(self):
        search_field, search_pattern = search_from_reference(
            '[1] J. Mars, oh hello, Nucl.Phys. B76 (1974) 477-482')
        self.assertEqual(search_field, 'journal')
        # Only fragments are checked, not the full pattern.
        for fragment in ('Nucl', 'B76', '477'):
            self.assertTrue(fragment in search_pattern)
class RebuildReferencesTest(unittest.TestCase):
def setUp(self):
setup_loggers(verbosity=9)
def test_simple(self):
marker_pattern = ur"^\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
refs = [
u"[1] hello",
u"hello2",
u"[2] foo",
]
rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
self.assertEqual(rebuilt_refs, [
u"[1] hello hello2",
u"[2] foo",
])
# def test_pagination_removal(self):
# marker_pattern = ur"^\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
# refs = [
# u"[1] hello",
# u"hello2",
# u"[42]",
# u"[2] foo",
# ]
# rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
# self.assertEqual(rebuilt_refs, [
# u"[1] hello hello2",
# u"[2] foo",
# ])
def test_pagination_non_removal(self):
marker_pattern = ur"^\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
refs = [
u"[1] hello",
u"hello2",
u"[2]",
u"foo",
]
rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
self.assertEqual(rebuilt_refs, [
u"[1] hello hello2",
u"[2] foo",
])
def test_2_lines_together(self):
marker_pattern = ur"\s*(?P<mark>\[\s*(?P<marknum>\d+)\s*\])"
refs = [
u"[1] hello",
u"hello2 [2] foo",
]
rebuilt_refs = rebuild_reference_lines(refs, marker_pattern)
self.assertEqual(rebuilt_refs, [
u"[1] hello hello2",
u"[2] foo",
])
print 'rebuilt_refs', repr(rebuilt_refs)
# Every test case class defined in this module, gathered into one suite.
TEST_SUITE = make_test_suite(
    ReTest,
    IbidTest,
    FindNumerationTest,
    FindSectionTest,
    SearchTest,
    RebuildReferencesTest,
)

# Run the suite when the module is executed as a script.
if __name__ == '__main__':
    run_test_suite(TEST_SUITE)

Event Timeline