Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F110264236
bibmatch_tests.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Apr 25, 10:42
Size
6 KB
Mime Type
text/x-python
Expires
Sun, Apr 27, 10:42 (2 d)
Engine
blob
Format
Raw Data
Handle
25777800
Attached To
R3600 invenio-infoscience
bibmatch_tests.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=E1102
"""Unit tests for bibmatch."""
__revision__
=
"$Id$"
from
invenio.testutils
import
make_test_suite
,
run_test_suite
from
invenio.bibmatch_validator
import
compare_fieldvalues_normal
,
\
compare_fieldvalues_authorname
,
\
compare_fieldvalues_identifier
,
\
compare_fieldvalues_title
,
\
compare_fieldvalues_date
,
\
get_paired_comparisons
import
unittest
class
BibMatchTest
(
unittest
.
TestCase
):
"""Test functions to check the validator of Bibmatch."""
def
test_validation_get_paired_comparisons
(
self
):
"""bibmatch - validation: check generated paired comparisons """
first_list
=
[
1
,
2
,
3
]
second_list
=
[
4
,
5
]
# Should return empty, as lists are not equal in length
self
.
assertFalse
(
get_paired_comparisons
(
first_list
,
second_list
,
False
))
# Should return result, in un-ordered mode
result
=
[((
1
,
4
),
(
1
,
5
)),
((
2
,
4
),
(
2
,
5
)),
((
3
,
4
),
(
3
,
5
))]
self
.
assertEqual
(
result
,
get_paired_comparisons
(
first_list
,
second_list
))
def
test_validation_compare_authors
(
self
):
"""BibMatch comparison: compare authors"""
original_record_instances
=
[
'Brodsky, Stanley J.'
]
matched_record_instances
=
[
'Brodsky, S.J.'
,
'Not, M E'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
threshold
=
0.8
matches_needed
=
1
result
,
dummy
=
compare_fieldvalues_authorname
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertTrue
(
result
)
original_record_instances
=
[
'Brodsky, J.'
]
matched_record_instances
=
[
'Brodsky, S.J.'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
result
,
dummy
=
compare_fieldvalues_authorname
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertFalse
(
result
)
def
test_validation_compare_strings
(
self
):
"""BibMatch comparison: compare strings"""
original_record_instances
=
[
'This is some random text'
]
matched_record_instances
=
[
'I have some random text about nothing'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
threshold
=
0.8
matches_needed
=
1
result
,
dummy
=
compare_fieldvalues_normal
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertFalse
(
result
)
original_record_instances
=
[
'This is some random text'
]
matched_record_instances
=
[
'Is some random text'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
result
,
dummy
=
compare_fieldvalues_normal
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertTrue
(
result
)
def
test_validation_compare_identifiers
(
self
):
"""BibMatch comparison: compare identifiers"""
original_record_instances
=
[
'REP-NO-02123'
]
matched_record_instances
=
[
'REPNO123'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
threshold
=
1.0
matches_needed
=
1
result
,
dummy
=
compare_fieldvalues_identifier
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertFalse
(
result
)
original_record_instances
=
[
'REP-NO-0123'
]
matched_record_instances
=
[
'REPNO123'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
result
,
dummy
=
compare_fieldvalues_identifier
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertTrue
(
result
)
def
test_validation_compare_date
(
self
):
"""BibMatch comparison: compare date"""
original_record_instances
=
[
'2002-02'
]
matched_record_instances
=
[
'2001'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
threshold
=
1.0
matches_needed
=
1
result
,
dummy
=
compare_fieldvalues_date
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertFalse
(
result
)
original_record_instances
=
[
'2001-02'
]
matched_record_instances
=
[
'2001'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
result
,
dummy
=
compare_fieldvalues_date
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertTrue
(
result
)
def
test_validation_compare_title
(
self
):
"""BibMatch comparison: compare title"""
original_record_instances
=
[
'Assault frequency and preformation probability'
]
matched_record_instances
=
[
'Assault frequency and preformation probability : The alpha emission process'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
threshold
=
0.9
matches_needed
=
1
# This should fail
result
,
dummy
=
compare_fieldvalues_normal
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertFalse
(
result
)
# Title search however, takes separators into account
result
,
dummy
=
compare_fieldvalues_title
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertTrue
(
result
)
# Check longer titles
original_record_instances
=
[
'Buffered Electropolishing
\xe2\x80\x93
A New Way for '
\
'Achieving Extremely Smooth Surface Finish on Nb SRF '
\
'Cavities to be Used in Particle Accelerators'
]
matched_record_instances
=
[
'Buffered Electropolishing: A New Way for Achieving '
\
'Extremely Smooth Surface Finish on Nb SRF Cavities '
\
'To be Used in Particle Accelerators'
]
comparisons
=
get_paired_comparisons
(
original_record_instances
,
matched_record_instances
)
result
,
dummy
=
compare_fieldvalues_title
(
comparisons
,
threshold
,
matches_needed
)
self
.
assertTrue
(
result
)
TEST_SUITE
=
make_test_suite
(
BibMatchTest
)
if
__name__
==
"__main__"
:
run_test_suite
(
TEST_SUITE
,
warn_user
=
False
)
Event Timeline
Log In to Comment