Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F73873131
solrutils_regression_tests.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Jul 24, 23:52
Size
16 KB
Mime Type
text/x-python
Expires
Fri, Jul 26, 23:52 (1 d, 22 h)
Engine
blob
Format
Raw Data
Handle
19276474
Attached To
R3600 invenio-infoscience
solrutils_regression_tests.py
View Options
## This file is part of Invenio.
## Copyright (C) 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import
unittest
from
invenio.config
import
CFG_SOLR_URL
,
CFG_SITE_URL
,
CFG_SITE_NAME
from
invenio.testutils
import
make_test_suite
,
\
run_test_suite
,
\
test_web_page_content
,
\
nottest
from
invenio
import
intbitset
from
invenio.solrutils_bibindex_searcher
import
solr_get_bitset
from
invenio.solrutils_bibrank_searcher
import
solr_get_ranked
,
solr_get_similar_ranked
from
invenio.search_engine
import
get_collection_reclist
from
invenio.bibrank_bridge_utils
import
get_external_word_similarity_ranker
# Default value of the ``rows`` argument of the ranking/similarity helper
# methods defined in the test classes of this module.
ROWS = 100


# Expected Solr hit sets, keyed by the query string exactly as it is passed to
# the searcher.  TestSolrSearch compares these against solr_get_bitset(), and
# TestSolrRanking uses them as the default hitset handed to solr_get_ranked().
HITSETS = {
    # A term with no matching record.
    'Willnotfind': intbitset.intbitset([]),
    'higgs': intbitset.intbitset([47, 48, 51, 52, 55, 56, 58, 68, 79, 85, 89, 96]),
    'of': intbitset.intbitset([8, 10, 11, 12, 15, 43, 44, 45, 46, 47, 48, 49, 50, 51,
                               52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 68, 74,
                               77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
                               91, 92, 93, 94, 95, 96, 97]),
    # Phrase query: the double quotes are part of the query string.
    '"higgs boson"': intbitset.intbitset([55, 56]),
}
def get_topN(n, data):
    """Return a new dict mapping each key of ``data`` to the last ``n`` items of its value.

    Parameters:
      n    - number of trailing items to keep from every value.  Because the
             slice is ``value[-n:]``, ``n == 0`` keeps the *whole* sequence;
             the tests in this module rely on that to mean "all results".
      data - mapping whose values support slicing (tuples, lists, ...).

    Returns a fresh ``dict``; ``data`` itself is not modified.
    """
    # items() instead of iteritems(): iteritems() only exists on Python 2
    # dictionaries, whereas items() is available on both Python 2 and 3.
    return dict((key, value[-n:]) for key, value in data.items())
class TestSolrSearch(unittest.TestCase):
    """Check the hit sets returned by solr_get_bitset against HITSETS.

    Prerequisites:
      make install-solrutils
      CFG_SOLR_URL set
      fulltext index in idxINDEX containing 'SOLR' in indexer column
    AND EITHER
      Solr index built: ./bibindex -w fulltext for all records
    OR
      WRD method referring to Solr: <invenio installation>/etc/bibrank$ cp template_word_similarity_solr.cfg wrd.cfg
      and ./bibrank -w wrd for all records
    """

    def _get_result(self, query, index='fulltext'):
        """Run ``query`` against ``index`` through solr_get_bitset and return its result."""
        found = solr_get_bitset(index, query)
        return found

    @nottest
    def test_get_bitset(self):
        """solrutils - search results"""
        # Same queries, in the same order, as the keys checked one by one before.
        for query in ('Willnotfind', 'higgs', 'of', '"higgs boson"'):
            self.assertEqual(HITSETS[query], self._get_result(query))
class TestSolrRanking(unittest.TestCase):
    """Check the record order returned by solr_get_ranked.

    Prerequisites:
      make install-solrutils
      CFG_SOLR_URL set
      fulltext index in idxINDEX containing 'SOLR' in indexer column
    AND EITHER
      Solr index built: ./bibindex -w fulltext for all records
    OR
      WRD method referring to Solr: <invenio installation>/etc/bibrank$ cp template_word_similarity_solr.cfg wrd.cfg
      and ./bibrank -w wrd for all records
    """

    # Expected ranked record ids per query, in the order the ranker is
    # expected to return them.
    _RANKED = {
        'Willnotfind': tuple(),
        'higgs': (79, 51, 55, 47, 56, 96, 58, 68, 52, 48, 89, 85),
        'of': (50, 61, 60, 54, 56, 53, 10, 68, 44, 57, 83, 95, 92, 91, 74, 45, 48,
               62, 82, 49, 51, 89, 90, 96, 43, 8, 64, 97, 15, 85, 78, 46, 55, 79,
               84, 88, 81, 52, 58, 86, 11, 80, 93, 77, 12, 59, 87, 47, 94),
        '"higgs boson"': (55, 56),
    }

    def _get_ranked_result_sequence(self, query, index='fulltext', rows=ROWS, hitset=None):
        """Rank ``query`` with Solr and return the first element of every result pair.

        When ``hitset`` is not given, the expected hit set for ``query`` from
        HITSETS is used.
        """
        records = hitset
        if records is None:
            records = HITSETS[query]
        pattern = '%s:%s' % (index, query)
        params = self._get_ranking_params()
        ranked_result = solr_get_ranked(pattern, records, params, rows)
        # Only the first item of the returned structure holds the ranked pairs.
        return tuple(entry[0] for entry in ranked_result[0])

    def _get_ranked_topN(self, n):
        """Return _RANKED with every sequence reduced by get_topN(n, ...)."""
        return get_topN(n, self._RANKED)

    def _get_ranking_params(self, cutoff_amount=10000, cutoff_time=2000):
        """
        Default values from template_word_similarity_solr.cfg
        """
        params = {}
        params['cutoff_amount'] = cutoff_amount
        params['cutoff_time_ms'] = cutoff_time
        return params

    @nottest
    def test_get_ranked(self):
        """solrutils - ranking results"""
        # get_topN(0, ...) leaves every sequence complete.
        expected = self._get_ranked_topN(0)
        for query in ('Willnotfind', 'higgs', 'of', '"higgs boson"'):
            self.assertEqual(expected[query],
                             self._get_ranked_result_sequence(query=query))

    @nottest
    def test_get_ranked_top(self):
        """solrutils - ranking top results"""
        queries = ('Willnotfind', 'higgs', 'of', '"higgs boson"')

        # Asking for zero rows must give an empty result for every query.
        for query in queries:
            self.assertEqual(tuple(),
                             self._get_ranked_result_sequence(query=query, rows=0))

        for top_n in (2, 10):
            expected = self._get_ranked_topN(top_n)
            for query in queries:
                self.assertEqual(expected[query],
                                 self._get_ranked_result_sequence(query=query, rows=top_n))

    @nottest
    def test_get_ranked_smaller_hitset(self):
        """solrutils - ranking smaller hitset"""
        higgs_subset = intbitset.intbitset([47, 56, 58, 68, 85, 89])
        self.assertEqual((47, 56, 58, 68, 89, 85),
                         self._get_ranked_result_sequence(query='higgs', hitset=higgs_subset))

        of_subset = intbitset.intbitset([45, 50, 61, 74, 94])
        self.assertEqual((50, 61, 74, 45, 94),
                         self._get_ranked_result_sequence(query='of', hitset=of_subset))
        self.assertEqual((74, 45, 94),
                         self._get_ranked_result_sequence(query='of', hitset=of_subset, rows=3))

    @nottest
    def test_get_ranked_larger_hitset(self):
        """solrutils - ranking larger hitset"""
        superset = intbitset.intbitset([47, 56, 58, 68, 85, 89])
        self.assertEqual(tuple(),
                         self._get_ranked_result_sequence(query='Willnotfind', hitset=superset))

        superset = intbitset.intbitset([47, 56, 55, 56, 58, 68, 85, 89])
        self.assertEqual((55, 56),
                         self._get_ranked_result_sequence(query='"higgs boson"', hitset=superset))
class TestSolrSimilarToRecid(unittest.TestCase):
    """Check the record order returned by solr_get_similar_ranked.

    Prerequisites:
      make install-solrutils
      CFG_SOLR_URL set
      fulltext index in idxINDEX containing 'SOLR' in indexer column
      WRD method referring to Solr: <invenio installation>/etc/bibrank$ cp template_word_similarity_solr.cfg wrd.cfg
      ./bibrank -w wrd for all records
    """

    # Expected "similar records" sequences, keyed by the record id they are
    # computed for.
    _SIMILAR = {
        30: (12, 95, 85, 82, 44, 1, 89, 64, 58, 15, 96, 61, 50, 86, 78, 77, 65, 62,
             60, 47, 46, 100, 99, 102, 91, 80, 7, 92, 88, 74, 57, 55, 108, 84, 81,
             79, 54, 101, 11, 103, 94, 48, 83, 72, 63, 2, 68, 51, 5, 53, 97, 93,
             70, 45, 52, 14, 59, 6, 10, 32, 33, 29, 30),
        59: (17, 69, 3, 20, 109, 14, 22, 33, 24, 60, 6, 73, 113, 107, 78, 4, 13, 5,
             45, 8, 72, 46, 74, 63, 71, 44, 87, 70, 103, 57, 92, 49, 88, 7, 68, 77,
             10, 62, 93, 2, 65, 55, 96, 43, 94, 1, 11, 99, 91, 61, 51, 15, 89, 64,
             97, 108, 80, 101, 86, 90, 54, 95, 102, 47, 100, 79, 83, 48, 12, 81,
             82, 58, 50, 56, 84, 85, 53, 52, 59)
    }

    # Record list of the site collection; evaluated once, when the class body
    # is executed.
    _all_records = get_collection_reclist(CFG_SITE_NAME)

    def _get_similar_result_sequence(self, recid, rows=ROWS):
        """Return the last ``rows`` record ids Solr ranks as similar to ``recid``."""
        params = self._get_similar_ranking_params()
        similar_result = solr_get_similar_ranked(recid, self._all_records, params, rows)
        sequence = tuple(entry[0] for entry in similar_result[0])
        return sequence[-rows:]

    def _get_similar_topN(self, n):
        """Return _SIMILAR with every sequence reduced by get_topN(n, ...)."""
        return get_topN(n, self._SIMILAR)

    def _get_similar_ranking_params(self, cutoff_amount=10000, cutoff_time=2000):
        """
        Default values from template_word_similarity_solr.cfg
        """
        find_similar = {
            'more_results_factor': 5,
            'mlt_fl': 'mlt',
            'mlt_mintf': 0,
            'mlt_mindf': 0,
            'mlt_minwl': 0,
            'mlt_maxwl': 0,
            'mlt_maxqt': 25,
            'mlt_maxntp': 1000,
            'mlt_boost': 'false',
        }
        params = {
            'cutoff_amount': cutoff_amount,
            'cutoff_time_ms': cutoff_time,
        }
        params['find_similar_to_recid'] = find_similar
        return params

    @nottest
    def test_get_similar_ranked(self):
        """solrutils - similar results"""
        # get_topN(0, ...) leaves every sequence complete.
        expected = self._get_similar_topN(0)
        for recid in (30, 59):
            self.assertEqual(expected[recid],
                             self._get_similar_result_sequence(recid=recid))

    @nottest
    def test_get_similar_ranked_top(self):
        """solrutils - similar top results"""
        top_n = 5
        expected = self._get_similar_topN(top_n)
        for recid in (30, 59):
            self.assertEqual(expected[recid],
                             self._get_similar_result_sequence(recid=recid, rows=top_n))
class TestSolrWebSearch(unittest.TestCase):
    """Check Solr search results as served by the web search interface.

    Prerequisites:
      make install-solrutils
      CFG_SOLR_URL set
      fulltext index in idxINDEX containing 'SOLR' in indexer column
    AND EITHER
      Solr index built: ./bibindex -w fulltext for all records
    OR
      WRD method referring to Solr: <invenio installation>/etc/bibrank$ cp template_word_similarity_solr.cfg wrd.cfg
      and ./bibrank -w wrd for all records
    """

    @nottest
    def test_get_result(self):
        """solrutils - web search results"""
        # (search path, text expected in the page), checked in this order.
        cases = (
            ('/search?of=id&p=fulltext%3AWillnotfind&rg=100',
             "[]"),
            ('/search?of=id&p=fulltext%3Ahiggs&rg=100',
             "[47, 48, 51, 52, 55, 56, 58, 68, 79, 85, 89, 96]"),
            ('/search?of=id&p=fulltext%3Aof&rg=100',
             "[8, 10, 11, 12, 15, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 68, 74, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97]"),
            ('/search?of=id&p=fulltext%3A%22higgs+boson%22&rg=100',
             "[55, 56]"),
        )
        for path, expected in cases:
            # The assertion expects test_web_page_content to return an empty list.
            self.assertEqual([],
                             test_web_page_content(CFG_SITE_URL + path,
                                                   expected_text=expected))
class TestSolrWebRanking(unittest.TestCase):
    """Check Solr ranking results as served by the web search interface.

    Prerequisites:
      make install-solrutils
      CFG_SOLR_URL set
      fulltext index in idxINDEX containing 'SOLR' in indexer column
    AND EITHER
      Solr index built: ./bibindex -w fulltext for all records
    OR
      WRD method referring to Solr: <invenio installation>/etc/bibrank$ cp template_word_similarity_solr.cfg wrd.cfg
      and ./bibrank -w wrd for all records
    """

    @nottest
    def test_get_ranked(self):
        """solrutils - web ranking results"""
        anonymous = {}
        as_admin = {'username': 'admin'}
        # (search path, text expected in the page, extra keyword arguments),
        # checked in this order.
        cases = (
            ('/search?of=id&p=fulltext%3AWillnotfind&rg=100&rm=wrd',
             "[]",
             anonymous),
            ('/search?of=id&p=fulltext%3Ahiggs&rm=wrd',
             "[51, 79, 55, 47, 56, 96, 58, 68, 52, 48, 89, 85]",
             anonymous),
            ('/search?of=id&p=fulltext%3Ahiggs&rg=100&rm=wrd',
             "[79, 51, 55, 47, 56, 96, 58, 68, 52, 48, 89, 85]",
             anonymous),
            # Record 77 is restricted
            ('/search?of=id&p=fulltext%3Aof&rm=wrd',
             "[8, 10, 15, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 64, 68, 74, 78, 79, 81, 82, 83, 84, 85, 88, 89, 90, 91, 92, 95, 96, 97, 86, 11, 80, 93, 77, 12, 59, 87, 47, 94]",
             as_admin),
            ('/search?of=id&p=fulltext%3Aof&rg=100&rm=wrd',
             "[50, 61, 60, 54, 56, 53, 10, 68, 44, 57, 83, 95, 92, 91, 74, 45, 48, 62, 82, 49, 51, 89, 90, 96, 43, 8, 64, 97, 15, 85, 78, 46, 55, 79, 84, 88, 81, 52, 58, 86, 11, 80, 93, 77, 12, 59, 87, 47, 94]",
             as_admin),
            ('/search?of=id&p=fulltext%3A%22higgs+boson%22&rg=100&rm=wrd',
             "[55, 56]",
             anonymous),
        )
        for path, expected, extra in cases:
            # The assertion expects test_web_page_content to return an empty list.
            self.assertEqual([],
                             test_web_page_content(CFG_SITE_URL + path,
                                                   expected_text=expected,
                                                   **extra))
class TestSolrWebSimilarToRecid(unittest.TestCase):
    """Check Solr "similar records" results as served by the web search interface.

    Prerequisites:
      make install-solrutils
      CFG_SOLR_URL set
      fulltext index in idxINDEX containing 'SOLR' in indexer column
      WRD method referring to Solr: <invenio installation>/etc/bibrank$ cp template_word_similarity_solr.cfg wrd.cfg
      ./bibrank -w wrd for all records
    """

    @nottest
    def test_get_similar_ranked(self):
        """solrutils - web similar results"""
        # (search path, text expected in the page), checked in this order.
        cases = (
            ('/search?of=id&p=recid%3A30&rm=wrd',
             "[1, 3, 4, 8, 9, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 34, 43, 44, 49, 50, 56, 58, 61, 64, 66, 67, 69, 71, 73, 75, 76, 77, 78, 82, 85, 86, 87, 89, 90, 95, 96, 98, 104, 107, 109, 113, 65, 62, 60, 47, 46, 100, 99, 102, 91, 80, 7, 92, 88, 74, 57, 55, 108, 84, 81, 79, 54, 101, 11, 103, 94, 48, 83, 72, 63, 2, 68, 51, 5, 53, 97, 93, 70, 45, 52, 14, 59, 6, 10, 32, 33, 29, 30]"),
            ('/search?of=id&p=recid%3A30&rg=100&rm=wrd',
             "[3, 4, 8, 9, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 34, 43, 49, 56, 66, 67, 69, 71, 73, 75, 76, 87, 90, 98, 104, 107, 109, 113, 12, 95, 85, 82, 44, 1, 89, 64, 58, 15, 96, 61, 50, 86, 78, 77, 65, 62, 60, 47, 46, 100, 99, 102, 91, 80, 7, 92, 88, 74, 57, 55, 108, 84, 81, 79, 54, 101, 11, 103, 94, 48, 83, 72, 63, 2, 68, 51, 5, 53, 97, 93, 70, 45, 52, 14, 59, 6, 10, 32, 33, 29, 30]"),
        )
        for path, expected in cases:
            # The assertion expects test_web_page_content to return an empty list.
            self.assertEqual([],
                             test_web_page_content(CFG_SITE_URL + path,
                                                   expected_text=expected))
# Test case classes that make up the suite; filled in below according to the
# current configuration.
TESTS = []


# The search tests are only registered when CFG_SOLR_URL is set.
if CFG_SOLR_URL:
    TESTS.extend((TestSolrSearch, TestSolrWebSearch))
    # The ranking and similarity tests are added only when the external word
    # similarity ranker is reported as 'solr'.
    # NOTE(review): this check is presumed to be nested under the CFG_SOLR_URL
    # check, as the class docstrings list "CFG_SOLR_URL set" as a requirement
    # -- confirm.
    if get_external_word_similarity_ranker() == 'solr':
        TESTS.extend((TestSolrRanking, TestSolrSimilarToRecid, TestSolrWebRanking, TestSolrWebSimilarToRecid))


# Suite built from whichever classes were registered above.
TEST_SUITE = make_test_suite(*TESTS)


# Run the suite when the module is executed as a script.
if __name__ == "__main__":
    run_test_suite(TEST_SUITE, warn_user=True)
Event Timeline
Log In to Comment