Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92230940
test_search_engine.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 18, 14:00
Size
12 KB
Mime Type
text/x-python
Expires
Wed, Nov 20, 14:00 (2 d)
Engine
blob
Format
Raw Data
Handle
22399823
Attached To
R3600 invenio-infoscience
test_search_engine.py
View Options
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Unit tests for the search engine."""
__revision__
=
\
"$Id$"
from
invenio.base.wrappers
import
lazy_import
from
invenio.testsuite
import
make_test_suite
,
run_test_suite
,
InvenioTestCase
search_engine
=
lazy_import
(
'invenio.legacy.search_engine'
)
class
TestMiscUtilityFunctions
(
InvenioTestCase
):
"""Test whatever non-data-specific utility functions are essential."""
def
test_ziplist2x2
(
self
):
"""search engine - ziplist 2 x 2"""
self
.
assertEqual
(
search_engine
.
ziplist
([
1
,
2
],
[
3
,
4
]),
[[
1
,
3
],
[
2
,
4
]])
def
test_ziplist3x3
(
self
):
"""search engine - ziplist 3 x 3"""
self
.
assertEqual
(
search_engine
.
ziplist
([
1
,
2
,
3
],
[
'a'
,
'b'
,
'c'
],
[
9
,
8
,
7
]),
[[
1
,
'a'
,
9
],
[
2
,
'b'
,
8
],
[
3
,
'c'
,
7
]])
class
TestWashQueryParameters
(
InvenioTestCase
):
"""Test for washing of search query parameters."""
def
test_wash_pattern
(
self
):
"""search engine - washing of query patterns"""
self
.
assertEqual
(
"Ellis, J"
,
search_engine
.
wash_pattern
(
'Ellis, J'
))
#self.assertEqual("ell", search_engine.wash_pattern('ell*'))
def
test_wash_dates_from_tuples
(
self
):
"""search engine - washing of date arguments from (year,month,day) tuples"""
self
.
assertEqual
(
search_engine
.
wash_dates
(
d1y
=
1980
,
d1m
=
1
,
d1d
=
28
,
d2y
=
2003
,
d2m
=
2
,
d2d
=
3
),
(
'1980-01-28 00:00:00'
,
'2003-02-03 00:00:00'
))
self
.
assertEqual
(
search_engine
.
wash_dates
(
d1y
=
1980
,
d1m
=
0
,
d1d
=
28
,
d2y
=
2003
,
d2m
=
2
,
d2d
=
0
),
(
'1980-01-28 00:00:00'
,
'2003-02-31 00:00:00'
))
def
test_wash_dates_from_datetexts
(
self
):
"""search engine - washing of date arguments from datetext strings"""
self
.
assertEqual
(
search_engine
.
wash_dates
(
d1
=
"1980-01-28 01:02:03"
,
d2
=
"1980-01-29 12:34:56"
),
(
'1980-01-28 01:02:03'
,
'1980-01-29 12:34:56'
))
self
.
assertEqual
(
search_engine
.
wash_dates
(
d1
=
"1980-01-28 01:02:03"
),
(
'1980-01-28 01:02:03'
,
'9999-12-31 00:00:00'
))
self
.
assertEqual
(
search_engine
.
wash_dates
(
d2
=
"1980-01-29 12:34:56"
),
(
'0000-01-01 00:00:00'
,
'1980-01-29 12:34:56'
))
def
test_wash_dates_from_both
(
self
):
"""search engine - washing of date arguments from both datetext strings and (year,month,day) tuples"""
# datetext mode takes precedence, d1* should be ignored
self
.
assertEqual
(
search_engine
.
wash_dates
(
d1
=
"1980-01-28 01:02:03"
,
d1y
=
1980
,
d1m
=
1
,
d1d
=
28
),
(
'1980-01-28 01:02:03'
,
'9999-12-31 00:00:00'
))
# datetext mode takes precedence, d2 missing, d2* should be ignored
self
.
assertEqual
(
search_engine
.
wash_dates
(
d1
=
"1980-01-28 01:02:03"
,
d2y
=
2003
,
d2m
=
2
,
d2d
=
3
),
(
'1980-01-28 01:02:03'
,
'2003-02-03 00:00:00'
))
class
TestQueryParser
(
InvenioTestCase
):
"""Test of search pattern (or query) parser."""
def
_check
(
self
,
p
,
f
,
m
,
result_wanted
):
"Internal checking function calling create_basic_search_units."
result_obtained
=
search_engine
.
create_basic_search_units
(
None
,
p
,
f
,
m
)
assert
result_obtained
==
result_wanted
,
\
'obtained
%s
instead of
%s
'
%
(
repr
(
result_obtained
),
repr
(
result_wanted
))
return
def
test_parsing_single_word_query
(
self
):
"search engine - parsing single word queries"
self
.
_check
(
'word'
,
''
,
None
,
[[
'+'
,
'word'
,
''
,
'w'
]])
def
test_parsing_single_word_with_boolean_operators
(
self
):
"search engine - parsing single word queries"
self
.
_check
(
'+word'
,
''
,
None
,
[[
'+'
,
'word'
,
''
,
'w'
]])
self
.
_check
(
'-word'
,
''
,
None
,
[[
'-'
,
'word'
,
''
,
'w'
]])
self
.
_check
(
'|word'
,
''
,
None
,
[[
'|'
,
'word'
,
''
,
'w'
]])
def
test_parsing_single_word_in_field
(
self
):
"search engine - parsing single word queries in a logical field"
self
.
_check
(
'word'
,
'title'
,
None
,
[[
'+'
,
'word'
,
'title'
,
'w'
]])
def
test_parsing_single_word_in_tag
(
self
):
"search engine - parsing single word queries in a physical tag"
self
.
_check
(
'word'
,
'500'
,
None
,
[[
'+'
,
'word'
,
'500'
,
'a'
]])
def
test_parsing_query_with_commas
(
self
):
"search engine - parsing queries with commas"
self
.
_check
(
'word,word'
,
'title'
,
None
,
[[
'+'
,
'word,word'
,
'title'
,
'a'
]])
def
test_parsing_exact_phrase_query
(
self
):
"search engine - parsing exact phrase"
self
.
_check
(
'"the word"'
,
'title'
,
None
,
[[
'+'
,
'the word'
,
'title'
,
'a'
]])
def
test_parsing_exact_phrase_query_unbalanced
(
self
):
"search engine - parsing unbalanced exact phrase"
self
.
_check
(
'"the word'
,
'title'
,
None
,
[[
'+'
,
'"the'
,
'title'
,
'w'
],
[
'+'
,
'word'
,
'title'
,
'w'
]])
def
test_parsing_exact_phrase_query_in_any_field
(
self
):
"search engine - parsing exact phrase in any field"
self
.
_check
(
'"the word"'
,
''
,
None
,
[[
'+'
,
'the word'
,
''
,
'a'
]])
def
test_parsing_partial_phrase_query
(
self
):
"search engine - parsing partial phrase"
self
.
_check
(
"'the word'"
,
'title'
,
None
,
[[
'+'
,
'%the word%'
,
'title'
,
'a'
]])
def
test_parsing_partial_phrase_query_unbalanced
(
self
):
"search engine - parsing unbalanced partial phrase"
self
.
_check
(
"'the word"
,
'title'
,
None
,
[[
'+'
,
"'the"
,
'title'
,
'w'
],
[
'+'
,
"word"
,
'title'
,
'w'
]])
def
test_parsing_partial_phrase_query_in_any_field
(
self
):
"search engine - parsing partial phrase in any field"
self
.
_check
(
"'the word'"
,
''
,
None
,
[[
'+'
,
'%the word%'
,
''
,
'a'
]])
def
test_parsing_regexp_query
(
self
):
"search engine - parsing regex matches"
self
.
_check
(
"/the word/"
,
'title'
,
None
,
[[
'+'
,
'the word'
,
'title'
,
'r'
]])
def
test_parsing_regexp_query_unbalanced
(
self
):
"search engine - parsing unbalanced regexp"
self
.
_check
(
"/the word"
,
'title'
,
None
,
[[
'+'
,
'/the'
,
'title'
,
'w'
],
[
'+'
,
'word'
,
'title'
,
'w'
]])
def
test_parsing_regexp_query_in_any_field
(
self
):
"search engine - parsing regexp searches in any field"
self
.
_check
(
"/the word/"
,
''
,
None
,
[[
'+'
,
'the word'
,
''
,
'r'
]])
def
test_parsing_boolean_query
(
self
):
"search engine - parsing boolean query with several words"
self
.
_check
(
"muon kaon ellis cern"
,
''
,
None
,
[[
'+'
,
'muon'
,
''
,
'w'
],
[
'+'
,
'kaon'
,
''
,
'w'
],
[
'+'
,
'ellis'
,
''
,
'w'
],
[
'+'
,
'cern'
,
''
,
'w'
]])
def
test_parsing_boolean_query_with_word_operators
(
self
):
"search engine - parsing boolean query with word operators"
self
.
_check
(
"muon and kaon or ellis not cern"
,
''
,
None
,
[[
'+'
,
'muon'
,
''
,
'w'
],
[
'+'
,
'kaon'
,
''
,
'w'
],
[
'|'
,
'ellis'
,
''
,
'w'
],
[
'-'
,
'cern'
,
''
,
'w'
]])
def
test_parsing_boolean_query_with_symbol_operators
(
self
):
"search engine - parsing boolean query with symbol operators"
self
.
_check
(
"muon +kaon |ellis -cern"
,
''
,
None
,
[[
'+'
,
'muon'
,
''
,
'w'
],
[
'+'
,
'kaon'
,
''
,
'w'
],
[
'|'
,
'ellis'
,
''
,
'w'
],
[
'-'
,
'cern'
,
''
,
'w'
]])
def
test_parsing_boolean_query_with_symbol_operators_and_spaces
(
self
):
"search engine - parsing boolean query with operators and spaces"
self
.
_check
(
"muon + kaon | ellis - cern"
,
''
,
None
,
[[
'+'
,
'muon'
,
''
,
'w'
],
[
'+'
,
'kaon'
,
''
,
'w'
],
[
'|'
,
'ellis'
,
''
,
'w'
],
[
'-'
,
'cern'
,
''
,
'w'
]])
def
test_parsing_boolean_query_with_symbol_operators_and_no_spaces
(
self
):
"search engine - parsing boolean query with operators and no spaces"
self
.
_check
(
"muon+kaon|ellis-cern"
,
''
,
None
,
[[
'+'
,
'muon+kaon|ellis-cern'
,
''
,
'w'
]])
def
test_parsing_structured_query_existing
(
self
):
"search engine - parsing structured query, existing index"
self
.
_check
(
"title:muon"
,
''
,
None
,
[[
'+'
,
'muon'
,
'title'
,
'w'
]])
def
test_parsing_structured_query_existing_field
(
self
):
"search engine - parsing structured query, existing field, but no word index"
from
invenio.config
import
CFG_CERN_SITE
if
not
CFG_CERN_SITE
:
self
.
_check
(
"experiment:LHC"
,
''
,
None
,
[[
'+'
,
'LHC'
,
'experiment'
,
'a'
]])
def
test_parsing_structured_query_nonexisting
(
self
):
"search engine - parsing structured query, non-existing index"
self
.
_check
(
"foo:muon"
,
''
,
None
,
[[
'+'
,
'foo:muon'
,
''
,
'w'
]])
def
test_parsing_structured_query_marc
(
self
):
"search engine - parsing structured query, MARC-tag defined index"
self
.
_check
(
"245:muon"
,
''
,
None
,
[[
'+'
,
'muon'
,
'245'
,
'a'
]])
def
test_parsing_combined_structured_query
(
self
):
"search engine - parsing combined structured query"
self
.
_check
(
"title:muon author:ellis"
,
''
,
None
,
[[
'+'
,
'muon'
,
'title'
,
'w'
],
[
'+'
,
'ellis'
,
'author'
,
'w'
]])
def
test_parsing_structured_regexp_query
(
self
):
"search engine - parsing structured regexp query"
self
.
_check
(
"title:/(one|two)/"
,
''
,
None
,
[[
'+'
,
'(one|two)'
,
'title'
,
'r'
]])
def
test_parsing_structured_regexp_marc_query
(
self
):
"search engine - parsing structured regexp MARC query"
self
.
_check
(
"245__a:/(one|two)/"
,
''
,
None
,
[[
'+'
,
'(one|two)'
,
'245__a'
,
'r'
]])
def
test_parsing_structured_regexp_refersto_query
(
self
):
"search engine - parsing structured regexp refersto query"
self
.
_check
(
"refersto:/(one|two)/"
,
''
,
None
,
[[
'+'
,
'(one|two)'
,
'refersto'
,
'r'
]])
def
test_parsing_combined_structured_query_in_a_field
(
self
):
"search engine - parsing structured query in a field"
self
.
_check
(
"title:muon author:ellis"
,
'abstract'
,
None
,
[[
'+'
,
'muon'
,
'title'
,
'w'
],
[
'+'
,
'ellis'
,
'author'
,
'w'
]])
def
test_parsing_colons_and_spaces_well_structured
(
self
):
"search engine - parsing query with colons and spaces, well structured"
self
.
_check
(
"title: muon author:ellis keyword: kaon"
,
'abstract'
,
None
,
[[
'+'
,
'muon'
,
'title'
,
'w'
],
[
'+'
,
'ellis'
,
'author'
,
'w'
],
[
'+'
,
'kaon'
,
'keyword'
,
'w'
]])
def
test_parsing_colons_and_spaces_badly_structured
(
self
):
"search engine - parsing query with colons and spaces, badly structured"
self
.
_check
(
"foo: bar"
,
'title'
,
None
,
[[
'+'
,
'foo'
,
'title'
,
'w'
],
[
'+'
,
'bar'
,
'title'
,
'w'
]])
def
test_parsing_colons_and_spaces_for_phrase_query
(
self
):
"search engine - parsing query with colons and spaces, phrase query"
self
.
_check
(
'author: "Ellis, J"'
,
None
,
None
,
[[
'+'
,
'Ellis, J'
,
'author'
,
'a'
]])
def
test_search_pattern_with_equal_sign
(
self
):
"search engine - parsing query with equal sign"
self
.
_check
(
'title:"s = 630"'
,
None
,
None
,
[[
'+'
,
's = 630'
,
'title'
,
'a'
]])
class
TestSearchUnitFunction
(
InvenioTestCase
):
"""Test for washing of search query parameters."""
def
test_collection_equality_results
(
self
):
"""search unit - getting same results for collection name."""
self
.
assertEqual
(
search_engine
.
search_unit
(
'Preprints'
,
'collection'
),
search_engine
.
search_unit
(
'PREPRINT'
,
'collection'
))
self
.
assertEqual
(
search_engine
.
search_unit
(
'Books'
,
'collection'
),
search_engine
.
search_unit
(
'BOOK'
,
'collection'
))
TEST_SUITE
=
make_test_suite
(
TestWashQueryParameters
,
TestQueryParser
,
TestMiscUtilityFunctions
,
TestSearchUnitFunction
)
if
__name__
==
"__main__"
:
run_test_suite
(
TEST_SUITE
)
Event Timeline
Log In to Comment