Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90935714
oai_repository_tests.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Nov 6, 04:32
Size
10 KB
Mime Type
text/x-python
Expires
Fri, Nov 8, 04:32 (2 d)
Engine
blob
Format
Raw Data
Handle
22162764
Attached To
R3600 invenio-infoscience
oai_repository_tests.py
View Options
# -*- coding: utf-8 -*-
##
## $Id$
## CDS Invenio OAI repository unit tests.
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Unit tests for the oai repository."""
# Revision marker; the "$Id$" value matches the marker in the header comment
# and is presumably a version-control keyword placeholder -- TODO confirm.
__revision__ = "$Id$"
import
unittest
import
re
import
time
from
invenio
import
oai_repository
,
search_engine
from
invenio.testutils
import
make_test_suite
,
run_test_suite
from
invenio.config
import
\
CFG_OAI_LOAD
,
\
CFG_OAI_ID_FIELD
class TestVerbs(unittest.TestCase):
    """Smoke tests for the six OAI verb handlers of the repository."""

    def test_verbs(self):
        """bibharvest oai repository - testing verbs"""
        # Each handler is called with an empty argument string; its response
        # must mention the name of the verb it implements.
        identify_response = oai_repository.oaiidentify("")
        self.assertNotEqual(None, re.search("Identify", identify_response))

        identifiers_response = oai_repository.oailistidentifiers("")
        self.assertNotEqual(None, re.search("ListIdentifiers", identifiers_response))

        records_response = oai_repository.oailistrecords("")
        self.assertNotEqual(None, re.search("ListRecords", records_response))

        formats_response = oai_repository.oailistmetadataformats("")
        self.assertNotEqual(None, re.search("ListMetadataFormats", formats_response))

        sets_response = oai_repository.oailistsets("")
        self.assertNotEqual(None, re.search("ListSets", sets_response))

        record_response = oai_repository.oaigetrecord("")
        self.assertNotEqual(None, re.search("GetRecord", record_response))
def _shift_datestamp_year(datestamp, years):
    """Return `datestamp` with its leading four-digit year moved by `years`.

    `datestamp` must start with "YYYY"; everything after the year is kept
    unchanged, except that a 29 February is moved to 28 February when the
    target year is not a leap year, so the result is always a valid date.

    The whole year is shifted (not only its last digit), so years ending
    in 9 or 0 roll over correctly (2009 + 1 -> 2010, 2010 - 1 -> 2009).
    """
    year = int(datestamp[0:4]) + years
    remainder = datestamp[4:]
    is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    if remainder.startswith("-02-29") and not is_leap:
        remainder = "-02-28" + remainder[6:]
    return "%04d%s" % (year, remainder)


class TestSelectiveHarvesting(unittest.TestCase):
    """Test set, from and until parameters used to do selective harvesting."""

    def _print_records_in_set(self, set_spec):
        """Return the concatenated printed output of all records in `set_spec`."""
        return ''.join([oai_repository.print_record(recID)
                        for recID in oai_repository.oaigetsysnolist(set=set_spec)])

    def test_set(self):
        """bibharvest oai repository - testing selective harvesting with 'set' parameter"""
        title = "Multifractal analysis of minimum bias events"
        self.assertNotEqual([], oai_repository.oaigetsysnolist(set="cern:experiment"))
        self.assert_(title in self._print_records_in_set("cern:experiment"))
        self.assert_(title not in self._print_records_in_set("cern:theory"))
        self.assertEqual([], oai_repository.oaigetsysnolist(set="nonExistingSet"))

    def test_from_and_until(self):
        """bibharvest oai repository - testing selective harvesting with 'from' and 'until' parameters"""
        # List available records, get datestamps and play with them
        identifiers = oai_repository.oailistidentifiers("")
        datestamps = re.findall(
            r'<identifier>(?P<id>.*)</identifier>\s*<datestamp>(?P<date>.*)</datestamp>',
            identifiers)

        # There must be some datestamps.  Checked before indexing so that an
        # empty listing is reported as a test failure, not as an IndexError.
        self.assertNotEqual([], datestamps)

        # Take one (oai id, datestamp) pair
        sample_oai_id, sample_datestamp = datestamps[0]
        # Find corresponding system number id
        sample_id = search_engine.perform_request_search(p=sample_oai_id,
                                                         f=CFG_OAI_ID_FIELD)[0]

        # We must be able to retrieve an id with the date we have just found
        self.assert_(sample_id in oai_repository.oaigetsysnolist(fromdate=sample_datestamp))
        self.assert_(sample_id in oai_repository.oaigetsysnolist(untildate=sample_datestamp))
        self.assert_(sample_id in oai_repository.oaigetsysnolist(untildate=sample_datestamp,
                                                                 fromdate=sample_datestamp))

        # Same, with short format date. Eg 2007-12-13
        sample_day = sample_datestamp.split('T')[0]
        self.assert_(sample_id in oai_repository.oaigetsysnolist(fromdate=sample_day))
        self.assert_(sample_id in oai_repository.oaigetsysnolist(untildate=sample_day))
        self.assert_(sample_id in oai_repository.oaigetsysnolist(fromdate=sample_day,
                                                                 untildate=sample_day))

        # At later date (year after) we should not find our id again
        later_datestamp = _shift_datestamp_year(sample_datestamp, 1)
        self.assert_(sample_id not in oai_repository.oaigetsysnolist(fromdate=later_datestamp))

        # At earlier date (year before) we should not find our id again
        earlier_datestamp = _shift_datestamp_year(sample_datestamp, -1)
        self.assert_(sample_id not in oai_repository.oaigetsysnolist(untildate=earlier_datestamp))

        # From earliest date to latest date must include all oai records.
        # Map epoch seconds -> original datestamp string, then pick the
        # extremes with min()/max(), which work on any Python version
        # (unlike sorting the result of dict.keys() in place).
        dates = dict([(time.mktime(time.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")), stamp)
                      for (dummy_oai_id, stamp) in datestamps])
        earliest_datestamp = dates[min(dates)]
        latest_datestamp = dates[max(dates)]
        self.assertEqual(len(oai_repository.oaigetsysnolist()),
                         len(oai_repository.oaigetsysnolist(fromdate=earliest_datestamp,
                                                            untildate=latest_datestamp)))
class TestErrorCodes(unittest.TestCase):
    """Test for handling OAI error codes."""

    def test_issue_error_identify(self):
        """bibharvest oai repository - testing error codes"""
        # (expected error code, request arguments) pairs; the output of
        # check_argd() for the arguments must mention the expected code.
        cases = [
            ("badVerb", {'verb': "IllegalVerb"}),
            ("badArgument", {'verb': "Identify",
                             'test': "test"}),
            ("badArgument", {'verb': "ListIdentifiers",
                             'metadataPrefix': "oai_dc",
                             'from': "some_random_date",
                             'until': "some_random_date"}),
            # 'from' and 'until' use different datestamp formats here.
            ("badArgument", {'verb': "ListIdentifiers",
                             'metadataPrefix': "oai_dc",
                             'from': "2001-01-01",
                             'until': "2002-01-01T00:00:00Z"}),
            ("badArgument", {'verb': "ListIdentifiers"}),
            ("cannotDisseminateFormat", {'verb': "ListIdentifiers",
                                         'metadataPrefix': "illegal_mdp"}),
            # NOTE(review): this dict literal used to list 'metadataPrefix'
            # twice with the same value.  Python keeps a single entry for a
            # repeated key, so the dict built was exactly this one.  The
            # intent was presumably to exercise a repeated argument, which a
            # plain dict cannot express -- confirm how check_argd() expects
            # repeated arguments to be passed and adjust this case.
            ("badArgument", {'verb': "ListIdentifiers",
                             'metadataPrefix': "oai_dc"}),
            ("badArgument", {'verb': "ListRecords",
                             'metadataPrefix': "oai_dc",
                             'set': "really_wrong_set",
                             'from': "some_random_date",
                             'until': "some_random_date"}),
            ("badArgument", {'verb': "ListRecords"}),
        ]
        for (expected_code, argd) in cases:
            response = oai_repository.check_argd(argd)
            self.assertNotEqual(None, re.search(expected_code, response),
                                "expected error code %s for arguments %r"
                                % (expected_code, argd))
class TestEncodings(unittest.TestCase):
    """Test for OAI response encodings."""

    def test_encoding(self):
        """bibharvest oai repository - testing encodings"""
        # XML special characters must come back as entity references.  The
        # previous expected value was the unescaped input itself, which
        # asserts that the encoder does nothing.
        # NOTE(review): assumes '>' is escaped as '&gt;' in addition to the
        # mandatory '<' and '&' -- confirm against encode_for_xml().
        self.assertEqual("&lt;&amp;&gt;", oai_repository.encode_for_xml("<&>"))

        # A space is percent-encoded.
        self.assertEqual("%20", oai_repository.escape_space(" "))

        # Each of these ten characters is percent-encoded, in input order.
        self.assertEqual("%25%20%3F%23%3D%26%2F%3A%3B%2B",
                         oai_repository.encode_for_url("% ?#=&/:;+"))
class TestPerformance(unittest.TestCase):
    """Test performance of the repository """

    def setUp(self):
        """Compute how many records a single ListRecords response is timed for."""
        # Determine how many records are served.  The count (not the list
        # itself) is needed: comparing an int with a list is always true on
        # Python 2 and a TypeError on Python 3.
        self.number_of_records = len(oai_repository.oaigetsysnolist("", "", ""))
        # Never budget for more records than CFG_OAI_LOAD.
        if CFG_OAI_LOAD < self.number_of_records:
            self.number_of_records = CFG_OAI_LOAD

    def test_response_speed_oai(self):
        """bibharvest oai repository - speed of response for oai_dc output"""
        allowed_seconds_per_record_oai = 0.02
        time_limit = self.number_of_records * allowed_seconds_per_record_oai

        # Test oai ListRecords performance
        t0 = time.time()
        oai_repository.oailistrecords('metadataPrefix=oai_dc&verb=ListRecords')
        t = time.time() - t0
        if t > time_limit:
            self.fail("Response for ListRecords with metadataPrefix=oai_dc took too much time:\n"
                      "%s seconds.\n"
                      "Limit: %s seconds" % (t, time_limit))

    def test_response_speed_marcxml(self):
        """bibharvest oai repository - speed of response for marcxml output"""
        allowed_seconds_per_record_marcxml = 0.05
        time_limit = self.number_of_records * allowed_seconds_per_record_marcxml

        # Test marcxml ListRecords performance
        t0 = time.time()
        oai_repository.oailistrecords('metadataPrefix=marcxml&verb=ListRecords')
        t = time.time() - t0
        if t > time_limit:
            # The blank line after the first line is part of the existing message.
            self.fail("Response for ListRecords with metadataPrefix=marcxml took too much time:\n\n"
                      "%s seconds.\n"
                      "Limit: %s seconds" % (t, time_limit))
# Suite gathering the test case classes of this module.
TEST_SUITE = make_test_suite(
    TestVerbs,
    TestErrorCodes,
    TestEncodings,
    TestSelectiveHarvesting,
    TestPerformance,
)

# Run the suite when the module is executed as a script.
if __name__ == "__main__":
    run_test_suite(TEST_SUITE)
Event Timeline
Log In to Comment