Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86792522
bibharvest.wml
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Oct 8, 15:45
Size
8 KB
Mime Type
text/x-c
Expires
Thu, Oct 10, 15:45 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
21480127
Attached To
R3600 invenio-infoscience
bibharvest.wml
View Options
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002, 2003, 2004, 2005 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
#include "cdswmllib.wml"
## start Python:
<protect>#!</protect><PYTHON>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
"""CDSware OAI harvestor."""
__version__ = "<: print generate_pretty_version_string('$Id$'); :>"
## okay, rest of the Python code goes below
#######
<protect>
try:
import httplib
import urllib
import sys
import re
import string
import getopt
import time
except ImportError, e:
print "Error: %s" % e
import sys
sys.exit(1)
# Labels shown next to the HTTP status code of every logged response;
# the "000" entry is the label used for any code missing from the table.
http_response_status_code = {
    "503": "Service Unavailable",
    "500": "Error",
    "404": "Not Found",
    "403": "Forbidden",
    "302": "Redirect",
    "200": "OK",
    "100": "Continue",
    "000": "Unknown",
}
def http_param_resume(http_param_dict,resumptionToken):
    """Build the arguments of the request that resumes a harvest.

    Only the verb of the previous request is carried over; it is paired
    with the given resumption token and every other argument of
    http_param_dict is left out.  A new dictionary is returned, the
    one passed in is not modified.
    """
    resumed = {}
    resumed['verb'] = http_param_dict['verb']
    resumed['resumptionToken'] = resumptionToken
    return resumed
def http_request_parameters(http_param_dict, method="POST"):
    """Serialise the request arguments for the HTTP method used.

    Arguments whose name or value is empty are left out and the
    remaining ones are URL-encoded, so that the result can be used
    as the query string of a GET request as well as the body of a
    POST request.  Values such as resumption tokens may contain
    characters like "&", "=" or spaces, which would corrupt the
    request if they were sent as they are.

    An empty string is returned for any method other than "GET" and
    "POST".
    """
    if method != "GET" and method != "POST":
        return ""
    http_param = {}
    for key in http_param_dict.keys():
        # an empty name or an empty value only yields a malformed pair
        if key and http_param_dict[key]:
            http_param[key] = http_param_dict[key]
    return urllib.urlencode(http_param)
def _resumption_token(page):
    "Return the resumption token carried by an OAI response, '' if none"
    # Only the text of the resumptionToken element itself is captured,
    # whatever else shares its line; an empty element (end of the list)
    # and a missing element both give an empty string.
    found = re.search('<resumptionToken[^>]*>([^<]*)</resumptionToken>', page)
    if found is None:
        return ""
    return found.group(1).strip()


def _store_page(output, sequence, page):
    "Write one response to its numbered output file, or to stdout"
    if output:
        write_file("%s.%07d" % (output, sequence), page)
    else:
        sys.stdout.write(page)


def OAI_Session(server, script, http_param_dict ,method="POST",output="", stylesheet=""):
    """Run a whole OAI harvesting session.

    The first request is built from http_param_dict and sent to
    script on server with the given HTTP method.  As long as a
    response carries a non-empty resumption token, the response is
    stored and, after a pause of one second, the harvest is resumed
    with that token.  The response without such a token is stored
    last and ends the session.

    Every response is written to the file "output.NNNNNNN", where
    NNNNNNN is the zero-padded rank of the response starting from 0,
    or to standard output when output is empty.

    The stylesheet parameter is accepted but not used.
    """
    sys.stderr.write("Starting the harvesting session at %s" % time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
    sys.stderr.write("%s - %s\n" % (server, http_request_parameters(http_param_dict)))
    page = OAI_Request(server, script, http_request_parameters(http_param_dict, method), method)
    token = _resumption_token(page)
    i = 0
    while token:
        _store_page(output, i, page)
        i = i + 1
        # leave the repository some air between two requests
        time.sleep(1)
        http_param_dict = http_param_resume(http_param_dict, token)
        page = OAI_Request(server, script, http_request_parameters(http_param_dict, method), method)
        token = _resumption_token(page)
    _store_page(output, i, page)
def write_file(filename="harvest",a=""):
    """Write the string a to the file filename.

    The file is created if needed and any previous content is
    replaced.  The file is closed even when the write fails; the
    error itself is left to the caller.
    """
    f = open(filename,"w")
    try:
        f.write(a)
    finally:
        f.close()
def help():
    """Print the command line usage on standard output.

    One line is printed for each documented option; a blank line
    surrounds the synopsis and closes the list.
    """
    usage = [
        "\n bibharvest -fhimoprsuv baseURL\n",
        " -h print this help",
        " -V print version number",
        " -o<outputfilename> specify output file",
        " -v<verb> OAI verb to be executed",
        " -m<method> http method (default POST)",
        " -p<metadataPrefix> metadata format",
        " -i<identifier> OAI identifier",
        " -s<set> OAI set",
        " -r<resumptionToken> Resume previous harvest",
        " -f<from> from date (datestamp)",
        " -u<until> until date (datestamp)\n",
    ]
    sys.stdout.write("\n".join(usage) + "\n")
def OAI_Request(server, script, params, method="POST"):
"Handle OAi request"
</protect>
headers = {"Content-type":"application/x-www-form-urlencoded", "Accept":"text/xml", "From":"<ADMINEMAIL>", "User-Agent":"CDSware <VERSION>"}
<protect>
i = 0
while i < 10:
i = i + 1
conn = httplib.HTTPConnection(server)
if method == "GET":
</protect>
conn.putrequest(method,script + "?" + params)
conn.putheader("Content-type","application/x-www-form-urlencoded")
conn.putheader("Accept","text/xml")
conn.putheader("From","<ADMINEMAIL>")
conn.putheader("User-Agent","<CDSNAME>")
conn.endheaders()
<protect>
elif method == "POST":
conn.request("POST", script, params, headers)
response = conn.getresponse()
status = "%d" % response.status
if http_response_status_code.has_key(status):
sys.stderr.write("%s(%s) : %s : %s\n" % (status, http_response_status_code[status], response.reason, params))
else:
sys.stderr.write("%s(%s) : %s : %s\n" % (status, http_response_status_code['000'], response.reason, params))
if response.status == 200:
i = 10
data = response.read()
conn.close()
return data
elif response.status == 503:
sys.stderr.write("Retry in %d seconds...\n" % string.atoi(response.getheader("Retry-After","%d" % (i*i))))
time.sleep(string.atoi(response.getheader("Retry-After","%d" % (i*i))))
elif response.status == 302:
sys.stderr.write("Redirecting...\n")
server = response.getheader("Location").split("/")[2]
script = "/" + string.join(response.getheader("Location").split("/")[3:],"/")
else:
sys.stderr.write("Retry in 10 seconds...\n")
time.sleep(10)
sys.stderr.write("Harvesting interrupted (after 10 attempts) at %s: %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())),params)
sys.exit(1)
def main():
    """Read the command line and run the requested harvest.

    The options fill in the OAI request arguments, the HTTP method
    and the output file; each of them can be given in its short form
    or in its long form (for instance -v ListRecords or
    --verb=ListRecords).  The first remaining argument is the base URL
    of the repository, from which the server and the path of the OAI
    interface are taken.

    The usage is printed and the program exits when the options cannot
    be parsed (status 1), when help is asked for or an option is not
    handled (status 0), when no base URL is given (status 0) or when
    the base URL has no scheme (status 1).
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:],"hVo:v:m:p:i:s:f:u:r:x:",
                                   [
                                    "help",
                                    "version",
                                    "output=",
                                    "verb=",
                                    "method=",
                                    "metadataPrefix=",
                                    "identifier=",
                                    "set=",
                                    "from=",
                                    "until=",
                                    "resumptionToken="
                                   ]
                                  )
    except getopt.error:
        help()
        sys.exit(1)

    # options that simply set the OAI request argument of the same name
    oai_arguments = {
        "-v" : "verb",
        "--verb" : "verb",
        "-p" : "metadataPrefix",
        "--metadataPrefix" : "metadataPrefix",
        "-i" : "identifier",
        "--identifier" : "identifier",
        "-s" : "set",
        "--set" : "set",
        "-f" : "from",
        "--from" : "from",
        "-u" : "until",
        "--until" : "until",
        "-r" : "resumptionToken",
        "--resumptionToken" : "resumptionToken"
    }

    http_param_dict = {}
    method = "POST"
    output = ""
    stylesheet = ""

    # get options and arguments
    for opt, opt_value in opts:
        if oai_arguments.has_key(opt):
            http_param_dict[oai_arguments[opt]] = opt_value
        elif opt in ["-m", "--method"]:
            # any other method is ignored and the default is kept
            if opt_value == "GET" or opt_value == "POST":
                method = opt_value
        elif opt in ["-o", "--output"]:
            output = opt_value
        elif opt == "-x":
            stylesheet = opt_value
        elif opt in ["-V", "--version"]:
            sys.stdout.write("%s\n" % __version__)
            sys.exit(0)
        else:
            help()
            sys.exit()

    if len(args) > 0:
        url_parts = args[0].split("/")
        if len(url_parts) < 3:
            # without "scheme://" there is no server item to pick up
            sys.stderr.write("Error: baseURL must look like http://server/path\n")
            help()
            sys.exit(1)
        server = url_parts[2]
        script = "/" + string.join(url_parts[3:],"/")
        OAI_Session(server, script, http_param_dict, method, output, stylesheet)
        sys.stderr.write("Harvesting successfully completed at: %s\n\n" % time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
    else:
        help()
        sys.exit()
# Start the harvester only when this file is run as a program.
if "__main__" == __name__:
    main()
</protect>
Event Timeline
Log In to Comment