Page MenuHomec4science

bibharvest.in
No OneTemporary

File Metadata

Created
Fri, Oct 4, 18:27

bibharvest.in

## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
## start Python:
<protect>#!</protect><PYTHON>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
## okay, rest of the Python code goes below
#######
<protect>
try:
import httplib
import urllib
import sys
import re
import string
import getopt
import time
except ImportError, e:
print "Error: %s" % e
import sys
sys.exit(1)
def http_param_resume(http_param_dict, resumptionToken):
    """Build the request parameters used to continue an interrupted list.

    Only the 'verb' entry of http_param_dict is carried over; every other
    entry is dropped and the given resumptionToken is added.  A new
    dictionary is returned, the one passed in is not modified.

    Raises KeyError if http_param_dict has no 'verb' entry.
    """
    resumed = {}
    resumed['verb'] = http_param_dict['verb']
    resumed['resumptionToken'] = resumptionToken
    return resumed
def http_request_parameters(http_param_dict, method="POST"):
    """Serialise the request parameters for the given HTTP method.

    Parameters:
      http_param_dict -- mapping of argument name to argument value
      method          -- "GET" or "POST"

    Returns the parameters as an 'application/x-www-form-urlencoded'
    string ("name=value" pairs joined by "&").  The same string is usable
    as a GET query string and as a POST body.  Any other method yields
    an empty string.

    Entries with an empty name or an empty value are left out for both
    methods.  Names and values are percent-encoded, so reserved characters
    in a value (e.g. "&", "=", "+" or a space) cannot be mistaken for
    query-string syntax.
    """
    try:
        from urllib import urlencode
    except ImportError:
        # urlencode lives in urllib.parse on interpreters without urllib.urlencode
        from urllib.parse import urlencode
    if method not in ("GET", "POST"):
        return ""
    usable = {}
    for key in http_param_dict.keys():
        value = http_param_dict[key]
        if key and value:
            usable[key] = value
    return urlencode(usable)
def _write_harvest_chunk(data, output, index):
    """Store one response in '<output>.<7-digit index>', or print it to stdout
    when no output name was given."""
    if not output:
        sys.stdout.write(data)
        return
    fileoutput = open("%s.%07d" % (output, index), "w")
    try:
        fileoutput.write(data)
    finally:
        # release the file handle even if the write fails
        fileoutput.close()


def _next_resumption_token(response_text):
    """Return the resumption token found in a response, or "" if there is none
    (element absent, self-closed or empty)."""
    found = re.search(r'<resumptionToken[^>]*>([^<]+)</resumptionToken>',
                      response_text)
    if found is None:
        return ""
    # NOTE(review): the token is passed on verbatim; XML entities inside it
    # (e.g. "&amp;") are not decoded here -- confirm whether servers need that.
    return found.group(1).strip()


def OAI_Session(server, script, http_param_dict ,method="POST",output="", stylesheet=""):
    """Run a complete harvest, following resumption tokens until the end.

    Parameters:
      server          -- host (and optional port) to connect to
      script          -- path of the repository script on that host
      http_param_dict -- request arguments of the first request
      method          -- "GET" or "POST"
      output          -- base name of the output files; each response is
                         written to '<output>.NNNNNNN' (numbered from 0).
                         When empty, responses go to standard output.
      stylesheet      -- accepted but not used by this function

    After every response that carries a non-empty resumptionToken the
    function waits one second and issues a follow-up request built with
    http_param_resume().  An absent or empty token ends the harvest.
    """
    i = 0
    a = OAI_Request(server, script, http_request_parameters(http_param_dict, method), method)
    _write_harvest_chunk(a, output, i)
    token = _next_resumption_token(a)
    while token:
        i = i + 1
        # pause between consecutive requests to the same repository
        time.sleep(1)
        http_param_dict = http_param_resume(http_param_dict, token)
        a = OAI_Request(server, script, http_request_parameters(http_param_dict, method), method)
        _write_harvest_chunk(a, output, i)
        token = _next_resumption_token(a)
def help():
"Print out info"
print "\n bibharvest -fhimoprsuv baseURL\n"
print " -h help"
print " -o<outputfilename>"
print " -v<verb> OAI verb to be executed"
print " -m<method> http method (default POST)"
print " -p<metadataPrefix> metadata format"
print " -i<identifier> OAI identifier"
print " -s<set> OAI set"
print " -r<resuptionToken> Resume previous harvest"
print " -f<from> from date (datestamp)"
print " -u<until> until date (datestamp)\n"
def _retry_after_seconds(header_value, fallback):
    """Turn a Retry-After header into a number of seconds; use fallback when
    the header is missing or is not a plain integer (e.g. an HTTP-date)."""
    try:
        seconds = int(header_value)
    except (TypeError, ValueError):
        return fallback
    if seconds < 0:
        return 0
    return seconds


def _redirect_target(location, server, script):
    """Split a Location header into (server, script).  Absolute URLs replace
    both; a path keeps the current server."""
    parts = location.split("/")
    if location.find("://") >= 0 and len(parts) >= 3 and parts[2]:
        return parts[2], "/" + "/".join(parts[3:])
    if location[:1] == "/":
        return server, location
    # path relative to the directory of the current script
    return server, script[:script.rfind("/") + 1] + location


def OAI_Request(server, script, params, method="POST"):
    """Send one request and return the body of the first 200 response.

    Parameters:
      server -- host (and optional port) to connect to
      script -- path of the repository script on that host
      params -- already serialised request parameters
      method -- "GET" (params appended after "?") or "POST" (params sent
                as a form-encoded body)

    Up to ten attempts are made:
      200 -- the response body is returned
      503 -- wait for the number of seconds given by Retry-After (or
             attempt*attempt seconds when that header is absent or not an
             integer), then retry
      302 -- retry immediately against the target of the Location header
      anything else -- wait one second, then retry

    If no attempt succeeds, a message is written to standard error and the
    process exits with status 1.
    """
    headers = {"Content-type":"application/x-www-form-urlencoded", "Accept":"text/xml"}
    for attempt in range(1, 11):
        delay = 1
        conn = httplib.HTTPConnection(server)
        try:
            if method == "GET":
                conn.request("GET", script + "?" + params)
            elif method == "POST":
                conn.request("POST", script, params, headers)
            response = conn.getresponse()
            if response.status == 200:
                return response.read()
            if response.status == 503:
                delay = _retry_after_seconds(response.getheader("Retry-After"),
                                             attempt * attempt)
            elif response.status == 302:
                location = response.getheader("Location")
                if location:
                    server, script = _redirect_target(location, server, script)
                    delay = 0
        finally:
            # every attempt opens its own connection; never leave it open
            conn.close()
        if delay:
            time.sleep(delay)
    sys.stderr.write("Harvesting interrupted (after 10 attempts): %s" % params)
    sys.exit(1)
def main():
    """Parse the command line and start the harvest.

    Usage: bibharvest [options] baseURL

    Each request argument can be given in short or long form
    (-v/--verb, -p/--metadataPrefix, -i/--identifier, -s/--set,
    -f/--from, -u/--until, -r/--resumptionToken), as can -m/--method and
    -o/--output.  -x takes a stylesheet name that is handed on to
    OAI_Session().  Any other accepted option (such as -h/--help) prints
    the usage text and exits.

    A method other than "GET" or "POST" is ignored and the default
    ("POST") is kept.

    Exits with status 2 on an unrecognised option or missing option value,
    and with status 1 when baseURL is not of the form scheme://server/path.
    """
    long_options = [
        "help",
        "output=",
        "verb=",
        "method=",
        "metadataPrefix=",
        "identifier=",
        "set=",
        "from=",
        "until=",
        "resumptionToken="
    ]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hno:v:m:p:i:s:f:u:r:x:",
                                   long_options)
    except getopt.GetoptError:
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        help()
        sys.exit(2)

    # command-line switch -> request argument name, for both option forms
    request_arguments = {}
    for short, name in (("v", "verb"), ("p", "metadataPrefix"),
                        ("i", "identifier"), ("s", "set"), ("f", "from"),
                        ("u", "until"), ("r", "resumptionToken")):
        request_arguments["-" + short] = name
        request_arguments["--" + name] = name

    http_param_dict = {}
    method = "POST"
    output = ""
    stylesheet = ""
    # get options and arguments
    for opt, opt_value in opts:
        if opt in request_arguments:
            http_param_dict[request_arguments[opt]] = opt_value
        elif opt in ("-m", "--method"):
            if opt_value in ("GET", "POST"):
                method = opt_value
        elif opt in ("-o", "--output"):
            output = opt_value
        elif opt == "-x":
            stylesheet = opt_value
        else:
            help()
            sys.exit()

    if not args:
        help()
        sys.exit()

    # "scheme://server/path" splits into [scheme:, "", server, path...]
    url_parts = args[0].split("/")
    if len(url_parts) < 3 or not url_parts[2]:
        sys.stderr.write("Error: baseURL must look like http://server/path, got: %s\n" % args[0])
        sys.exit(1)
    server = url_parts[2]
    script = "/" + "/".join(url_parts[3:])
    OAI_Session(server, script, http_param_dict, method, output, stylesheet)
if __name__ == '__main__':
main()
</protect>

Event Timeline