diff --git a/modules/bibharvest/lib/.cvsignore b/modules/bibharvest/lib/.cvsignore index 6a1572735..d4766d920 100644 --- a/modules/bibharvest/lib/.cvsignore +++ b/modules/bibharvest/lib/.cvsignore @@ -1,8 +1,5 @@ Makefile Makefile.in z_* *.O *~ -oai1d -oai2d -*.py \ No newline at end of file diff --git a/modules/bibharvest/lib/Makefile.am b/modules/bibharvest/lib/Makefile.am index 8bdf02188..a25e81f98 100644 --- a/modules/bibharvest/lib/Makefile.am +++ b/modules/bibharvest/lib/Makefile.am @@ -1,29 +1,28 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -pylibdir=$(libdir)/python/cdsware -pylib_DATA=oai_repository.py oai_repository_tests.py +pylibdir = $(libdir)/python/cdsware +pylib_DATA = oai_repository.py oai_repository_config.py oai_repository_tests.py -FILESWML = $(wildcard $(srcdir)/*.wml) -EXTRA_DIST = $(FILESWML:$(srcdir)/%=%) +EXTRA_DIST = oai_repository.py oai_repository_config.py.wml oai_repository_tests.py -CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc +CLEANFILES = oai_repository_config.py *~ *.tmp *.pyc -%.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml +oai_repository_config.py: oai_repository_config.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml $(WML) -o $@ $< diff --git a/modules/bibharvest/lib/oai_repository.py b/modules/bibharvest/lib/oai_repository.py index b98b85614..f37eb4ff8 100644 --- a/modules/bibharvest/lib/oai_repository.py +++ b/modules/bibharvest/lib/oai_repository.py @@ -1,914 +1,885 @@ ## $Id$ ## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0 ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -## read config variables: -#include "config.wml" -#include "configbis.wml" - -## $Id$ -## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0""" -## OAI config variables -oaiidprefix = "" -oaisampleidentifier = "" -oaiidentifydescription = """""" -oaiidfield = "" -oaisetfield = "" -oaideleted = "" - import cPickle import string from string import split import os import re import urllib import sys import time import md5 +from oai_repository_config import * from config import * from dbquery import run_sql verbs = { "Identify" : [""], "ListSets" : ["resumptionToken"], "ListMetadataFormats" : ["resumptionToken"], "ListRecords" : ["resumptionToken"], "ListIdentifiers" : ["resumptionToken"], "GetRecord" : [""] } params = { "verb" : ["Identify","ListIdentifiers","ListSets","ListMetadataFormats","ListRecords","GetRecord"], "metadataPrefix" : ["","oai_dc","marcxml"], "from" :[""], "until":[""], "set" :[""], "identifier": [""] } - - def encode_for_xml(strxml): "Encode special chars in string for XML-compliancy." if strxml == None: return strxml else: strxml = string.replace(strxml, '&', '&') strxml = string.replace(strxml, '<', '<') return strxml def escape_space(strxml): "Encode special chars in string for URL-compliancy." strxml = string.replace(strxml, ' ', '%20') return strxml def encode_for_url(strxml): "Encode special chars in string for URL-compliancy." strxml = string.replace(strxml, '%', '%25') strxml = string.replace(strxml, ' ', '%20') strxml = string.replace(strxml, '?', '%3F') strxml = string.replace(strxml, '#', '%23') strxml = string.replace(strxml, '=', '%3D') strxml = string.replace(strxml, '&', '%26') strxml = string.replace(strxml, '/', '%2F') strxml = string.replace(strxml, ':', '%3A') strxml = string.replace(strxml, ';', '%3B') strxml = string.replace(strxml, '+', '%2B') return strxml def oai_header(args, verb): "Print OAI header" out = "" out = out + "" + "\n" out = out + "\n" out = out + " " + oaigetresponsedate() + "\n" if verb: out = out + " %s\n" % (verb, oaigetrequesturl(args)) out = out + " <%s>\n" % verb else: out = out + " %s\n" % (oaigetrequesturl(args)) return out def oai_footer(verb): "Print OAI footer" out = "" if verb: out = "%s \n" % (out, verb) out = out + "\n" return out def oai_error_header(args, verb): "Print OAI header" out = "" ### out = "Content-Type: text/xml\n\n" out = out + "" + "\n" out = out + "\n" out = out + " " + oaigetresponsedate() + "\n" out = out + " %s\n" % (verb, oaigetrequesturl(args)) return out def oai_error_footer(verb): "Print OAI footer" out = verb out = "\n" return out def get_field(sysno, field): "Gets list of field 'field' for the record with 'sysno' system number." out = [] digit = field[0:2] bibbx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" % (bibbx, bibx, sysno, field) res = run_sql(query) for row in res: out.append(row[0]) return out def utc_to_localtime(date): "Convert UTC to localtime" ldate = date.split("T")[0] ltime = date.split("T")[1] lhour = ltime.split(":")[0] lminute = ltime.split(":")[1] lsec = ltime.split(":")[2] lyear = ldate.split("-")[0] lmonth = ldate.split("-")[1] lday = ldate.split("-")[2] timetoconvert = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime((string.atoi(lyear), string.atoi(lmonth), string.atoi(lday), string.atoi(lhour), string.atoi(lminute), string.atoi(lsec[:-1]), 0, 0, -1)) - time.timezone + (time.daylight)*3600)) return timetoconvert def localtime_to_utc(date): "Convert localtime to UTC" ldate = date.split(" ")[0] ltime = date.split(" ")[1] lhour = ltime.split(":")[0] lminute = ltime.split(":")[1] lsec = ltime.split(":")[2] lyear = ldate.split("-")[0] lmonth = ldate.split("-")[1] lday = ldate.split("-")[2] timetoconvert = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.mktime((string.atoi(lyear), string.atoi(lmonth), string.atoi(lday), string.atoi(lhour), string.atoi(lminute), string.atoi(lsec), 0, 0, -1)))) return timetoconvert def get_creation_date(sysno): "Returns the creation date of the record 'sysno'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date, '%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) if res[0][0]: out = localtime_to_utc(res[0][0]) return out def get_modification_date(sysno): "Returns the date of last modification for the record 'sysno'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) if res[0][0]: out = localtime_to_utc(res[0][0]) return out def get_earliest_datestamp(): "Get earliest datestamp in the database" out = "" res = run_sql("SELECT MIN(DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s')) FROM bibrec", (), 1) if res[0][0]: out = localtime_to_utc(res[0][0]) return out def check_date(date, dtime="T00:00:00Z"): "Check if the date has a correct format" if(re.sub("[0123456789\-:TZ]", "", date) == ""): if len(date) == 10: date = date + dtime if len(date) == 20: date = utc_to_localtime(date) else: date = "" else: date = "" return date def record_exists(sysno): "Returns 1 if record with SYSNO 'sysno' exists. Returns 0 otherwise." out = 0 query = "SELECT id FROM bibrec WHERE id='%s'" % (sysno) res = run_sql(query) for row in res: if row[0] != "": out = 1 return out def print_record(sysno, format='marcxml'): "Prints record 'sysno' formatted accoding to 'format'." out = "" # sanity check: if not record_exists(sysno): return if (format == "dc") or (format == "oai_dc"): format = "xd" # print record opening tags: out = out + " \n" if is_deleted(sysno) and oaideleted != "no": out = out + "
\n" else: out = out + "
\n" for ident in get_field(sysno, oaiidfield): out = "%s %s\n" % (out, escape_space(ident)) out = "%s %s\n" % (out, get_modification_date(sysno)) for set in get_field(sysno, oaisetfield): out = "%s %s\n" % (out, set) out = out + "
\n" if is_deleted(sysno) and oaideleted != "no": pass else: out = out + " \n" if format == "marcxml": out = out + " " out = out + " 00000coc 2200000uu 4500" ## MARC21 and XML formats, possibley OAI -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out = "%s %d\n" % (out, int(sysno)) for digit1 in range(0, 10): for digit2 in range(0, 10): bibbx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\ "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx, sysno, str(digit1)+str(digit2)) res = run_sql(query) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_": ind1 = " " if ind2 == "_": ind2 = " " # print field tag if field_number != field_number_old or field[:-1] != field_old[:-1]: if format == "marcxml": if field_number_old != -999: out = out + " \n" out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower()) field_number_old = field_number field_old = field # print subfield value if format == "marcxml": value = encode_for_xml(value) out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value) # fetch next subfield # all fields/subfields printed in this run, so close the tag: if (format == "marcxml") and field_number_old != -999: out = out + " \n" out = out + " \n" elif format == "xd": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out = out + " \n" for field_ in get_field(sysno, "041__a"): out = "%s %s\n" % (out, field_) for field_ in get_field(sysno, "100__a"): out = "%s %s\n" % (out, encode_for_xml(field_)) for field_ in get_field(sysno, "700__a"): out = "%s %s\n" % (out, encode_for_xml(field_)) for field_ in get_field(sysno, "245__a"): out = "%s %s\n" % (out, encode_for_xml(field_)) for field_ in get_field(sysno, "111__a"): out = "%s %s\n" % (out, encode_for_xml(field_)) for field_ in get_field(sysno, "65017a"): out = "%s %s\n" % (out, encode_for_xml(field_)) for field_ in get_field(sysno, "8564_u"): out = "%s %s\n" % (out, encode_for_xml(escape_space(field_))) for field_ in get_field(sysno, "520__a"): out = "%s %s\n" % (out, encode_for_xml(field_)) date = get_creation_date(sysno) out = "%s %s\n" % (out, date) out = out + " \n" # print record closing tags: out = out + " \n" out = out + " \n" return out def oailistmetadataformats(args): "Generates response to oailistmetadataformats verb." arg = parse_args(args) out = "" flag = 1 # list or not depending on identifier if arg['identifier'] != "": flag = 0 sysno = oaigetsysno(arg['identifier']) if record_exists(sysno): flag = 1 else: out = out + oai_error("idDoesNotExist","invalid record Identifier") out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats") return out if flag: out = out + " \n" out = out + " oai_dc\n" out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n" out = out + " http://purl.org/dc/elements/1.1/\n" out = out + " \n" out = out + " \n" out = out + " marcxml\n" out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n" out = out + " http://www.loc.gov/MARC21/slim\n" out = out + " \n" out = oai_header(args, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats") return out def oailistrecords(args): "Generates response to oailistrecords verb." - - oai_rt_expire = - nb_records_in_resume = - - arg = parse_args(args) out = "" sysnos = [] sysno = [] # check if the resumptionToken did not expire if arg['resumptionToken']: filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) if os.path.exists(filename) == 0: out = oai_error("badResumptionToken", "ResumptionToken expired") out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") return out if arg['resumptionToken'] != "": sysnos = oaicacheout(arg['resumptionToken']) arg['metadataPrefix'] = sysnos.pop() else: sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until']) if len(sysnos) == 0: # noRecordsMatch error out = out + oai_error("noRecordsMatch", "no_ records correspond to the request") out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") return out i = 0 for sysno_ in sysnos: if sysno_: i = i + 1 if i > nb_records_in_resume: # cache or write? if i == nb_records_in_resume + 1: # resumptionToken? arg['resumptionToken'] = oaigenresumptionToken() extdate = oaigetresponsedate(oai_rt_expire) if extdate: out = "%s %s\n" % (out, extdate, arg['resumptionToken']) else: out = "%s %s\n" % (out, arg['resumptionToken']) sysno.append(sysno_) else: done = 0 for field_ in get_field(sysno_, "245__a"): if done == 0: out = out + print_record(sysno_, arg['metadataPrefix']) if i > nb_records_in_resume: oaicacheclean() sysno.append(arg['metadataPrefix']) oaicachein(arg['resumptionToken'], sysno) out = oai_header(args, "ListRecords") + out + oai_footer("ListRecords") return out def oailistsets(args): "Lists available sets for OAI metadata harvesting." out = "" # note: no flow control in ListSets sets = get_sets() for set_ in sets: out = out + " \n" out = "%s %s\n" % (out, set_[0]) out = "%s %s\n" % (out, set_[1]) if set_[2]: out = "%s %s\n" % (out, set_[2]) out = out + " \n" out = oai_header(args, "ListSets") + out + oai_footer("ListSets") return out def oaigetrecord(args): """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.""" arg = parse_args(args) out = "" sysno = oaigetsysno(arg['identifier']) if record_exists(sysno): datestamp = get_modification_date(sysno) out = out + print_record(sysno, arg['metadataPrefix']) else: out = out + oai_error("idDoesNotExist", "invalid record Identifier") out = oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord") return out out = oai_header(args, "GetRecord") + out + oai_footer("GetRecord") return out def oailistidentifiers(args): "Prints OAI response to the ListIdentifiers verb." - - oai_rt_expire = - nb_identifiers_in_resume = - - arg = parse_args(args) out = "" sysno = [] sysnos = [] if arg['resumptionToken']: filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) if os.path.exists(filename) == 0: out = out + oai_error("badResumptionToken", "ResumptionToken expired") out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") return out if arg['resumptionToken']: sysnos = oaicacheout(arg['resumptionToken']) else: sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until']) if len(sysnos) == 0: # noRecordsMatch error out = out + oai_error("noRecordsMatch", "no records correspond to the request") out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") return out i = 0 for sysno_ in sysnos: if sysno_: i = i + 1 if i > nb_identifiers_in_resume: # cache or write? if i == nb_identifiers_in_resume + 1: # resumptionToken? arg['resumptionToken'] = oaigenresumptionToken() extdate = oaigetresponsedate(oai_rt_expire) if extdate: out = "%s %s\n" % (out, extdate, arg['resumptionToken']) else: out = "%s %s\n" % (out, arg['resumptionToken']) sysno.append(sysno_) else: done = 0 for field_ in get_field(sysno_, "245__a"): if done == 0: for ident in get_field(sysno_, oaiidfield): if is_deleted(sysno_) and oaideleted != "no": out = out + "
\n" else: out = out + "
\n" out = "%s %s\n" % (out, escape_space(ident)) out = "%s %s\n" % (out, get_modification_date(oaigetsysno(ident))) for set in get_field(sysno_, oaisetfield): out = "%s %s\n" % (out, set) out = out + "
\n" done = 1 if i > nb_identifiers_in_resume: oaicacheclean() # clean cache from expired resumptionTokens oaicachein(arg['resumptionToken'], sysno) out = oai_header(args, "ListIdentifiers") + out + oai_footer("ListIdentifiers") return out def oaiidentify(args): "Generates response to oaiidentify verb." out = "" repositoryname = " " + cdsname + "\n" baseurl = " %s/oai2d.py/\n" % weburl protocolversion = " 2.0\n" adminemail = " %s\n" % supportemail earliestdst = " %s\n" % get_earliest_datestamp() deletedrecord = " %s\n" % oaideleted repositoryidentifier = "%s" % oaiidprefix sampleidentifier = oaisampleidentifier identifydescription = oaiidentifydescription + "\n" out = out + repositoryname out = out + baseurl out = out + protocolversion out = out + adminemail out = out + earliestdst out = out + deletedrecord out = out + " YYYY-MM-DDThh:mm:ssZ\n" # print " \n" out = out + oaiidentifydescription out = oai_header(args, "Identify") + out + oai_footer("Identify") return out def oaigetrequesturl(args): "Generates requesturl tag for OAI." # re_amp = re.compile('&') requesturl = weburl + "/" + "oai2d.py/"# + "?" + re_amp.sub("&", args) return requesturl def oaigetresponsedate(delay=0): "Generates responseDate tag for OAI." return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() + delay)) def oai_error(code, msg): "OAI error occured" return "%s\n" % (code, msg) def oaigetsysno(identifier): "Returns the first MySQL BIB ID for the OAI identifier 'identifier', if it exists." sysno = None if identifier: query = "SELECT DISTINCT(bb.id_bibrec) FROM bib%sx AS bx, bibrec_bib%sx AS bb WHERE bx.tag='%s' AND bb.id_bibxxx=bx.id AND bx.value='%s'" % (oaiidfield[0:2], oaiidfield[0:2], oaiidfield, identifier) res = run_sql(query) for row in res: sysno = row[0] return sysno def oaigetsysnolist(set, fromdate, untildate): "Returns list of system numbers for the OAI set 'set', modified from 'date_from' until 'date_until'." out_dict = {} # dict to hold list of out sysnos as its keys if set: query = "SELECT DISTINCT bibx.id_bibrec FROM bib%sx AS bx LEFT JOIN bibrec_bib%sx AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s' AND bx.value='%s'" % (oaiidfield[0:2], oaiidfield[0:2], oaisetfield, set) else: query = "SELECT DISTINCT bibx.id_bibrec FROM bib%sx AS bx LEFT JOIN bibrec_bib%sx AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s'" % (oaiidfield[0:2], oaiidfield[0:2], oaiidfield) if untildate: query = query + " AND b.modification_date <= '%s'" % untildate if fromdate: query = query + " AND b.modification_date >= '%s'" % fromdate res = run_sql(query) for row in res: out_dict[row[0]] = 1 return out_dict.keys() def is_deleted(recid): "Check if record with recid has been deleted. Return 1 if deleted." query = "select a.id from bibrec as a left join bibrec_bib98x as b on a.id=b.id_bibrec left join bib98x as c on b.id_bibxxx=c.id where c.value='DELETED' and a.id=%s" % recid res = run_sql(query) for item in res: if item == None: return 0 else: return 1 def oaigenresumptionToken(): "Generates unique ID for resumption token management." return md5.new(str(time.time())).hexdigest() def oaicachein(resumptionToken, sysnos): "Stores or adds sysnos in cache. Input is a string of sysnos separated by commas." filename = "%s/RTdata/%s" % (logdir, resumptionToken) fil = open(filename, "w") cPickle.dump(sysnos, fil) fil.close() return 1 def oaicacheout(resumptionToken): "Restores string of comma-separated system numbers from cache." sysnos = [] filename = "%s/RTdata/%s" % (logdir, resumptionToken) if oaicachestatus(resumptionToken): fil = open(filename, "r") sysnos = cPickle.load(fil) fil.close() else: return 0 return sysnos def oaicacheclean(): "Removes cached resumptionTokens older than specified" - - oai_rt_expire = - - directory = "%s/RTdata" % logdir files = os.listdir(directory) for file_ in files: filename = directory + "/" + file_ # cache entry expires when not modified during a specified period of time if ((time.time() - os.path.getmtime(filename)) > oai_rt_expire): os.remove(filename) return 1 def oaicachestatus(resumptionToken): "Checks cache status. Returns 0 for empty, 1 for full." filename = "%s/RTdata/%s" % (logdir, resumptionToken) if os.path.exists(filename): if os.path.getsize(filename) > 0: return 1 else: return 0 else: return 0 def get_sets(): "Returns list of sets." out = [] row = ['', ''] query = "SELECT setSpec,setName,setDescription FROM oaiSET" res = run_sql(query) for row in res: row_bis = [row[0], row[1], row[2]] out.append(row_bis) return out def parse_args(args=""): "Parse input args" out_args = { "verb" : "", "metadataPrefix" : "", "from" : "", "until" : "", "set" : "", "identifier" : "", "resumptionToken" : "" } if args == "" or args == None: pass else: list_of_arguments = args.split('&') for item in list_of_arguments: keyvalue = item.split('=') if len(keyvalue) == 2: if (out_args.has_key(keyvalue[0])): if(out_args[keyvalue[0]] != ""): out_args[keyvalue[0]] = "Error" else: out_args[keyvalue[0]] = urllib.unquote(keyvalue[1]) else: out_args[keyvalue[0]] = urllib.unquote(keyvalue[1]) else: out_args['verb'] = "" return out_args def check_args(arguments): "Check OAI arguments" out_args = { "verb" : "", "metadataPrefix" : "", "from" : "", "until" : "", "set" : "", "identifier" : "", "resumptionToken" : "" } out = "" ## principal argument required # # if verbs.has_key(arguments['verb']): pass else: out = out + oai_error("badVerb", "Illegal OAI verb") ## defined args # # for param in arguments.keys(): if out_args.has_key(param): pass else: out = out + oai_error("badArgument", "The request includes illegal arguments") ## unique args # # for param in arguments.keys(): if (arguments[param] == "Error"): out = out + oai_error("badArgument", "The request includes illegal arguments") ## resumptionToken exclusive # # if ((arguments['from'] != "" or arguments['until'] != "" or arguments['metadataPrefix'] != "" or arguments['identifier'] != "" or arguments['set'] != "") and arguments['resumptionToken'] != ""): out = out + oai_error("badArgument", "The request includes illegal arguments") ## datestamp formats # # if arguments['from'] != "" and arguments['from'] != "": from_length = len(arguments['from']) if check_date(arguments['from'], "T00:00:00Z") == "": out = out + oai_error("badArgument", "Bad datestamp format in from") else: from_length = 0 if arguments['until'] != "" and arguments['until'] != "": until_length = len(arguments['until']) if check_date(arguments['until'], "T23:59:59Z") == "": out = out + oai_error("badArgument", "Bad datestamp format in until") else: until_length = 0 if from_length != 0: if until_length != 0: if from_length != until_length: out = out + oai_error("badArgument", "Bad datestamp format") if arguments['from'] > arguments['until']: out = out + oai_error("badArgument", "Wrong date") ## Identify exclusive # # if (arguments['verb'] =="Identify" and (arguments['metadataPrefix'] != "" or arguments['identifier'] != "" or arguments['set'] != "" or arguments['from'] != "" or arguments['until'] != "" or arguments['resumptionToken'] != "")): out = out + oai_error("badArgument", "The request includes illegal arguments") ## parameters for GetRecord # # if arguments['verb'] =="GetRecord" and arguments['identifier'] == "": out = out + oai_error("badArgument", "Record identifier missing") if arguments['verb'] =="GetRecord" and arguments['metadataPrefix'] == "": out = out + oai_error("badArgument", "Missing metadataPrefix") ## parameters for ListRecords and ListIdentifiers # # if (arguments['verb'] =="ListRecords" or arguments['verb'] =="ListIdentifiers") and (arguments['metadataPrefix'] == "" and arguments['resumptionToken'] == ""): out = out + oai_error("badArgument", "Missing metadataPrefix") ## Metadata prefix defined # # if arguments.has_key('metadataPrefix'): if ((arguments['metadataPrefix'] in params['metadataPrefix']) or (params['metadataPrefix'] == "")): pass else: out = out + oai_error("badArgument", "Missing metadataPrefix") return out - + diff --git a/modules/bibharvest/lib/oai_repository.py.wml b/modules/bibharvest/lib/oai_repository.py.wml deleted file mode 100644 index b98b85614..000000000 --- a/modules/bibharvest/lib/oai_repository.py.wml +++ /dev/null @@ -1,914 +0,0 @@ -## $Id$ -## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0 - -## This file is part of the CERN Document Server Software (CDSware). -## Copyright (C) 2002, 2003, 2004, 2005 CERN. -## -## The CDSware is free software; you can redistribute it and/or -## modify it under the terms of the GNU General Public License as -## published by the Free Software Foundation; either version 2 of the -## License, or (at your option) any later version. -## -## The CDSware is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with CDSware; if not, write to the Free Software Foundation, Inc., -## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -## read config variables: -#include "config.wml" -#include "configbis.wml" - -## $Id$ -## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. -"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0""" - -## OAI config variables -oaiidprefix = "" -oaisampleidentifier = "" -oaiidentifydescription = """""" -oaiidfield = "" -oaisetfield = "" -oaideleted = "" - -import cPickle -import string -from string import split -import os -import re -import urllib -import sys -import time -import md5 - -from config import * -from dbquery import run_sql - -verbs = { - "Identify" : [""], - "ListSets" : ["resumptionToken"], - "ListMetadataFormats" : ["resumptionToken"], - "ListRecords" : ["resumptionToken"], - "ListIdentifiers" : ["resumptionToken"], - "GetRecord" : [""] -} - -params = { - "verb" : ["Identify","ListIdentifiers","ListSets","ListMetadataFormats","ListRecords","GetRecord"], - "metadataPrefix" : ["","oai_dc","marcxml"], - "from" :[""], - "until":[""], - "set" :[""], - "identifier": [""] -} - - - -def encode_for_xml(strxml): - "Encode special chars in string for XML-compliancy." - - if strxml == None: - return strxml - else: - strxml = string.replace(strxml, '&', '&') - strxml = string.replace(strxml, '<', '<') - return strxml - -def escape_space(strxml): - "Encode special chars in string for URL-compliancy." - - strxml = string.replace(strxml, ' ', '%20') - return strxml - -def encode_for_url(strxml): - "Encode special chars in string for URL-compliancy." - - strxml = string.replace(strxml, '%', '%25') - strxml = string.replace(strxml, ' ', '%20') - strxml = string.replace(strxml, '?', '%3F') - strxml = string.replace(strxml, '#', '%23') - strxml = string.replace(strxml, '=', '%3D') - strxml = string.replace(strxml, '&', '%26') - strxml = string.replace(strxml, '/', '%2F') - strxml = string.replace(strxml, ':', '%3A') - strxml = string.replace(strxml, ';', '%3B') - strxml = string.replace(strxml, '+', '%2B') - - return strxml - -def oai_header(args, verb): - "Print OAI header" - - out = "" - - out = out + "" + "\n" - out = out + "\n" - - out = out + " " + oaigetresponsedate() + "\n" - - if verb: - out = out + " %s\n" % (verb, oaigetrequesturl(args)) - out = out + " <%s>\n" % verb - else: - out = out + " %s\n" % (oaigetrequesturl(args)) - - return out - -def oai_footer(verb): - "Print OAI footer" - - out = "" - - if verb: - out = "%s \n" % (out, verb) - out = out + "\n" - - return out - -def oai_error_header(args, verb): - "Print OAI header" - - out = "" - -### out = "Content-Type: text/xml\n\n" - out = out + "" + "\n" - out = out + "\n" - - out = out + " " + oaigetresponsedate() + "\n" - out = out + " %s\n" % (verb, oaigetrequesturl(args)) - - return out - -def oai_error_footer(verb): - "Print OAI footer" - - out = verb - out = "\n" - return out - -def get_field(sysno, field): - "Gets list of field 'field' for the record with 'sysno' system number." - - out = [] - digit = field[0:2] - - bibbx = "bib%sx" % digit - bibx = "bibrec_bib%sx" % digit - query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" % (bibbx, bibx, sysno, field) - - res = run_sql(query) - - for row in res: - - out.append(row[0]) - - return out - -def utc_to_localtime(date): - "Convert UTC to localtime" - - ldate = date.split("T")[0] - ltime = date.split("T")[1] - - lhour = ltime.split(":")[0] - lminute = ltime.split(":")[1] - lsec = ltime.split(":")[2] - - lyear = ldate.split("-")[0] - lmonth = ldate.split("-")[1] - lday = ldate.split("-")[2] - - timetoconvert = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime((string.atoi(lyear), string.atoi(lmonth), string.atoi(lday), string.atoi(lhour), string.atoi(lminute), string.atoi(lsec[:-1]), 0, 0, -1)) - time.timezone + (time.daylight)*3600)) - - return timetoconvert - -def localtime_to_utc(date): - "Convert localtime to UTC" - - ldate = date.split(" ")[0] - ltime = date.split(" ")[1] - - lhour = ltime.split(":")[0] - lminute = ltime.split(":")[1] - lsec = ltime.split(":")[2] - - lyear = ldate.split("-")[0] - lmonth = ldate.split("-")[1] - lday = ldate.split("-")[2] - - timetoconvert = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.mktime((string.atoi(lyear), string.atoi(lmonth), string.atoi(lday), string.atoi(lhour), string.atoi(lminute), string.atoi(lsec), 0, 0, -1)))) - - return timetoconvert - -def get_creation_date(sysno): - "Returns the creation date of the record 'sysno'." - out = "" - res = run_sql("SELECT DATE_FORMAT(creation_date, '%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) - if res[0][0]: - out = localtime_to_utc(res[0][0]) - return out - -def get_modification_date(sysno): - "Returns the date of last modification for the record 'sysno'." - out = "" - res = run_sql("SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) - if res[0][0]: - out = localtime_to_utc(res[0][0]) - return out - -def get_earliest_datestamp(): - "Get earliest datestamp in the database" - out = "" - res = run_sql("SELECT MIN(DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s')) FROM bibrec", (), 1) - if res[0][0]: - out = localtime_to_utc(res[0][0]) - return out - -def check_date(date, dtime="T00:00:00Z"): - "Check if the date has a correct format" - - if(re.sub("[0123456789\-:TZ]", "", date) == ""): - if len(date) == 10: - date = date + dtime - if len(date) == 20: - date = utc_to_localtime(date) - else: - date = "" - else: - date = "" - - return date - -def record_exists(sysno): - "Returns 1 if record with SYSNO 'sysno' exists. Returns 0 otherwise." - - out = 0 - query = "SELECT id FROM bibrec WHERE id='%s'" % (sysno) - - res = run_sql(query) - - for row in res: - if row[0] != "": - out = 1 - - return out - -def print_record(sysno, format='marcxml'): - "Prints record 'sysno' formatted accoding to 'format'." - - out = "" - - # sanity check: - if not record_exists(sysno): - return - - if (format == "dc") or (format == "oai_dc"): - format = "xd" - - # print record opening tags: - - out = out + " \n" - - if is_deleted(sysno) and oaideleted != "no": - out = out + "
\n" - else: - out = out + "
\n" - - for ident in get_field(sysno, oaiidfield): - out = "%s %s\n" % (out, escape_space(ident)) - out = "%s %s\n" % (out, get_modification_date(sysno)) - for set in get_field(sysno, oaisetfield): - out = "%s %s\n" % (out, set) - out = out + "
\n" - - if is_deleted(sysno) and oaideleted != "no": - pass - else: - out = out + " \n" - - if format == "marcxml": - out = out + " " - out = out + " 00000coc 2200000uu 4500" - ## MARC21 and XML formats, possibley OAI -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: - - if format == "marcxml": - - out = "%s %d\n" % (out, int(sysno)) - - for digit1 in range(0, 10): - for digit2 in range(0, 10): - bibbx = "bib%d%dx" % (digit1, digit2) - bibx = "bibrec_bib%d%dx" % (digit1, digit2) - query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ - "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\ - "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx, sysno, str(digit1)+str(digit2)) - res = run_sql(query) - field_number_old = -999 - field_old = "" - for row in res: - field, value, field_number = row[0], row[1], row[2] - ind1, ind2 = field[3], field[4] - if ind1 == "_": - ind1 = " " - if ind2 == "_": - ind2 = " " - # print field tag - if field_number != field_number_old or field[:-1] != field_old[:-1]: - if format == "marcxml": - - if field_number_old != -999: - out = out + " \n" - - out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower()) - - field_number_old = field_number - field_old = field - # print subfield value - if format == "marcxml": - value = encode_for_xml(value) - out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value) - - # fetch next subfield - # all fields/subfields printed in this run, so close the tag: - if (format == "marcxml") and field_number_old != -999: - out = out + " \n" - out = out + " \n" - - elif format == "xd": - # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: - out = out + " \n" - - for field_ in get_field(sysno, "041__a"): - out = "%s %s\n" % (out, field_) - - for field_ in get_field(sysno, "100__a"): - out = "%s %s\n" % (out, encode_for_xml(field_)) - - for field_ in get_field(sysno, "700__a"): - out = "%s %s\n" % (out, encode_for_xml(field_)) - - for field_ in get_field(sysno, "245__a"): - out = "%s %s\n" % (out, encode_for_xml(field_)) - - for field_ in get_field(sysno, "111__a"): - out = "%s %s\n" % (out, encode_for_xml(field_)) - - for field_ in get_field(sysno, "65017a"): - out = "%s %s\n" % (out, encode_for_xml(field_)) - - for field_ in get_field(sysno, "8564_u"): - out = "%s %s\n" % (out, encode_for_xml(escape_space(field_))) - - for field_ in get_field(sysno, "520__a"): - out = "%s %s\n" % (out, encode_for_xml(field_)) - - date = get_creation_date(sysno) - - out = "%s %s\n" % (out, date) - out = out + " \n" - - # print record closing tags: - - out = out + " \n" - - out = out + " \n" - - return out - -def oailistmetadataformats(args): - "Generates response to oailistmetadataformats verb." - - arg = parse_args(args) - - out = "" - - flag = 1 # list or not depending on identifier - - if arg['identifier'] != "": - - flag = 0 - - sysno = oaigetsysno(arg['identifier']) - - if record_exists(sysno): - - flag = 1 - - else: - - out = out + oai_error("idDoesNotExist","invalid record Identifier") - out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats") - return out - - if flag: - out = out + " \n" - out = out + " oai_dc\n" - out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n" - out = out + " http://purl.org/dc/elements/1.1/\n" - out = out + " \n" - out = out + " \n" - out = out + " marcxml\n" - out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n" - out = out + " http://www.loc.gov/MARC21/slim\n" - out = out + " \n" - - out = oai_header(args, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats") - return out - - -def oailistrecords(args): - "Generates response to oailistrecords verb." - - - oai_rt_expire = - nb_records_in_resume = - - - arg = parse_args(args) - - out = "" - - sysnos = [] - sysno = [] - - # check if the resumptionToken did not expire - if arg['resumptionToken']: - filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) - if os.path.exists(filename) == 0: - out = oai_error("badResumptionToken", "ResumptionToken expired") - out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") - return out - - if arg['resumptionToken'] != "": - sysnos = oaicacheout(arg['resumptionToken']) - arg['metadataPrefix'] = sysnos.pop() - else: - sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until']) - - if len(sysnos) == 0: # noRecordsMatch error - - out = out + oai_error("noRecordsMatch", "no_ records correspond to the request") - out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") - return out - - i = 0 - for sysno_ in sysnos: - if sysno_: - i = i + 1 - if i > nb_records_in_resume: # cache or write? - if i == nb_records_in_resume + 1: # resumptionToken? - arg['resumptionToken'] = oaigenresumptionToken() - extdate = oaigetresponsedate(oai_rt_expire) - if extdate: - out = "%s %s\n" % (out, extdate, arg['resumptionToken']) - else: - out = "%s %s\n" % (out, arg['resumptionToken']) - sysno.append(sysno_) - else: - done = 0 - for field_ in get_field(sysno_, "245__a"): - if done == 0: - out = out + print_record(sysno_, arg['metadataPrefix']) - - if i > nb_records_in_resume: - oaicacheclean() - sysno.append(arg['metadataPrefix']) - oaicachein(arg['resumptionToken'], sysno) - - out = oai_header(args, "ListRecords") + out + oai_footer("ListRecords") - return out - -def oailistsets(args): - "Lists available sets for OAI metadata harvesting." - - out = "" - - # note: no flow control in ListSets - - sets = get_sets() - - for set_ in sets: - - out = out + " \n" - out = "%s %s\n" % (out, set_[0]) - out = "%s %s\n" % (out, set_[1]) - if set_[2]: - out = "%s %s\n" % (out, set_[2]) - out = out + " \n" - - out = oai_header(args, "ListSets") + out + oai_footer("ListSets") - - return out - - -def oaigetrecord(args): - """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.""" - - arg = parse_args(args) - out = "" - sysno = oaigetsysno(arg['identifier']) - - if record_exists(sysno): - datestamp = get_modification_date(sysno) - out = out + print_record(sysno, arg['metadataPrefix']) - else: - out = out + oai_error("idDoesNotExist", "invalid record Identifier") - out = oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord") - return out - - out = oai_header(args, "GetRecord") + out + oai_footer("GetRecord") - - return out - - -def oailistidentifiers(args): - "Prints OAI response to the ListIdentifiers verb." - - - oai_rt_expire = - nb_identifiers_in_resume = - - - arg = parse_args(args) - - out = "" - - sysno = [] - sysnos = [] - - if arg['resumptionToken']: - filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) - if os.path.exists(filename) == 0: - out = out + oai_error("badResumptionToken", "ResumptionToken expired") - out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") - return out - - if arg['resumptionToken']: - sysnos = oaicacheout(arg['resumptionToken']) - else: - sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until']) - - if len(sysnos) == 0: # noRecordsMatch error - out = out + oai_error("noRecordsMatch", "no records correspond to the request") - out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") - return out - - i = 0 - for sysno_ in sysnos: - if sysno_: - i = i + 1 - if i > nb_identifiers_in_resume: # cache or write? - if i == nb_identifiers_in_resume + 1: # resumptionToken? - arg['resumptionToken'] = oaigenresumptionToken() - extdate = oaigetresponsedate(oai_rt_expire) - if extdate: - out = "%s %s\n" % (out, extdate, arg['resumptionToken']) - else: - out = "%s %s\n" % (out, arg['resumptionToken']) - sysno.append(sysno_) - else: - done = 0 - for field_ in get_field(sysno_, "245__a"): - if done == 0: - for ident in get_field(sysno_, oaiidfield): - if is_deleted(sysno_) and oaideleted != "no": - out = out + "
\n" - else: - out = out + "
\n" - out = "%s %s\n" % (out, escape_space(ident)) - out = "%s %s\n" % (out, get_modification_date(oaigetsysno(ident))) - for set in get_field(sysno_, oaisetfield): - out = "%s %s\n" % (out, set) - out = out + "
\n" - done = 1 - - if i > nb_identifiers_in_resume: - oaicacheclean() # clean cache from expired resumptionTokens - oaicachein(arg['resumptionToken'], sysno) - - out = oai_header(args, "ListIdentifiers") + out + oai_footer("ListIdentifiers") - - return out - - -def oaiidentify(args): - "Generates response to oaiidentify verb." - - out = "" - - repositoryname = " " + cdsname + "\n" - baseurl = " %s/oai2d.py/\n" % weburl - protocolversion = " 2.0\n" - adminemail = " %s\n" % supportemail - earliestdst = " %s\n" % get_earliest_datestamp() - deletedrecord = " %s\n" % oaideleted - repositoryidentifier = "%s" % oaiidprefix - sampleidentifier = oaisampleidentifier - identifydescription = oaiidentifydescription + "\n" - - out = out + repositoryname - out = out + baseurl - out = out + protocolversion - out = out + adminemail - out = out + earliestdst - out = out + deletedrecord - out = out + " YYYY-MM-DDThh:mm:ssZ\n" - # print " \n" - out = out + oaiidentifydescription - - out = oai_header(args, "Identify") + out + oai_footer("Identify") - - return out - - -def oaigetrequesturl(args): - "Generates requesturl tag for OAI." - - # re_amp = re.compile('&') - - requesturl = weburl + "/" + "oai2d.py/"# + "?" + re_amp.sub("&", args) - - return requesturl - -def oaigetresponsedate(delay=0): - "Generates responseDate tag for OAI." - - return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() + delay)) - - -def oai_error(code, msg): - "OAI error occured" - - return "%s\n" % (code, msg) - - -def oaigetsysno(identifier): - "Returns the first MySQL BIB ID for the OAI identifier 'identifier', if it exists." - sysno = None - if identifier: - query = "SELECT DISTINCT(bb.id_bibrec) FROM bib%sx AS bx, bibrec_bib%sx AS bb WHERE bx.tag='%s' AND bb.id_bibxxx=bx.id AND bx.value='%s'" % (oaiidfield[0:2], oaiidfield[0:2], oaiidfield, identifier) - res = run_sql(query) - for row in res: - sysno = row[0] - return sysno - - -def oaigetsysnolist(set, fromdate, untildate): - "Returns list of system numbers for the OAI set 'set', modified from 'date_from' until 'date_until'." - - out_dict = {} # dict to hold list of out sysnos as its keys - - if set: - query = "SELECT DISTINCT bibx.id_bibrec FROM bib%sx AS bx LEFT JOIN bibrec_bib%sx AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s' AND bx.value='%s'" % (oaiidfield[0:2], oaiidfield[0:2], oaisetfield, set) - else: - query = "SELECT DISTINCT bibx.id_bibrec FROM bib%sx AS bx LEFT JOIN bibrec_bib%sx AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s'" % (oaiidfield[0:2], oaiidfield[0:2], oaiidfield) - - if untildate: - query = query + " AND b.modification_date <= '%s'" % untildate - if fromdate: - query = query + " AND b.modification_date >= '%s'" % fromdate - - res = run_sql(query) - - for row in res: - out_dict[row[0]] = 1 - - return out_dict.keys() - -def is_deleted(recid): - "Check if record with recid has been deleted. Return 1 if deleted." - - query = "select a.id from bibrec as a left join bibrec_bib98x as b on a.id=b.id_bibrec left join bib98x as c on b.id_bibxxx=c.id where c.value='DELETED' and a.id=%s" % recid - - res = run_sql(query) - - for item in res: - if item == None: - return 0 - else: - return 1 - -def oaigenresumptionToken(): - "Generates unique ID for resumption token management." - - return md5.new(str(time.time())).hexdigest() - - -def oaicachein(resumptionToken, sysnos): - "Stores or adds sysnos in cache. Input is a string of sysnos separated by commas." - - filename = "%s/RTdata/%s" % (logdir, resumptionToken) - - fil = open(filename, "w") - cPickle.dump(sysnos, fil) - fil.close() - return 1 - - -def oaicacheout(resumptionToken): - "Restores string of comma-separated system numbers from cache." - - sysnos = [] - - filename = "%s/RTdata/%s" % (logdir, resumptionToken) - - if oaicachestatus(resumptionToken): - fil = open(filename, "r") - sysnos = cPickle.load(fil) - fil.close() - else: - return 0 - return sysnos - - -def oaicacheclean(): - "Removes cached resumptionTokens older than specified" - - - oai_rt_expire = - - - directory = "%s/RTdata" % logdir - - files = os.listdir(directory) - - for file_ in files: - filename = directory + "/" + file_ - # cache entry expires when not modified during a specified period of time - if ((time.time() - os.path.getmtime(filename)) > oai_rt_expire): - os.remove(filename) - - return 1 - - -def oaicachestatus(resumptionToken): - "Checks cache status. Returns 0 for empty, 1 for full." - - filename = "%s/RTdata/%s" % (logdir, resumptionToken) - - if os.path.exists(filename): - if os.path.getsize(filename) > 0: - return 1 - else: - return 0 - else: - return 0 - - -def get_sets(): - "Returns list of sets." - - out = [] - row = ['', ''] - - query = "SELECT setSpec,setName,setDescription FROM oaiSET" - res = run_sql(query) - for row in res: - row_bis = [row[0], row[1], row[2]] - out.append(row_bis) - - return out - - -def parse_args(args=""): - "Parse input args" - - out_args = { - "verb" : "", - "metadataPrefix" : "", - "from" : "", - "until" : "", - "set" : "", - "identifier" : "", - "resumptionToken" : "" - } - - if args == "" or args == None: - pass - else: - - list_of_arguments = args.split('&') - - for item in list_of_arguments: - keyvalue = item.split('=') - if len(keyvalue) == 2: - if (out_args.has_key(keyvalue[0])): - if(out_args[keyvalue[0]] != ""): - out_args[keyvalue[0]] = "Error" - else: - out_args[keyvalue[0]] = urllib.unquote(keyvalue[1]) - else: - out_args[keyvalue[0]] = urllib.unquote(keyvalue[1]) - else: - out_args['verb'] = "" - - return out_args - -def check_args(arguments): - "Check OAI arguments" - - out_args = { - "verb" : "", - "metadataPrefix" : "", - "from" : "", - "until" : "", - "set" : "", - "identifier" : "", - "resumptionToken" : "" - } - - out = "" - -## principal argument required -# -# - if verbs.has_key(arguments['verb']): - pass - else: - out = out + oai_error("badVerb", "Illegal OAI verb") - -## defined args -# -# - for param in arguments.keys(): - if out_args.has_key(param): - pass - else: - out = out + oai_error("badArgument", "The request includes illegal arguments") - -## unique args -# -# - for param in arguments.keys(): - if (arguments[param] == "Error"): - out = out + oai_error("badArgument", "The request includes illegal arguments") - -## resumptionToken exclusive -# -# - if ((arguments['from'] != "" or arguments['until'] != "" or arguments['metadataPrefix'] != "" or arguments['identifier'] != "" or arguments['set'] != "") and arguments['resumptionToken'] != ""): - - out = out + oai_error("badArgument", "The request includes illegal arguments") - -## datestamp formats -# -# - if arguments['from'] != "" and arguments['from'] != "": - from_length = len(arguments['from']) - if check_date(arguments['from'], "T00:00:00Z") == "": - out = out + oai_error("badArgument", "Bad datestamp format in from") - else: - from_length = 0 - - if arguments['until'] != "" and arguments['until'] != "": - until_length = len(arguments['until']) - if check_date(arguments['until'], "T23:59:59Z") == "": - out = out + oai_error("badArgument", "Bad datestamp format in until") - else: - until_length = 0 - - if from_length != 0: - if until_length != 0: - if from_length != until_length: - out = out + oai_error("badArgument", "Bad datestamp format") - - if arguments['from'] > arguments['until']: - out = out + oai_error("badArgument", "Wrong date") - -## Identify exclusive -# -# - if (arguments['verb'] =="Identify" and (arguments['metadataPrefix'] != "" or arguments['identifier'] != "" or arguments['set'] != "" or arguments['from'] != "" or arguments['until'] != "" or arguments['resumptionToken'] != "")): - out = out + oai_error("badArgument", "The request includes illegal arguments") - -## parameters for GetRecord -# -# - if arguments['verb'] =="GetRecord" and arguments['identifier'] == "": - out = out + oai_error("badArgument", "Record identifier missing") - - if arguments['verb'] =="GetRecord" and arguments['metadataPrefix'] == "": - out = out + oai_error("badArgument", "Missing metadataPrefix") - - -## parameters for ListRecords and ListIdentifiers -# -# - if (arguments['verb'] =="ListRecords" or arguments['verb'] =="ListIdentifiers") and (arguments['metadataPrefix'] == "" and arguments['resumptionToken'] == ""): - out = out + oai_error("badArgument", "Missing metadataPrefix") - -## Metadata prefix defined -# -# - if arguments.has_key('metadataPrefix'): - if ((arguments['metadataPrefix'] in params['metadataPrefix']) or (params['metadataPrefix'] == "")): - pass - else: - out = out + oai_error("badArgument", "Missing metadataPrefix") - - return out - - diff --git a/modules/bibharvest/lib/Makefile.am b/modules/bibharvest/lib/oai_repository_config.py similarity index 54% copy from modules/bibharvest/lib/Makefile.am copy to modules/bibharvest/lib/oai_repository_config.py index 8bdf02188..295efb497 100644 --- a/modules/bibharvest/lib/Makefile.am +++ b/modules/bibharvest/lib/oai_repository_config.py @@ -1,29 +1,36 @@ ## $Id$ - +## ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. +## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -pylibdir=$(libdir)/python/cdsware -pylib_DATA=oai_repository.py oai_repository_tests.py - -FILESWML = $(wildcard $(srcdir)/*.wml) -EXTRA_DIST = $(FILESWML:$(srcdir)/%=%) +## read config variables: +#include "config.wml" +#include "configbis.wml" -CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc +## $Id$ +## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. +"""OAI repository config""" -%.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml - $(WML) -o $@ $< +## OAI config variables +oaiidprefix = "" +oaisampleidentifier = "" +oaiidentifydescription = """""" +oaiidfield = "" +oaisetfield = "" +oaideleted = "" +oai_rt_expire = +nb_records_in_resume = diff --git a/modules/bibharvest/lib/Makefile.am b/modules/bibharvest/lib/oai_repository_config.py.wml similarity index 54% copy from modules/bibharvest/lib/Makefile.am copy to modules/bibharvest/lib/oai_repository_config.py.wml index 8bdf02188..295efb497 100644 --- a/modules/bibharvest/lib/Makefile.am +++ b/modules/bibharvest/lib/oai_repository_config.py.wml @@ -1,29 +1,36 @@ ## $Id$ - +## ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. +## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -pylibdir=$(libdir)/python/cdsware -pylib_DATA=oai_repository.py oai_repository_tests.py - -FILESWML = $(wildcard $(srcdir)/*.wml) -EXTRA_DIST = $(FILESWML:$(srcdir)/%=%) +## read config variables: +#include "config.wml" +#include "configbis.wml" -CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc +## $Id$ +## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. +"""OAI repository config""" -%.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml - $(WML) -o $@ $< +## OAI config variables +oaiidprefix = "" +oaisampleidentifier = "" +oaiidentifydescription = """""" +oaiidfield = "" +oaisetfield = "" +oaideleted = "" +oai_rt_expire = +nb_records_in_resume = diff --git a/modules/bibharvest/lib/oai_repository_tests.py b/modules/bibharvest/lib/oai_repository_tests.py index a891bf70d..b757d8aee 100644 --- a/modules/bibharvest/lib/oai_repository_tests.py +++ b/modules/bibharvest/lib/oai_repository_tests.py @@ -1,84 +1,80 @@ +# -*- coding: utf-8 -*- +## ## $Id$ ## CDSware OAI repository unit tests. - +## ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -# -*- coding: utf-8 -*- - """Unit tests for the oai repository.""" -__lastupdated__ = """<: print `date +"%d %b %Y %H:%M:%S %Z"`; :>""" - - ## okay, rest of the Python code goes below ####### - __version__ = "$Id$" import oai_repository import unittest import re class TestVerbs(unittest.TestCase): """Test for OAI verb functionality.""" def test_verbs(self): """bibharvest oai repository - testing verbs""" self.assertNotEqual(None, re.search("Identify", oai_repository.oaiidentify(""))) self.assertNotEqual(None, re.search("ListIdentifiers", oai_repository.oailistidentifiers(""))) self.assertNotEqual(None, re.search("ListRecords", oai_repository.oailistrecords(""))) self.assertNotEqual(None, re.search("ListMetadataFormats", oai_repository.oailistmetadataformats(""))) self.assertNotEqual(None, re.search("ListSets", oai_repository.oailistsets(""))) self.assertNotEqual(None, re.search("GetRecord", oai_repository.oaigetrecord(""))) class TestErrorCodes(unittest.TestCase): """Test for handling OAI error codes.""" def test_issue_error_identify(self): """bibharvest oai repository - testing error codes""" self.assertNotEqual(None, re.search("badVerb", oai_repository.check_args(oai_repository.parse_args("junk")))) self.assertNotEqual(None, re.search("badVerb", oai_repository.check_args(oai_repository.parse_args("verb=IllegalVerb")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=Identify&test=test")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=oai_dc&from=some_random_date&until=some_random_date")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=oai_dc&from=2001-01-01&until=2002-01-01T00:00:00Z")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=illegal_mdp")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=oai_dc&metadataPrefix=oai_dc")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListRecords&metadataPrefix=oai_dc&set=really_wrong_set&from=some_random_date&until=some_random_date")))) self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListRecords")))) class TestEncodings(unittest.TestCase): """Test for OAI response encodings.""" def test_encoding(self): """bibharvest oai repository - testing encodings""" self.assertEqual("<&>", oai_repository.encode_for_xml("<&>")) self.assertEqual("%20", oai_repository.escape_space(" ")) self.assertEqual("%25%20%3F%23%3D%26%2F%3A%3B%2B", oai_repository.encode_for_url("% ?#=&/:;+")) def create_test_suite(): """Return test suite for the oai repository.""" return unittest.TestSuite((unittest.makeSuite(TestVerbs, 'test'), unittest.makeSuite(TestErrorCodes, 'test'), unittest.makeSuite(TestEncodings, 'test'))) if __name__ == "__main__": unittest.TextTestRunner(verbosity=2).run(create_test_suite()) diff --git a/modules/bibharvest/lib/oai_repository_tests.py.wml b/modules/bibharvest/lib/oai_repository_tests.py.wml deleted file mode 100644 index a891bf70d..000000000 --- a/modules/bibharvest/lib/oai_repository_tests.py.wml +++ /dev/null @@ -1,84 +0,0 @@ -## $Id$ -## CDSware OAI repository unit tests. - -## This file is part of the CERN Document Server Software (CDSware). -## Copyright (C) 2002, 2003, 2004, 2005 CERN. -## -## The CDSware is free software; you can redistribute it and/or -## modify it under the terms of the GNU General Public License as -## published by the Free Software Foundation; either version 2 of the -## License, or (at your option) any later version. -## -## The CDSware is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with CDSware; if not, write to the Free Software Foundation, Inc., -## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -# -*- coding: utf-8 -*- - -"""Unit tests for the oai repository.""" - -__lastupdated__ = """<: print `date +"%d %b %Y %H:%M:%S %Z"`; :>""" - - ## okay, rest of the Python code goes below ####### - -__version__ = "$Id$" - -import oai_repository -import unittest -import re - -class TestVerbs(unittest.TestCase): - """Test for OAI verb functionality.""" - - def test_verbs(self): - """bibharvest oai repository - testing verbs""" - self.assertNotEqual(None, re.search("Identify", oai_repository.oaiidentify(""))) - self.assertNotEqual(None, re.search("ListIdentifiers", oai_repository.oailistidentifiers(""))) - self.assertNotEqual(None, re.search("ListRecords", oai_repository.oailistrecords(""))) - self.assertNotEqual(None, re.search("ListMetadataFormats", oai_repository.oailistmetadataformats(""))) - self.assertNotEqual(None, re.search("ListSets", oai_repository.oailistsets(""))) - self.assertNotEqual(None, re.search("GetRecord", oai_repository.oaigetrecord(""))) - - -class TestErrorCodes(unittest.TestCase): - """Test for handling OAI error codes.""" - - def test_issue_error_identify(self): - """bibharvest oai repository - testing error codes""" - - self.assertNotEqual(None, re.search("badVerb", oai_repository.check_args(oai_repository.parse_args("junk")))) - self.assertNotEqual(None, re.search("badVerb", oai_repository.check_args(oai_repository.parse_args("verb=IllegalVerb")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=Identify&test=test")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=oai_dc&from=some_random_date&until=some_random_date")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=oai_dc&from=2001-01-01&until=2002-01-01T00:00:00Z")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=illegal_mdp")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListIdentifiers&metadataPrefix=oai_dc&metadataPrefix=oai_dc")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListRecords&metadataPrefix=oai_dc&set=really_wrong_set&from=some_random_date&until=some_random_date")))) - self.assertNotEqual(None, re.search("badArgument", oai_repository.check_args(oai_repository.parse_args("verb=ListRecords")))) - -class TestEncodings(unittest.TestCase): - """Test for OAI response encodings.""" - - def test_encoding(self): - """bibharvest oai repository - testing encodings""" - - self.assertEqual("<&>", oai_repository.encode_for_xml("<&>")) - self.assertEqual("%20", oai_repository.escape_space(" ")) - self.assertEqual("%25%20%3F%23%3D%26%2F%3A%3B%2B", oai_repository.encode_for_url("% ?#=&/:;+")) - -def create_test_suite(): - """Return test suite for the oai repository.""" - - - return unittest.TestSuite((unittest.makeSuite(TestVerbs, 'test'), - unittest.makeSuite(TestErrorCodes, 'test'), - unittest.makeSuite(TestEncodings, 'test'))) - -if __name__ == "__main__": - unittest.TextTestRunner(verbosity=2).run(create_test_suite())