diff --git a/modules/bibharvest/Makefile.am b/modules/bibharvest/Makefile.am index 47ad17ba4..7e62cbf12 100644 --- a/modules/bibharvest/Makefile.am +++ b/modules/bibharvest/Makefile.am @@ -1,22 +1,22 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = bin +SUBDIRS = bin lib web CLEANFILES = *~ \ No newline at end of file diff --git a/modules/bibharvest/lib/.cvsignore b/modules/bibharvest/lib/.cvsignore new file mode 100644 index 000000000..6a1572735 --- /dev/null +++ b/modules/bibharvest/lib/.cvsignore @@ -0,0 +1,8 @@ +Makefile +Makefile.in +z_* +*.O +*~ +oai1d +oai2d +*.py \ No newline at end of file diff --git a/modules/bibharvest/Makefile.am b/modules/bibharvest/lib/Makefile.am similarity index 78% copy from modules/bibharvest/Makefile.am copy to modules/bibharvest/lib/Makefile.am index 47ad17ba4..036d39c7b 100644 --- a/modules/bibharvest/Makefile.am +++ b/modules/bibharvest/lib/Makefile.am @@ -1,22 +1,28 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. 
## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = bin +pylibdir=$(libdir)/python/cdsware +pylib_DATA=oai_repository.py -CLEANFILES = *~ \ No newline at end of file +EXTRA_DIST = $(wildcard *.wml) + +CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc + +%.py: %.py.wml ../../../config/config.wml ../../../config/configbis.wml + $(WML) -o $@ $< \ No newline at end of file diff --git a/modules/bibharvest/lib/oai_repository.py b/modules/bibharvest/lib/oai_repository.py new file mode 100644 index 000000000..1c28a236e --- /dev/null +++ b/modules/bibharvest/lib/oai_repository.py @@ -0,0 +1,862 @@ +## $Id$ +## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0 + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +## read config variables: +#include "config.wml" +#include "configbis.wml" + +## start Python: +#! +## $Id$ +## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. +"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0""" + +## fill config variables: +pylibdir = "/python" + + +## OAI config variables +oaiidprefix = "" +oaisampleidentifier = "" +oaiidentifydescription = """""" +oaiidfield = "909COo" +oaisetfield = "909COp" + + +try: + import cPickle + import string + from string import split + import os + import re + import sys + import time + import md5 + + sys.path.append('%s' % pylibdir) + from config import * + from dbquery import run_sql + +except ImportError, e: + import sys + sys.stderr.write("Error: %s" % e) + sys.exit(1) + +verbs = { + "Identify" : [""], + "ListSets" : ["resumptionToken"], + "ListMetadataFormats" : ["resumptionToken"], + "ListRecords" : ["resumptionToken"], + "ListIdentifiers" : ["resumptionToken"], + "GetRecord" : [""] +} + + + +def encode_for_xml(s): + "Encode special chars in string for XML-compliancy." + + s = string.replace(s, '&', '&') + s = string.replace(s, '<', '<') + return s + +def encode_for_url(s): + "Encode special chars in string for URL-compliancy." 
+ + s = string.replace(s, ' ', '%20') + s = string.replace(s, '?', '%3F') + s = string.replace(s, '#', '%23') + s = string.replace(s, '=', '%3D') + s = string.replace(s, '&', '%26') + s = string.replace(s, '%', '%25') + + return s + +def oai_header(args, verb): + "Print OAI header" + + out = "" + + out = out + "" + "\n" + out = out + "\n" + + out = out + " " + OAIGetResponseDate() + "\n" + + if verb: + out = out + " %s\n" % (verb,OAIGetRequestURL(args)) + out = out + " <%s>\n" % verb + else: + out = out + " %s\n" % (OAIGetRequestURL(args)) + + return out + +def oai_footer(verb): + "Print OAI footer" + + out = "" + + if verb: + out = "%s \n" % (out, verb) + out = out + "\n" + + return out + +def oai_error_header(args, verb): + "Print OAI header" + + out = "" + +### out = "Content-Type: text/xml\n\n" + out = out + "" + "\n" + out = out + "\n" + + out = out + " " + OAIGetResponseDate() + "\n" + out = out + " %s\n" % (verb,OAIGetRequestURL(args)) + + return out + +def oai_error_footer(verb): + "Print OAI footer" + + out = "" + out = out + "\n" + + return out + +def get_field(sysno, field): + "Gets list of field 'field' for the record with 'sysno' system number." 
+ + out = [] + digit = field[0:2] + + bx = "bib%sx" % digit + bibx = "bibrec_bib%sx" % digit + query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" % (bx, bibx, sysno, field) + + res = run_sql(query) + + for row in res: + + out.append(row[0]) + + return out + +def UTC_to_localtime(date): + "Convert UTC to localtime" + + ldate = date.split("T")[0] + ltime = date.split("T")[1] + + lhour = ltime.split(":")[0] + lminute = ltime.split(":")[1] + lsec = ltime.split(":")[2] + + lyear = ldate.split("-")[0] + lmonth = ldate.split("-")[1] + lday = ldate.split("-")[2] + + timetoconvert = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec[:-1]),0,0,-1)) - time.timezone + (time.daylight)*3600)) + + return timetoconvert + +def localtime_to_UTC(date): + "Convert localtime to UTC" + + ldate = date.split(" ")[0] + ltime = date.split(" ")[1] + + lhour = ltime.split(":")[0] + lminute = ltime.split(":")[1] + lsec = ltime.split(":")[2] + + lyear = ldate.split("-")[0] + lmonth = ldate.split("-")[1] + lday = ldate.split("-")[2] + + timetoconvert = time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec),0,0,-1)))) + + return timetoconvert + +def get_creation_date(sysno): + "Returns the creation date of the record 'sysno'." + + out = "" + + query = "SELECT DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno) + + res = run_sql(query) + for row in res: + out = row[0] + return localtime_to_UTC(out) + +def get_modification_date(sysno): + "Returns the date of last modification for the record 'sysno'." 
+ + out = "" + + query = "SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno) + + res = run_sql(query) + for row in res: + out = row[0] + return localtime_to_UTC(out) + +def get_earliest_datestamp(): + "Get earliest datestamp in the database" + + out = "" + + query = "SELECT MIN(DATE_FORMAT(creation_date,'%Y-%m-%d %H:%i:%s')) FROM bibrec" + + res = run_sql(query) + return localtime_to_UTC(res[0][0]) + +def check_date(date, time="T00:00:00Z"): + "Check if the date has a correct format" + + if(re.sub("[0123456789\-:TZ]","",date) == ""): + if len(date) == 10: + date = date + time + if len(date) == 20: + date = UTC_to_localtime(date) + else: + date = "" + else: + date = "" + + return date + +def record_exists(sysno): + "Returns 1 if record with SYSNO 'sysno' exists. Returns 0 otherwise." + + out = 0 + query = "SELECT id FROM bibrec WHERE id='%s'" % (sysno) + + res = run_sql(query) + + for row in res: + if row[0] != "": + out = 1 + + return out + +def print_record(sysno, format='marcxml'): + "Prints record 'sysno' formatted accoding to 'format'." + + out = "" + + # sanity check: + if not record_exists(sysno): + return + + if (format == "dc") or (format == "oai_dc"): + format = "xd" + + # print record opening tags: + + out = out + " \n" + out = out + "
\n" + for id in get_field(sysno,oaiidfield): + out = "%s %s\n" % (out, encode_for_url(id)) + out = "%s %s\n" % (out, get_modification_date(sysno)) + for set in get_field(sysno,oaisetfield): + out = "%s %s\n" % (out, set) + out = out + "
\n" + out = out + " \n" + + if format == "marcxml": + out = out + " " + out = out + " 00000coc 2200000uu 4500" + ## MARC21 and XML formats, possibley OAI -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: + + if format == "marcxml": + + out = "%s %d\n" % (out, int(sysno)) + + for digit1 in range(0,10): + for digit2 in range(0,10): + bx = "bib%d%dx" % (digit1, digit2) + bibx = "bibrec_bib%d%dx" % (digit1, digit2) + query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ + "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\ + "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, sysno, str(digit1)+str(digit2)) + res = run_sql(query) + field_number_old = -999 + field_old = "" + for row in res: + field, value, field_number = row[0], row[1], row[2] + ind1, ind2 = field[3], field[4] + if ind1 == "_": + ind1 = " " + if ind2 == "_": + ind2 = " " + # print field tag + if field_number != field_number_old or field[:-1] != field_old[:-1]: + if format == "marcxml": + + fieldid = encode_for_xml(field[0:3]) + + if field_number_old != -999: + out = out + " \n" + + out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower()) + + field_number_old = field_number + field_old = field + # print subfield value + if format == "marcxml": + value = encode_for_xml(value) + out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value) + + # fetch next subfield + # all fields/subfields printed in this run, so close the tag: + if (format == "marcxml") and field_number_old != -999: + out = out + " \n" + out = out + " \n" + + elif format == "xd": + # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: + out = out + " \n" + + for f in get_field(sysno, "041__a"): + out = "%s %s\n" % (out, f) + + for f in get_field(sysno, "100__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in get_field(sysno, "700__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in 
get_field(sysno, "245__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in get_field(sysno, "65017a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in get_field(sysno, "8564_u"): + out = "%s %s\n" % (out, encode_for_xml(encode_for_url(f))) + + for f in get_field(sysno, "520__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + date = get_creation_date(sysno) + + out = "%s %s\n" % (out, date) + out = out + " \n" + + # print record closing tags: + + out = out + " \n" + out = out + "
\n" + + return out + +def OAIListMetadataFormats(args): + "Generates response to OAIListMetadataFormats verb." + +
+ + oai_rt_expire = + nb_formats_in_resume = + + + + arg = parse_args(args) + + out = "" + + flag = 1 # list or not depending on identifier + + if arg['identifier'] != "": + + flag = 0 + + sysno = OAIGetSysno(arg['identifier']) + + if record_exists(sysno): + + flag = 1 + + else: + + out = out + oai_error("badArgument","invalid record Identifier") + out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats") + return out + + if flag: + out = out + " \n" + out = out + " oai_dc\n" + out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n" + out = out + " http://purl.org/dc/elements/1.1/\n" + out = out + " \n" + out = out + " \n" + out = out + " marcxml\n" + out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n" + out = out + " http://www.loc.gov/MARC21/slim\n" + out = out + " \n" + + out = oai_header(args,"ListMetadataFormats") + out + oai_footer("ListMetadataFormats") + return out + + +def OAIListRecords(args): + "Generates response to OAIListRecords verb." 
+ + + oai_rt_expire = + nb_records_in_resume = + + + arg = parse_args(args) + + out = "" + + sysnos = [] + sysno = [] + + # check if the resumptionToken did not expire + if arg['resumptionToken']: + filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) + if os.path.exists(filename) == 0: + out = oai_error("badResumptionToken","ResumptionToken expired") + out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") + return out + + if arg['resumptionToken'] != "": + sysnos = OAICacheOut(arg['resumptionToken']) + arg['metadataPrefix'] = sysnos.pop() + else: + sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until']) + + if len(sysnos) == 0: # noRecordsMatch error + + out = out + oai_error("noRecordsMatch","no records correspond to the request") + out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") + return out + + i = 0 + for s in sysnos: + if s: + i = i + 1 + if i > nb_records_in_resume: # cache or write? + if i == nb_records_in_resume + 1: # resumptionToken? + arg['resumptionToken'] = OAIGenResumptionToken() + extdate = OAIGetResponseDate(oai_rt_expire) + if extdate: + out = "%s %s\n" % (out,extdate,arg['resumptionToken']) + else: + out = "%s %s\n" % (out, arg['resumptionToken']) + sysno.append(s) + else: + done = 0 + for f in get_field(s, "245__a"): + if done == 0: + out = out + print_record(s, arg['metadataPrefix']) + + if i > nb_records_in_resume: + OAICacheClean() + sysno.append(arg['metadataPrefix']) + OAICacheIn(arg['resumptionToken'],sysno) + + out = oai_header(args,"ListRecords") + out + oai_footer("ListRecords") + return out + +def OAIListSets(args): + "Lists available sets for OAI metadata harvesting." 
+ + + oai_rt_expire = + nb_sets_in_resume = + + + arg = parse_args(args) + + out = "" + + # note: no flow control in ListSets + + sets = get_sets() + + for s in sets: + + out = out + " \n" + out = "%s %s\n" % (out, s[0]) + out = "%s %s\n" % (out, s[1]) + if s[2]: + out = "%s %s\n" % (out, s[2]) + out = out + " \n" + + out = oai_header(args,"ListSets") + out + oai_footer("ListSets") + + return out + + +def OAIGetRecord(args): + """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.""" + + arg = parse_args(args) + out = "" + sysno = OAIGetSysno(arg['identifier']) + + if record_exists(sysno): + datestamp = get_modification_date(sysno) + out = out + print_record(sysno, arg['metadataPrefix']) + else: + out = out + oai_error("badArgument","invalid record Identifier") + out = oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord") + return out + + out = oai_header(args,"GetRecord") + out + oai_footer("GetRecord") + + return out + + +def OAIListIdentifiers(args): + "Prints OAI response to the ListIdentifiers verb." 
+ + + oai_rt_expire = + nb_identifiers_in_resume = + + + arg = parse_args(args) + + out = "" + + sysno = [] + sysnos = [] + + if arg['resumptionToken']: + filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) + if os.path.exists(filename) == 0: + out = out + oai_error("badResumptionToken","ResumptionToken expired") + out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") + return out + + if arg['resumptionToken']: + sysnos = OAICacheOut(arg['resumptionToken']) + else: + sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until']) + + if len(sysnos) == 0: # noRecordsMatch error + out = out + oai_error("noRecordsMatch","no records correspond to the request") + out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") + return out + + i = 0 + for s in sysnos: + if s: + i = i + 1 + if i > nb_identifiers_in_resume: # cache or write? + if i == nb_identifiers_in_resume + 1: # resumptionToken? + arg['resumptionToken'] = OAIGenResumptionToken() + extdate = OAIGetResponseDate(oai_rt_expire) + if extdate: + out = "%s %s\n" % (out, extdate,arg['resumptionToken']) + else: + out = "%s %s\n" % (out, arg['resumptionToken']) + sysno.append(s) + else: + done = 0 + for f in get_field(s, "245__a"): + if done == 0: + for id in get_field(s,oaiidfield): + out = out + "
\n" + out = "%s %s\n" % (out, encode_for_url(id)) + out = "%s %s\n" % (out, get_modification_date(OAIGetSysno(id))) + for set in get_field(s,oaisetfield): + out = "%s %s\n" % (out, arg['set']) + out = out + "
\n" + done = 1 + + if i > nb_identifiers_in_resume: + OAICacheClean() # clean cache from expired resumptionTokens + OAICacheIn(arg['resumptionToken'],sysno) + + out = oai_header(args,"ListIdentifiers") + out + oai_footer("ListIdentifiers") + + return out + + +def OAIIdentify(args): + "Generates response to OAIIdentify verb." + + out = "" + + repositoryName = " " + cdsname + "\n" + baseURL = " %s/oai2d.py/\n" % weburl + protocolVersion = " 2.0\n" + adminEmail = " mailto:%s\n" % supportemail + earliestDST = " %s\n" % get_earliest_datestamp() + repositoryIdentifier = "%s" % oaiidprefix + sampleIdentifier = oaisampleidentifier + identifyDescription = oaiidentifydescription + "\n" + + out = out + repositoryName + out = out + baseURL + out = out + protocolVersion + out = out + adminEmail + out = out + earliestDST + out = out + " no\n" + out = out + " YYYY-MM-DDThh:mm:ssZ\n" + # print " \n" + out = out + oaiidentifydescription + + out = oai_header(args,"Identify") + out + oai_footer("Identify") + + return out + + +def OAIGetRequestURL(args): + "Generates requestURL tag for OAI." + + re_amp = re.compile('&') + + requestURL = weburl + "/" + "oai2d.py/"# + "?" + re_amp.sub("&", args) + + return requestURL + +def OAIGetResponseDate(delay=0): + "Generates responseDate tag for OAI." + + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() + delay)) + + +def oai_error(code, msg): + "OAI error occured" + + return "%s\n" % (code, msg) + + +def OAIGetSysno(identifier): + "Returns the first MySQL BIB ID for the OAI identifier 'identifier', if it exists." 
+ sysno = None + if identifier: + query = "SELECT DISTINCT(bb.id_bibrec) FROM bib90x AS bx, bibrec_bib90x AS bb WHERE bx.tag='%s' AND bb.id_bibxxx=bx.id AND bx.value='%s'" % (oaiidfield,identifier) + res = run_sql(query) + for row in res: + sysno = row[0] + return sysno + + +def OAIGetSysnoList(set, fromDate, untilDate): + "Returns list of system numbers for the OAI set 'set', modified from 'date_from' until 'date_until'." + + out_dict = {} # dict to hold list of out sysnos as its keys + + if set: + query = "SELECT DISTINCT bibx.id_bibrec FROM bib90x AS bx LEFT JOIN bibrec_bib90x AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s' AND bx.value='%s'" % (oaisetfield,set) + else: + query = "SELECT DISTINCT bibx.id_bibrec FROM bib90x AS bx LEFT JOIN bibrec_bib90x AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s'" % (oaiidfield) + + if untilDate: + query = query + " AND b.modification_date <= '%s'" % untilDate + if fromDate: + query = query + " AND b.modification_date >= '%s'" % fromDate + + res = run_sql(query) + + for row in res: + out_dict[row[0]] = 1 + + return out_dict.keys() + + +def OAIGenResumptionToken(): + "Generates unique ID for resumption token management." + + return md5.new(str(time.time())).hexdigest() + + +def OAICacheIn(resumptionToken, sysnos): + "Stores or adds sysnos in cache. Input is a string of sysnos separated by commas." + + filename = "%s/RTdata/%s" % (logdir, resumptionToken) + + fil = open(filename,"w") + cPickle.dump(sysnos,fil) + fil.close() + return 1 + + +def OAICacheOut(resumptionToken): + "Restores string of comma-separated system numbers from cache." 
+ + sysnos = [] + + filename = "%s/RTdata/%s" % (logdir, resumptionToken) + + if OAICacheStatus(resumptionToken): + fil = open(filename,"r") + sysnos = cPickle.load(fil) + fil.close() + else: + return 0 + return sysnos + + +def OAICacheClean(): + "Removes cached resumptionTokens older than specified" + +
+ oai_rt_expire = + + + directory = "%s/RTdata" % logdir + + files = os.listdir(directory) + + for f in files: + filename = directory + "/" + f + # cache entry expires when not modified during a specified period of time + if ((time.time() - os.path.getmtime(filename)) > oai_rt_expire): + os.remove(filename) + + return 1 + + +def OAICacheStatus(resumptionToken): + "Checks cache status. Returns 0 for empty, 1 for full." + + filename = "%s/RTdata/%s" % (logdir, resumptionToken) + + if os.path.exists(filename): + if os.path.getsize(filename) > 0: + return 1 + else: + return 0 + else: + return 0 + + +def get_sets(): + "Returns list of sets." + + out = [] + row = ['',''] + + query = "SELECT setSpec,setName,setDescription FROM oaiset" + res = run_sql (query) + for row in res: + row_bis = [row[0],row[1],row[2]] + out.append(row_bis) + + return out + + +def parse_args(args=""): + "Parse input args" + + out_args = { + "verb" : "", + "metadataPrefix" : "", + "from" : "", + "until" : "", + "set" : "", + "identifier" : "", + "resumptionToken" : "" + } + + if args == "" or args == None: + pass + else: + + list_of_arguments = args.split('&') + + for item in list_of_arguments: + keyvalue = item.split('=') + if len(keyvalue) == 2: + out_args[keyvalue[0]] = keyvalue[1] + else: + out_args['verb'] = "" + + return out_args + +def check_args(arguments): + "Check OAI arguments" + + out = "" + +## principal argument required +# +# + if verbs.has_key(arguments['verb']): + pass + else: + out = out + oai_error("badArgument","Malformed request") + +## resumptionToken exclusive +# +# + if ((arguments['from']!="" or arguments['until']!="" or arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="") and arguments['resumptionToken']!=""): + + out = out + oai_error("badArgument","The request includes illegal arguments") + +## datestamp formats +# +# + if arguments['from']!="" and arguments['from']!="": + from_length = len(arguments['from']) + if 
check_date(arguments['from'],"T00:00:00Z") == "": + out = out + oai_error("badArgument","Bad datestamp format in from") + else: + from_length = 0 + + if arguments['until']!="" and arguments['until']!="": + until_length = len(arguments['until']) + if check_date(arguments['until'],"T23:59:59Z") == "": + out = out + oai_error("badArgument","Bad datestamp format in until") + else: + until_length = 0 + + if from_length <> 0: + if until_length <> 0: + if from_length <> until_length: + out = out + oai_error("badArgument","Bad datestamp format") + + if arguments['from'] > arguments['until']: + out = out + oai_error("badArgument", "Wrong date") + + +## Identify exclusive +# +# + if (arguments['verb']=="Identify" and (arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="" or arguments['from']!="" or arguments['until']!="" or arguments['resumptionToken']!="")): + out = out + oai_error("badArgument","The request includes illegal arguments") + +## parameters for GetRecord +# +# + if arguments['verb']=="GetRecord" and arguments['identifier'] == "": + out = out + oai_error("badArgument","Record identifier missing") + + if arguments['verb']=="GetRecord" and arguments['metadataPrefix'] == "": + out = out + oai_error("badArgument","Missing metadataPrefix") + + +## parameters for ListRecords and ListIdentifiers +# +# + if (arguments['verb']=="ListRecords" or arguments['verb']=="ListIdentifiers") and (arguments['metadataPrefix'] == "" and arguments['resumptionToken'] == ""): + out = out + oai_error("badArgument","Missing metadataPrefix") + + return out + + \ No newline at end of file diff --git a/modules/bibharvest/lib/oai_repository.py.wml b/modules/bibharvest/lib/oai_repository.py.wml new file mode 100644 index 000000000..1c28a236e --- /dev/null +++ b/modules/bibharvest/lib/oai_repository.py.wml @@ -0,0 +1,862 @@ +## $Id$ +## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0 + +## This file is part of the CERN Document Server 
Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +## read config variables: +#include "config.wml" +#include "configbis.wml" + +## start Python: +#! +## $Id$ +## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. +"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0""" + +## fill config variables: +pylibdir = "/python" + + +## OAI config variables +oaiidprefix = "" +oaisampleidentifier = "" +oaiidentifydescription = """""" +oaiidfield = "909COo" +oaisetfield = "909COp" + + +try: + import cPickle + import string + from string import split + import os + import re + import sys + import time + import md5 + + sys.path.append('%s' % pylibdir) + from config import * + from dbquery import run_sql + +except ImportError, e: + import sys + sys.stderr.write("Error: %s" % e) + sys.exit(1) + +verbs = { + "Identify" : [""], + "ListSets" : ["resumptionToken"], + "ListMetadataFormats" : ["resumptionToken"], + "ListRecords" : ["resumptionToken"], + "ListIdentifiers" : ["resumptionToken"], + "GetRecord" : [""] +} + + + +def encode_for_xml(s): + "Encode special chars in string for XML-compliancy." 
+ + s = string.replace(s, '&', '&') + s = string.replace(s, '<', '<') + return s + +def encode_for_url(s): + "Encode special chars in string for URL-compliancy." + + s = string.replace(s, ' ', '%20') + s = string.replace(s, '?', '%3F') + s = string.replace(s, '#', '%23') + s = string.replace(s, '=', '%3D') + s = string.replace(s, '&', '%26') + s = string.replace(s, '%', '%25') + + return s + +def oai_header(args, verb): + "Print OAI header" + + out = "" + + out = out + "" + "\n" + out = out + "\n" + + out = out + " " + OAIGetResponseDate() + "\n" + + if verb: + out = out + " %s\n" % (verb,OAIGetRequestURL(args)) + out = out + " <%s>\n" % verb + else: + out = out + " %s\n" % (OAIGetRequestURL(args)) + + return out + +def oai_footer(verb): + "Print OAI footer" + + out = "" + + if verb: + out = "%s \n" % (out, verb) + out = out + "\n" + + return out + +def oai_error_header(args, verb): + "Print OAI header" + + out = "" + +### out = "Content-Type: text/xml\n\n" + out = out + "" + "\n" + out = out + "\n" + + out = out + " " + OAIGetResponseDate() + "\n" + out = out + " %s\n" % (verb,OAIGetRequestURL(args)) + + return out + +def oai_error_footer(verb): + "Print OAI footer" + + out = "" + out = out + "\n" + + return out + +def get_field(sysno, field): + "Gets list of field 'field' for the record with 'sysno' system number." 
+ + out = [] + digit = field[0:2] + + bx = "bib%sx" % digit + bibx = "bibrec_bib%sx" % digit + query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" % (bx, bibx, sysno, field) + + res = run_sql(query) + + for row in res: + + out.append(row[0]) + + return out + +def UTC_to_localtime(date): + "Convert UTC to localtime" + + ldate = date.split("T")[0] + ltime = date.split("T")[1] + + lhour = ltime.split(":")[0] + lminute = ltime.split(":")[1] + lsec = ltime.split(":")[2] + + lyear = ldate.split("-")[0] + lmonth = ldate.split("-")[1] + lday = ldate.split("-")[2] + + timetoconvert = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec[:-1]),0,0,-1)) - time.timezone + (time.daylight)*3600)) + + return timetoconvert + +def localtime_to_UTC(date): + "Convert localtime to UTC" + + ldate = date.split(" ")[0] + ltime = date.split(" ")[1] + + lhour = ltime.split(":")[0] + lminute = ltime.split(":")[1] + lsec = ltime.split(":")[2] + + lyear = ldate.split("-")[0] + lmonth = ldate.split("-")[1] + lday = ldate.split("-")[2] + + timetoconvert = time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec),0,0,-1)))) + + return timetoconvert + +def get_creation_date(sysno): + "Returns the creation date of the record 'sysno'." + + out = "" + + query = "SELECT DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno) + + res = run_sql(query) + for row in res: + out = row[0] + return localtime_to_UTC(out) + +def get_modification_date(sysno): + "Returns the date of last modification for the record 'sysno'." 
+ + out = "" + + query = "SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno) + + res = run_sql(query) + for row in res: + out = row[0] + return localtime_to_UTC(out) + +def get_earliest_datestamp(): + "Get earliest datestamp in the database" + + out = "" + + query = "SELECT MIN(DATE_FORMAT(creation_date,'%Y-%m-%d %H:%i:%s')) FROM bibrec" + + res = run_sql(query) + return localtime_to_UTC(res[0][0]) + +def check_date(date, time="T00:00:00Z"): + "Check if the date has a correct format" + + if(re.sub("[0123456789\-:TZ]","",date) == ""): + if len(date) == 10: + date = date + time + if len(date) == 20: + date = UTC_to_localtime(date) + else: + date = "" + else: + date = "" + + return date + +def record_exists(sysno): + "Returns 1 if record with SYSNO 'sysno' exists. Returns 0 otherwise." + + out = 0 + query = "SELECT id FROM bibrec WHERE id='%s'" % (sysno) + + res = run_sql(query) + + for row in res: + if row[0] != "": + out = 1 + + return out + +def print_record(sysno, format='marcxml'): + "Prints record 'sysno' formatted accoding to 'format'." + + out = "" + + # sanity check: + if not record_exists(sysno): + return + + if (format == "dc") or (format == "oai_dc"): + format = "xd" + + # print record opening tags: + + out = out + " \n" + out = out + "
\n" + for id in get_field(sysno,oaiidfield): + out = "%s %s\n" % (out, encode_for_url(id)) + out = "%s %s\n" % (out, get_modification_date(sysno)) + for set in get_field(sysno,oaisetfield): + out = "%s %s\n" % (out, set) + out = out + "
\n" + out = out + " \n" + + if format == "marcxml": + out = out + " " + out = out + " 00000coc 2200000uu 4500" + ## MARC21 and XML formats, possibley OAI -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: + + if format == "marcxml": + + out = "%s %d\n" % (out, int(sysno)) + + for digit1 in range(0,10): + for digit2 in range(0,10): + bx = "bib%d%dx" % (digit1, digit2) + bibx = "bibrec_bib%d%dx" % (digit1, digit2) + query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ + "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\ + "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, sysno, str(digit1)+str(digit2)) + res = run_sql(query) + field_number_old = -999 + field_old = "" + for row in res: + field, value, field_number = row[0], row[1], row[2] + ind1, ind2 = field[3], field[4] + if ind1 == "_": + ind1 = " " + if ind2 == "_": + ind2 = " " + # print field tag + if field_number != field_number_old or field[:-1] != field_old[:-1]: + if format == "marcxml": + + fieldid = encode_for_xml(field[0:3]) + + if field_number_old != -999: + out = out + " \n" + + out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower()) + + field_number_old = field_number + field_old = field + # print subfield value + if format == "marcxml": + value = encode_for_xml(value) + out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value) + + # fetch next subfield + # all fields/subfields printed in this run, so close the tag: + if (format == "marcxml") and field_number_old != -999: + out = out + " \n" + out = out + " \n" + + elif format == "xd": + # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: + out = out + " \n" + + for f in get_field(sysno, "041__a"): + out = "%s %s\n" % (out, f) + + for f in get_field(sysno, "100__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in get_field(sysno, "700__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in 
get_field(sysno, "245__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in get_field(sysno, "65017a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + for f in get_field(sysno, "8564_u"): + out = "%s %s\n" % (out, encode_for_xml(encode_for_url(f))) + + for f in get_field(sysno, "520__a"): + out = "%s %s\n" % (out, encode_for_xml(f)) + + date = get_creation_date(sysno) + + out = "%s %s\n" % (out, date) + out = out + " \n" + + # print record closing tags: + + out = out + " \n" + out = out + "
\n" + + return out + +def OAIListMetadataFormats(args): + "Generates response to OAIListMetadataFormats verb." + +
+ + oai_rt_expire = + nb_formats_in_resume = + + + + arg = parse_args(args) + + out = "" + + flag = 1 # list or not depending on identifier + + if arg['identifier'] != "": + + flag = 0 + + sysno = OAIGetSysno(arg['identifier']) + + if record_exists(sysno): + + flag = 1 + + else: + + out = out + oai_error("badArgument","invalid record Identifier") + out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats") + return out + + if flag: + out = out + " \n" + out = out + " oai_dc\n" + out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n" + out = out + " http://purl.org/dc/elements/1.1/\n" + out = out + " \n" + out = out + " \n" + out = out + " marcxml\n" + out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n" + out = out + " http://www.loc.gov/MARC21/slim\n" + out = out + " \n" + + out = oai_header(args,"ListMetadataFormats") + out + oai_footer("ListMetadataFormats") + return out + + +def OAIListRecords(args): + "Generates response to OAIListRecords verb." 
+ + + oai_rt_expire = + nb_records_in_resume = + + + arg = parse_args(args) + + out = "" + + sysnos = [] + sysno = [] + + # check if the resumptionToken did not expire + if arg['resumptionToken']: + filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) + if os.path.exists(filename) == 0: + out = oai_error("badResumptionToken","ResumptionToken expired") + out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") + return out + + if arg['resumptionToken'] != "": + sysnos = OAICacheOut(arg['resumptionToken']) + arg['metadataPrefix'] = sysnos.pop() + else: + sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until']) + + if len(sysnos) == 0: # noRecordsMatch error + + out = out + oai_error("noRecordsMatch","no records correspond to the request") + out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") + return out + + i = 0 + for s in sysnos: + if s: + i = i + 1 + if i > nb_records_in_resume: # cache or write? + if i == nb_records_in_resume + 1: # resumptionToken? + arg['resumptionToken'] = OAIGenResumptionToken() + extdate = OAIGetResponseDate(oai_rt_expire) + if extdate: + out = "%s %s\n" % (out,extdate,arg['resumptionToken']) + else: + out = "%s %s\n" % (out, arg['resumptionToken']) + sysno.append(s) + else: + done = 0 + for f in get_field(s, "245__a"): + if done == 0: + out = out + print_record(s, arg['metadataPrefix']) + + if i > nb_records_in_resume: + OAICacheClean() + sysno.append(arg['metadataPrefix']) + OAICacheIn(arg['resumptionToken'],sysno) + + out = oai_header(args,"ListRecords") + out + oai_footer("ListRecords") + return out + +def OAIListSets(args): + "Lists available sets for OAI metadata harvesting." 
def OAIGetRecord(args):
    """Return the OAI response to the GetRecord verb.

    'args' is the raw query string of the request.  The record is looked
    up by the 'identifier' argument and printed in the format named by
    the 'metadataPrefix' argument.

    Returns the complete response as a string: the record wrapped in the
    regular OAI header and footer, or an error wrapped in the error
    header and footer when no record matches the identifier.
    """

    arg = parse_args(args)
    sysno = OAIGetSysno(arg['identifier'])

    if not record_exists(sysno):
        # OAI-PMH 2.0 reserves "idDoesNotExist" for an identifier that is
        # unknown to the repository; "badArgument" is meant for missing,
        # repeated or illegal request arguments.
        out = oai_error("idDoesNotExist", "invalid record Identifier")
        return oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord")

    # The modification date is not fetched here: its value was never used
    # when building the response.
    out = print_record(sysno, arg['metadataPrefix'])

    return oai_header(args, "GetRecord") + out + oai_footer("GetRecord")
+ + + oai_rt_expire = + nb_identifiers_in_resume = + + + arg = parse_args(args) + + out = "" + + sysno = [] + sysnos = [] + + if arg['resumptionToken']: + filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken']) + if os.path.exists(filename) == 0: + out = out + oai_error("badResumptionToken","ResumptionToken expired") + out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") + return out + + if arg['resumptionToken']: + sysnos = OAICacheOut(arg['resumptionToken']) + else: + sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until']) + + if len(sysnos) == 0: # noRecordsMatch error + out = out + oai_error("noRecordsMatch","no records correspond to the request") + out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") + return out + + i = 0 + for s in sysnos: + if s: + i = i + 1 + if i > nb_identifiers_in_resume: # cache or write? + if i == nb_identifiers_in_resume + 1: # resumptionToken? + arg['resumptionToken'] = OAIGenResumptionToken() + extdate = OAIGetResponseDate(oai_rt_expire) + if extdate: + out = "%s %s\n" % (out, extdate,arg['resumptionToken']) + else: + out = "%s %s\n" % (out, arg['resumptionToken']) + sysno.append(s) + else: + done = 0 + for f in get_field(s, "245__a"): + if done == 0: + for id in get_field(s,oaiidfield): + out = out + "
\n" + out = "%s %s\n" % (out, encode_for_url(id)) + out = "%s %s\n" % (out, get_modification_date(OAIGetSysno(id))) + for set in get_field(s,oaisetfield): + out = "%s %s\n" % (out, arg['set']) + out = out + "
def OAIGetRequestURL(args):
    """Return the request URL of the OAI repository script.

    The URL is built from the configured 'weburl' followed by
    "/oai2d.py/".  'args' is kept in the signature for the callers but is
    not used: the query string is not appended to the URL.
    """

    # No regular expression is compiled here: nothing in the returned URL
    # needs ampersand substitution as long as the query string is left out.
    return weburl + "/" + "oai2d.py/"
def OAICacheOut(resumptionToken):
    """Restore the list of system numbers cached under 'resumptionToken'.

    Returns the unpickled object stored by OAICacheIn, or an empty list
    when the token is not a well-formed token or when OAICacheStatus
    reports no usable cache file for it.  An empty list (rather than 0)
    is returned so that callers can apply len() to the result.
    """

    # The token comes straight from the HTTP request and is interpolated
    # into a file path below.  Tokens produced by OAIGenResumptionToken are
    # MD5 hex digests, so anything that is not exactly 32 lowercase hex
    # digits (e.g. "../../some/file") is rejected before touching the disk.
    if not re.match(r"[0-9a-f]{32}\Z", resumptionToken or ""):
        return []

    if not OAICacheStatus(resumptionToken):
        return []

    filename = "%s/RTdata/%s" % (logdir, resumptionToken)

    fil = open(filename, "r")
    try:
        # NOTE(review): unpickling executes whatever the file describes; this
        # is only acceptable because the RTdata directory is expected to hold
        # files written by OAICacheIn -- confirm nothing else can write there.
        sysnos = cPickle.load(fil)
    finally:
        # close the handle even if the cache file is corrupt
        fil.close()

    return sysnos
def parse_args(args=""):
    """Parse an OAI query string into a dictionary of request arguments.

    'args' is a string of 'key=value' items joined by '&'; it may also be
    empty or None.  The result always contains the keys 'verb',
    'metadataPrefix', 'from', 'until', 'set', 'identifier' and
    'resumptionToken' (empty string when absent); any other key found in
    'args' is stored as well.  Keys and values are percent-decoded, so
    that e.g. 'oai%3Afoo%3A1' is returned as 'oai:foo:1'.

    An item that does not consist of exactly one key and one value marks
    the whole request as malformed: 'verb' is then returned empty,
    wherever in the query string the bad item appears.
    """

    # Imported locally so that the function does not rely on the module's
    # import list; the fallback keeps it usable on newer interpreters.
    try:
        from urllib import unquote_plus
    except ImportError:
        from urllib.parse import unquote_plus

    out_args = {
        "verb"            : "",
        "metadataPrefix"  : "",
        "from"            : "",
        "until"           : "",
        "set"             : "",
        "identifier"      : "",
        "resumptionToken" : ""
    }

    if not args:
        return out_args

    malformed = 0

    for item in args.split('&'):
        keyvalue = item.split('=')
        if len(keyvalue) == 2:
            # OAI-PMH requires special characters such as ':' to be
            # percent-encoded in requests, so decode before use.
            out_args[unquote_plus(keyvalue[0])] = unquote_plus(keyvalue[1])
        else:
            malformed = 1

    # Clear the verb only after all items were read; doing it inside the
    # loop would let a later 'verb=...' item silently undo the rejection.
    if malformed:
        out_args['verb'] = ""

    return out_args
check_date(arguments['from'],"T00:00:00Z") == "": + out = out + oai_error("badArgument","Bad datestamp format in from") + else: + from_length = 0 + + if arguments['until']!="" and arguments['until']!="": + until_length = len(arguments['until']) + if check_date(arguments['until'],"T23:59:59Z") == "": + out = out + oai_error("badArgument","Bad datestamp format in until") + else: + until_length = 0 + + if from_length <> 0: + if until_length <> 0: + if from_length <> until_length: + out = out + oai_error("badArgument","Bad datestamp format") + + if arguments['from'] > arguments['until']: + out = out + oai_error("badArgument", "Wrong date") + + +## Identify exclusive +# +# + if (arguments['verb']=="Identify" and (arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="" or arguments['from']!="" or arguments['until']!="" or arguments['resumptionToken']!="")): + out = out + oai_error("badArgument","The request includes illegal arguments") + +## parameters for GetRecord +# +# + if arguments['verb']=="GetRecord" and arguments['identifier'] == "": + out = out + oai_error("badArgument","Record identifier missing") + + if arguments['verb']=="GetRecord" and arguments['metadataPrefix'] == "": + out = out + oai_error("badArgument","Missing metadataPrefix") + + +## parameters for ListRecords and ListIdentifiers +# +# + if (arguments['verb']=="ListRecords" or arguments['verb']=="ListIdentifiers") and (arguments['metadataPrefix'] == "" and arguments['resumptionToken'] == ""): + out = out + oai_error("badArgument","Missing metadataPrefix") + + return out + + \ No newline at end of file diff --git a/modules/bibharvest/web/.cvsignore b/modules/bibharvest/web/.cvsignore new file mode 100644 index 000000000..4e66d5c3e --- /dev/null +++ b/modules/bibharvest/web/.cvsignore @@ -0,0 +1,7 @@ +Makefile +Makefile.in +z_* +*.O +*~ +*.py +*.shtml \ No newline at end of file diff --git a/modules/bibharvest/Makefile.am b/modules/bibharvest/web/Makefile.am similarity 
index 71% copy from modules/bibharvest/Makefile.am copy to modules/bibharvest/web/Makefile.am index 47ad17ba4..d015c43e5 100644 --- a/modules/bibharvest/Makefile.am +++ b/modules/bibharvest/web/Makefile.am @@ -1,22 +1,33 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = bin +webappdir = $(WEBDIR) -CLEANFILES = *~ \ No newline at end of file +webapp_DATA=oai2d.py + +EXTRA_DIST = $(wildcard *.wml) + +CLEANFILES = $(bin_SCRIPTS) $(webapp_DATA) *~ *.tmp search.pyc + +%.py: %.py.wml ../../../config/config.wml ../../../config/configbis.wml + $(WML) -o $@ $< + +%: %.wml ../../../config/config.wml ../../../config/configbis.wml + $(WML) -o $@ $< + chmod u+x $@ diff --git a/modules/bibharvest/web/oai2d.py b/modules/bibharvest/web/oai2d.py new file mode 100644 index 000000000..1213249b4 --- /dev/null +++ b/modules/bibharvest/web/oai2d.py @@ -0,0 +1,138 @@ +## $Id$ +## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0 + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. 
def index (req):
    """Entry point of the OAI repository web interface.

    Rebuilds the query string of the request 'req' (taken as is for GET,
    re-encoded from the submitted form for POST), has it parsed and
    checked by oai_repository, and writes the XML response to 'req'.
    Always returns a single newline.
    """

    # rebuild the query string of the request
    query = ""
    method = req.method
    if method == "GET":
        query = req.args
    elif method == "POST":
        fields = dict([(name, req.form[name]) for name in req.form.keys()])
        query = urllib.urlencode(fields)

    # parse the arguments and check them for OAI compliance
    parsed = oai_repository.parse_args(query)
    problems = oai_repository.check_args(parsed)

    # from here on only the XML response is produced
    req.content_type = "text/xml"
    req.send_http_header()

    if problems != "":
        # invalid request: the collected errors go between header and footer
        req.write(oai_repository.oai_header(query,""))
        req.write(problems)
        req.write(oai_repository.oai_footer(""))
        return "\n"

    # verb -> name of the oai_repository function that answers it; the
    # function is looked up only for the verb that was actually requested
    handlers = {
        "Identify"            : "OAIIdentify",
        "ListSets"            : "OAIListSets",
        "ListIdentifiers"     : "OAIListIdentifiers",
        "ListRecords"         : "OAIListRecords",
        "GetRecord"           : "OAIGetRecord",
        "ListMetadataFormats" : "OAIListMetadataFormats"
    }

    verb = parsed['verb']
    if verb in handlers:
        respond = getattr(oai_repository, handlers[verb])
        req.write(respond(query))
    else:
        req.write(oai_repository.oai_error("badVerb","Illegal OAI verb"))

    return "\n"
def index (req):
    """Entry point of the OAI repository web interface.

    Rebuilds the query string of the request 'req' (taken as is for GET,
    re-encoded from the submitted form for POST), has it parsed and
    checked by oai_repository, and writes the XML response to 'req'.
    Always returns a single newline.
    """

    # rebuild the query string of the request
    query = ""
    method = req.method
    if method == "GET":
        query = req.args
    elif method == "POST":
        fields = dict([(name, req.form[name]) for name in req.form.keys()])
        query = urllib.urlencode(fields)

    # parse the arguments and check them for OAI compliance
    parsed = oai_repository.parse_args(query)
    problems = oai_repository.check_args(parsed)

    # from here on only the XML response is produced
    req.content_type = "text/xml"
    req.send_http_header()

    if problems != "":
        # invalid request: the collected errors go between header and footer
        req.write(oai_repository.oai_header(query,""))
        req.write(problems)
        req.write(oai_repository.oai_footer(""))
        return "\n"

    # verb -> name of the oai_repository function that answers it; the
    # function is looked up only for the verb that was actually requested
    handlers = {
        "Identify"            : "OAIIdentify",
        "ListSets"            : "OAIListSets",
        "ListIdentifiers"     : "OAIListIdentifiers",
        "ListRecords"         : "OAIListRecords",
        "GetRecord"           : "OAIGetRecord",
        "ListMetadataFormats" : "OAIListMetadataFormats"
    }

    verb = parsed['verb']
    if verb in handlers:
        respond = getattr(oai_repository, handlers[verb])
        req.write(respond(query))
    else:
        req.write(oai_repository.oai_error("badVerb","Illegal OAI verb"))

    return "\n"