diff --git a/modules/bibharvest/Makefile.am b/modules/bibharvest/Makefile.am
index 47ad17ba4..7e62cbf12 100644
--- a/modules/bibharvest/Makefile.am
+++ b/modules/bibharvest/Makefile.am
@@ -1,22 +1,22 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-SUBDIRS = bin
+SUBDIRS = bin lib web
CLEANFILES = *~
\ No newline at end of file
diff --git a/modules/bibharvest/lib/.cvsignore b/modules/bibharvest/lib/.cvsignore
new file mode 100644
index 000000000..6a1572735
--- /dev/null
+++ b/modules/bibharvest/lib/.cvsignore
@@ -0,0 +1,8 @@
+Makefile
+Makefile.in
+z_*
+*.O
+*~
+oai1d
+oai2d
+*.py
\ No newline at end of file
diff --git a/modules/bibharvest/Makefile.am b/modules/bibharvest/lib/Makefile.am
similarity index 78%
copy from modules/bibharvest/Makefile.am
copy to modules/bibharvest/lib/Makefile.am
index 47ad17ba4..036d39c7b 100644
--- a/modules/bibharvest/Makefile.am
+++ b/modules/bibharvest/lib/Makefile.am
@@ -1,22 +1,28 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-SUBDIRS = bin
+pylibdir=$(libdir)/python/cdsware
+pylib_DATA=oai_repository.py
-CLEANFILES = *~
\ No newline at end of file
+EXTRA_DIST = $(wildcard *.wml)
+
+CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc
+
+%.py: %.py.wml ../../../config/config.wml ../../../config/configbis.wml
+ $(WML) -o $@ $<
\ No newline at end of file
diff --git a/modules/bibharvest/lib/oai_repository.py b/modules/bibharvest/lib/oai_repository.py
new file mode 100644
index 000000000..1c28a236e
--- /dev/null
+++ b/modules/bibharvest/lib/oai_repository.py
@@ -0,0 +1,862 @@
+## $Id$
+## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+## read config variables:
+#include "config.wml"
+#include "configbis.wml"
+
+## start Python:
+#!
+## $Id$
+## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0"""
+
+## fill config variables:
+pylibdir = "/python"  # NOTE(review): a directory prefix looks to be missing in front of "/python" -- confirm against the WML source
+
+
+## OAI config variables
+oaiidprefix = ""
+oaisampleidentifier = ""
+oaiidentifydescription = """"""
+oaiidfield = "909COo"  # tag whose values are looked up as OAI identifiers (see OAIGetSysno)
+oaisetfield = "909COp"  # tag whose values are looked up as OAI set names (see OAIGetSysnoList)
+
+## Import the standard and CDSware modules; a failed import is reported on stderr and ends the process with status 1.
+try:
+ import cPickle
+ import string
+ from string import split
+ import os
+ import re
+ import sys
+ import time
+ import md5
+
+ sys.path.append('%s' % pylibdir)  # extend the module search path before the two imports below
+ from config import *  # presumably supplies logdir, weburl, cdsname and supportemail used further down -- verify
+ from dbquery import run_sql
+
+except ImportError, e:
+ import sys
+ sys.stderr.write("Error: %s" % e)
+ sys.exit(1)
+## Verbs accepted by check_args(); only the keys are consulted in the code visible here.
+verbs = {
+ "Identify" : [""],
+ "ListSets" : ["resumptionToken"],
+ "ListMetadataFormats" : ["resumptionToken"],
+ "ListRecords" : ["resumptionToken"],
+ "ListIdentifiers" : ["resumptionToken"],
+ "GetRecord" : [""]
+}
+
+
+
+def encode_for_xml(s):
+    "Return 's' with '&' and '<' replaced by the XML entities '&amp;' and '&lt;'."
+    # '&' is escaped first so that the '&' introduced by '&lt;' is left alone.
+    s = s.replace('&', '&amp;')
+    s = s.replace('<', '&lt;')
+    return s
+
+def encode_for_url(s):
+    "Return 's' with '%', ' ', '?', '#', '=' and '&' percent-encoded for use in a URL."
+    # '%' has to go first: otherwise the '%' of every escape produced below would itself become '%25' (' ' -> '%2520').
+    s = s.replace('%', '%25')
+    s = s.replace(' ', '%20')
+    s = s.replace('?', '%3F')
+    s = s.replace('#', '%23')
+    s = s.replace('=', '%3D')
+    s = s.replace('&', '%26')
+
+    return s
+
+def oai_header(args, verb):
+ "Return (as a string, nothing is printed) the start of an OAI response: the response date, the request URL and, for a non-empty 'verb', a line holding '<verb>'."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup (one of them gets two values for a single '%s') -- confirm against the WML source.
+ out = out + "" + "\n"
+ out = out + "\n"
+
+ out = out + " " + OAIGetResponseDate() + "\n"
+
+ if verb:
+ out = out + " %s\n" % (verb,OAIGetRequestURL(args))
+ out = out + " <%s>\n" % verb
+ else:
+ out = out + " %s\n" % (OAIGetRequestURL(args))
+
+ return out
+
+def oai_footer(verb):
+ "Return the end of an OAI response as a string; 'verb' is mentioned in it only when it is non-empty."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup -- confirm against the WML source.
+ if verb:
+ out = "%s %s>\n" % (out, verb)
+ out = out + "\n"
+
+ return out
+
+def oai_error_header(args, verb):
+ "Return the start of an OAI error response as a string: the response date followed by a line built from 'verb' and the request URL."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup (two values are given for a single '%s') -- confirm against the WML source.
+### out = "Content-Type: text/xml\n\n"
+ out = out + "" + "\n"
+ out = out + "\n"
+
+ out = out + " " + OAIGetResponseDate() + "\n"
+ out = out + " %s\n" % (verb,OAIGetRequestURL(args))
+
+ return out
+
+def oai_error_footer(verb):
+ "Return the end of an OAI error response as a string; 'verb' is accepted but not used."
+
+ out = ""
+ out = out + "\n"  # NOTE(review): the closing markup looks to be missing from this literal -- confirm
+
+ return out
+
+def get_field(sysno, field):
+ "Return the list of values stored under tag 'field' (e.g. '245__a') for the record whose system number is 'sysno'."
+
+ out = []
+ digit = field[0:2]
+ # The first two characters of the tag select the bibXXx / bibrec_bibXXx table pair to query.
+ bx = "bib%sx" % digit
+ bibx = "bibrec_bib%sx" % digit
+ query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" % (bx, bibx, sysno, field)
+ # NOTE(review): 'sysno' and 'field' are interpolated into the SQL text unescaped -- callers must only pass trusted values.
+ res = run_sql(query)
+
+ for row in res:
+
+ out.append(row[0])
+
+ return out
+
+def UTC_to_localtime(date):
+ "Convert a 'YYYY-MM-DDThh:mm:ssZ' datestamp into a 'YYYY-MM-DD hh:mm:ss' string shifted by the local UTC offset."
+ # Split the datestamp into its date and time halves, then into single fields.
+ ldate = date.split("T")[0]
+ ltime = date.split("T")[1]
+
+ lhour = ltime.split(":")[0]
+ lminute = ltime.split(":")[1]
+ lsec = ltime.split(":")[2]
+
+ lyear = ldate.split("-")[0]
+ lmonth = ldate.split("-")[1]
+ lday = ldate.split("-")[2]
+ # lsec[:-1] drops the last character of the seconds part (expected to be the trailing 'Z').
+ timetoconvert = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec[:-1]),0,0,-1)) - time.timezone + (time.daylight)*3600))
+ # NOTE(review): an hour is added whenever time.daylight is non-zero, whatever the date being converted -- confirm this is intended.
+ return timetoconvert
+
+def localtime_to_UTC(date):
+ "Convert a local 'YYYY-MM-DD hh:mm:ss' string into a UTC 'YYYY-MM-DDThh:mm:ssZ' datestamp."
+ # Split the input on the blank into its date and time halves, then into single fields.
+ ldate = date.split(" ")[0]
+ ltime = date.split(" ")[1]
+
+ lhour = ltime.split(":")[0]
+ lminute = ltime.split(":")[1]
+ lsec = ltime.split(":")[2]
+
+ lyear = ldate.split("-")[0]
+ lmonth = ldate.split("-")[1]
+ lday = ldate.split("-")[2]
+ # time.mktime() reads the fields as local time (DST flag -1); time.gmtime() turns the result into UTC fields.
+ timetoconvert = time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec),0,0,-1))))
+
+ return timetoconvert
+
+def get_creation_date(sysno):
+ "Return the creation date of record 'sysno' from bibrec, converted by localtime_to_UTC."
+
+ out = ""
+ # '%%' is a literal '%' once Python has filled in the id, so MySQL receives '%Y-%m-%d %H:%i:%s'.
+ query = "SELECT DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno)
+
+ res = run_sql(query)
+ for row in res:
+ out = row[0]
+ return localtime_to_UTC(out)
+
+def get_modification_date(sysno):
+ "Return the last modification date of record 'sysno' from bibrec, converted by localtime_to_UTC."
+
+ out = ""
+ # '%%' is a literal '%' once Python has filled in the id, so MySQL receives '%Y-%m-%d %H:%i:%s'.
+ query = "SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno)
+
+ res = run_sql(query)
+ for row in res:
+ out = row[0]
+ return localtime_to_UTC(out)
+
+def get_earliest_datestamp():
+ "Return the smallest creation date found in bibrec, converted by localtime_to_UTC."
+
+ out = ""  # not read again in this function
+ # No Python formatting is applied to this query, hence the single '%' signs.
+ query = "SELECT MIN(DATE_FORMAT(creation_date,'%Y-%m-%d %H:%i:%s')) FROM bibrec"
+
+ res = run_sql(query)
+ return localtime_to_UTC(res[0][0])
+
+def check_date(date, time="T00:00:00Z"):
+ "Return 'date' converted by UTC_to_localtime, or '' when it is not acceptable; a 10-character date first gets 'time' appended."
+ # The pattern only rejects characters other than digits, '-', ':', 'T' and 'Z'; the layout itself is judged by length alone.
+ if(re.sub("[0123456789\-:TZ]","",date) == ""):
+ if len(date) == 10:
+ date = date + time  # 'time' is the parameter here; it hides the time module inside this function
+ if len(date) == 20:
+ date = UTC_to_localtime(date)
+ else:
+ date = ""
+ else:
+ date = ""
+
+ return date
+
+def record_exists(sysno):
+    "Tell whether bibrec holds a row whose id is 'sysno': 1 if it does, 0 if not."
+
+    lookup = "SELECT id FROM bibrec WHERE id='%s'" % (sysno)
+
+    # A single row with a non-empty id settles the answer, so the
+    # scan can stop at the first one.
+    for found in run_sql(lookup):
+        if found[0] != "":
+            return 1
+
+    # No usable row came back.
+    return 0
+
+def print_record(sysno, format='marcxml'):
+ "Return record 'sysno' as a string formatted according to 'format' ('marcxml', or 'dc'/'oai_dc'/'xd' for Dublin Core)."
+ # NOTE(review): many literals below look like they lost their XML markup (several get more values than they have '%s' slots) -- confirm against the WML source.
+ out = ""
+
+ # sanity check:
+ if not record_exists(sysno):
+ return  # bare return: the caller receives None, not a string
+
+ if (format == "dc") or (format == "oai_dc"):
+ format = "xd"  # both Dublin Core names share the 'xd' branch further down
+
+ # print record opening tags:
+
+ out = out + " \n"
+ out = out + " \n"
+ for id in get_field(sysno,oaiidfield):
+ out = "%s %s\n" % (out, encode_for_url(id))
+ out = "%s %s\n" % (out, get_modification_date(sysno))
+ for set in get_field(sysno,oaisetfield):
+ out = "%s %s\n" % (out, set)
+ out = out + " \n"
+ out = out + " \n"
+
+ if format == "marcxml":
+ out = out + " "
+ out = out + " 00000coc 2200000uu 4500"
+ ## MARC21 and XML formats, possibly OAI -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables:
+
+ if format == "marcxml":
+
+ out = "%s %d\n" % (out, int(sysno))
+ # Walk the hundred bibNNx / bibrec_bibNNx table pairs, NN running from 00 to 99.
+ for digit1 in range(0,10):
+ for digit2 in range(0,10):
+ bx = "bib%d%dx" % (digit1, digit2)
+ bibx = "bibrec_bib%d%dx" % (digit1, digit2)
+ query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
+ "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\
+ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, sysno, str(digit1)+str(digit2))
+ res = run_sql(query)
+ field_number_old = -999  # -999 marks that no field has been opened yet for this table pair
+ field_old = ""
+ for row in res:
+ field, value, field_number = row[0], row[1], row[2]
+ ind1, ind2 = field[3], field[4]  # 4th and 5th characters of the tag; '_' is turned into a blank below
+ if ind1 == "_":
+ ind1 = " "
+ if ind2 == "_":
+ ind2 = " "
+ # print field tag
+ if field_number != field_number_old or field[:-1] != field_old[:-1]:
+ if format == "marcxml":
+
+ fieldid = encode_for_xml(field[0:3])  # not read again in this function
+
+ if field_number_old != -999:
+ out = out + " \n"
+
+ out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower())
+
+ field_number_old = field_number
+ field_old = field
+ # print subfield value
+ if format == "marcxml":
+ value = encode_for_xml(value)
+ out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value)
+
+ # fetch next subfield
+ # all fields/subfields printed in this run, so close the tag:
+ if (format == "marcxml") and field_number_old != -999:
+ out = out + " \n"
+ out = out + " \n"
+
+ elif format == "xd":
+ # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
+ out = out + " \n"
+
+ for f in get_field(sysno, "041__a"):
+ out = "%s %s\n" % (out, f)
+
+ for f in get_field(sysno, "100__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "700__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "245__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "65017a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "8564_u"):
+ out = "%s %s\n" % (out, encode_for_xml(encode_for_url(f)))
+
+ for f in get_field(sysno, "520__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ date = get_creation_date(sysno)
+
+ out = "%s %s\n" % (out, date)
+ out = out + " \n"
+
+ # print record closing tags:
+
+ out = out + " \n"
+ out = out + " \n"
+
+ return out
+
+def OAIListMetadataFormats(args):
+ "Return the OAI response to the ListMetadataFormats verb as a string, or an error response when 'identifier' names no existing record."
+
+
+ # NOTE(review): the two assignments below have no right-hand side here (their values were presumably substituted from the WML config) -- confirm.
+ oai_rt_expire =
+ nb_formats_in_resume =
+
+
+ # NOTE(review): the literals further down look like they lost their XML markup -- confirm against the WML source.
+ arg = parse_args(args)
+
+ out = ""
+
+ flag = 1 # list or not depending on identifier
+
+ if arg['identifier'] != "":
+
+ flag = 0
+
+ sysno = OAIGetSysno(arg['identifier'])
+
+ if record_exists(sysno):
+
+ flag = 1
+
+ else:
+
+ out = out + oai_error("badArgument","invalid record Identifier")
+ out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats")
+ return out
+
+ if flag:
+ out = out + " \n"
+ out = out + " oai_dc\n"
+ out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n"
+ out = out + " http://purl.org/dc/elements/1.1/\n"
+ out = out + " \n"
+ out = out + " \n"
+ out = out + " marcxml\n"
+ out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n"
+ out = out + " http://www.loc.gov/MARC21/slim\n"
+ out = out + " \n"
+
+ out = oai_header(args,"ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
+ return out
+
+
+def OAIListRecords(args):
+    "Return the OAI response to the ListRecords verb as a string; records beyond the per-response limit are cached under a new resumptionToken."
+
+    # NOTE(review): the two assignments below have no right-hand side here (their values were presumably substituted from the WML config) -- confirm.
+    oai_rt_expire =
+    nb_records_in_resume =
+
+    # NOTE(review): the resumptionToken literals below look like they lost their XML markup -- confirm against the WML source.
+    arg = parse_args(args)
+
+    out = ""
+
+    sysnos = []
+    sysno = []
+
+    # check if the resumptionToken did not expire
+    if arg['resumptionToken']:
+        filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken'])
+        if os.path.exists(filename) == 0:
+            out = oai_error("badResumptionToken","ResumptionToken expired")
+            out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
+            return out
+
+    if arg['resumptionToken'] != "":
+        sysnos = OAICacheOut(arg['resumptionToken'])
+        arg['metadataPrefix'] = sysnos.pop()
+    else:
+        sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until'])
+
+    if len(sysnos) == 0: # noRecordsMatch error
+        out = out + oai_error("noRecordsMatch","no records correspond to the request")
+        out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
+        return out
+
+    i = 0
+    for s in sysnos:
+        if s:
+            i = i + 1
+            if i > nb_records_in_resume: # cache or write?
+                if i == nb_records_in_resume + 1: # resumptionToken?
+                    arg['resumptionToken'] = OAIGenResumptionToken()
+                    extdate = OAIGetResponseDate(oai_rt_expire)
+                    if extdate:
+                        out = "%s %s\n" % (out,extdate,arg['resumptionToken'])
+                    else:
+                        out = "%s %s\n" % (out, arg['resumptionToken'])
+                sysno.append(s)
+            else:
+                done = 0
+                for f in get_field(s, "245__a"):
+                    if done == 0:
+                        out = out + print_record(s, arg['metadataPrefix'])
+                        done = 1 # emit the record once, however many 245__a values it has
+
+    if i > nb_records_in_resume:
+        OAICacheClean()
+        sysno.append(arg['metadataPrefix']) # stored last so the resumed request can pop() it back
+        OAICacheIn(arg['resumptionToken'],sysno)
+
+    out = oai_header(args,"ListRecords") + out + oai_footer("ListRecords")
+    return out
+
+def OAIListSets(args):
+ "Return the OAI response to the ListSets verb as a string, with one entry per set returned by get_sets()."
+
+ # NOTE(review): the two assignments below have no right-hand side here (their values were presumably substituted from the WML config) -- confirm.
+ oai_rt_expire =
+ nb_sets_in_resume =
+
+ # NOTE(review): the literals further down look like they lost their XML markup -- confirm against the WML source.
+ arg = parse_args(args)
+
+ out = ""
+
+ # note: no flow control in ListSets
+
+ sets = get_sets()
+
+ for s in sets:
+
+ out = out + " \n"
+ out = "%s %s\n" % (out, s[0])
+ out = "%s %s\n" % (out, s[1])
+ if s[2]:
+ out = "%s %s\n" % (out, s[2])
+ out = out + " \n"
+
+ out = oai_header(args,"ListSets") + out + oai_footer("ListSets")
+
+ return out
+
+
+def OAIGetRecord(args):
+ """Return the OAI response to the GetRecord verb as a string: record 'identifier' in 'metadataPrefix' format, or a badArgument error response when no such record exists."""
+
+ arg = parse_args(args)
+ out = ""
+ sysno = OAIGetSysno(arg['identifier'])
+
+ if record_exists(sysno):
+ datestamp = get_modification_date(sysno)  # not read again in this function
+ out = out + print_record(sysno, arg['metadataPrefix'])
+ else:
+ out = out + oai_error("badArgument","invalid record Identifier")
+ out = oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord")
+ return out
+
+ out = oai_header(args,"GetRecord") + out + oai_footer("GetRecord")
+
+ return out
+
+
+def OAIListIdentifiers(args):
+    "Return the OAI response to the ListIdentifiers verb as a string; records beyond the per-response limit are cached under a new resumptionToken."
+
+    # NOTE(review): the two assignments below have no right-hand side here (their values were presumably substituted from the WML config) -- confirm.
+    oai_rt_expire =
+    nb_identifiers_in_resume =
+
+    # NOTE(review): the literals further down look like they lost their XML markup -- confirm against the WML source.
+    arg = parse_args(args)
+
+    out = ""
+
+    sysno = []
+    sysnos = []
+
+    if arg['resumptionToken']:
+        filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken'])
+        if os.path.exists(filename) == 0:
+            out = out + oai_error("badResumptionToken","ResumptionToken expired")
+            out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
+            return out
+
+    if arg['resumptionToken']:
+        sysnos = OAICacheOut(arg['resumptionToken'])
+    else:
+        sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until'])
+
+    if len(sysnos) == 0: # noRecordsMatch error
+        out = out + oai_error("noRecordsMatch","no records correspond to the request")
+        out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
+        return out
+
+    i = 0
+    for s in sysnos:
+        if s:
+            i = i + 1
+            if i > nb_identifiers_in_resume: # cache or write?
+                if i == nb_identifiers_in_resume + 1: # resumptionToken?
+                    arg['resumptionToken'] = OAIGenResumptionToken()
+                    extdate = OAIGetResponseDate(oai_rt_expire)
+                    if extdate:
+                        out = "%s %s\n" % (out, extdate,arg['resumptionToken'])
+                    else:
+                        out = "%s %s\n" % (out, arg['resumptionToken'])
+                sysno.append(s)
+            else:
+                done = 0
+                for f in get_field(s, "245__a"):
+                    if done == 0:
+                        for id in get_field(s,oaiidfield):
+                            out = out + " \n"
+                            out = "%s %s\n" % (out, encode_for_url(id))
+                            out = "%s %s\n" % (out, get_modification_date(OAIGetSysno(id)))
+                            for setspec in get_field(s,oaisetfield):
+                                out = "%s %s\n" % (out, setspec) # the record's own set, as print_record does, not the requested one
+                            out = out + " \n"
+                        done = 1
+
+    if i > nb_identifiers_in_resume:
+        OAICacheClean() # clean cache from expired resumptionTokens
+        OAICacheIn(arg['resumptionToken'],sysno)
+
+    out = oai_header(args,"ListIdentifiers") + out + oai_footer("ListIdentifiers")
+
+    return out
+
+
+def OAIIdentify(args):
+ "Return the OAI response to the Identify verb as a string, built from cdsname, weburl, supportemail, the earliest datestamp and oaiidentifydescription."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup -- confirm against the WML source.
+ repositoryName = " " + cdsname + "\n"
+ baseURL = " %s/oai2d.py/\n" % weburl
+ protocolVersion = " 2.0\n"
+ adminEmail = " mailto:%s\n" % supportemail
+ earliestDST = " %s\n" % get_earliest_datestamp()
+ repositoryIdentifier = "%s" % oaiidprefix  # not read again in this function
+ sampleIdentifier = oaisampleidentifier  # not read again in this function
+ identifyDescription = oaiidentifydescription + "\n"  # not read again; the module-level value is appended below instead
+
+ out = out + repositoryName
+ out = out + baseURL
+ out = out + protocolVersion
+ out = out + adminEmail
+ out = out + earliestDST
+ out = out + " no\n"
+ out = out + " YYYY-MM-DDThh:mm:ssZ\n"
+ # print " \n"
+ out = out + oaiidentifydescription
+
+ out = oai_header(args,"Identify") + out + oai_footer("Identify")
+
+ return out
+
+
+def OAIGetRequestURL(args):
+ "Return the request URL used in OAI responses: weburl followed by '/oai2d.py/'; 'args' is currently not appended."
+
+ re_amp = re.compile('&')  # only referenced by the commented-out tail of the next statement
+
+ requestURL = weburl + "/" + "oai2d.py/"# + "?" + re_amp.sub("&", args)
+
+ return requestURL
+
+def OAIGetResponseDate(delay=0):
+    "Return the current UTC time, moved forward by 'delay' seconds, as a 'YYYY-MM-DDThh:mm:ssZ' string."
+    moment = time.gmtime(time.time() + delay)
+    return time.strftime("%Y-%m-%dT%H:%M:%SZ", moment)
+
+
+def oai_error(code, msg):
+ "Return the OAI error line for error 'code' with message 'msg' as a string."
+ # NOTE(review): two values are given for a single '%s' below; the literal looks like it lost its XML markup -- confirm against the WML source.
+ return "%s\n" % (code, msg)
+
+
+def OAIGetSysno(identifier):
+ "Return a bibrec id whose oaiidfield value equals 'identifier', or None when 'identifier' is empty or nothing matches."
+ sysno = None
+ if identifier:
+ query = "SELECT DISTINCT(bb.id_bibrec) FROM bib90x AS bx, bibrec_bib90x AS bb WHERE bx.tag='%s' AND bb.id_bibxxx=bx.id AND bx.value='%s'" % (oaiidfield,identifier)  # NOTE(review): 'identifier' is interpolated unescaped -- confirm callers sanitise it
+ res = run_sql(query)
+ for row in res:
+ sysno = row[0]  # NOTE(review): if the return below sits after the loop, the last matching row wins, not the first -- confirm
+ return sysno
+
+
+def OAIGetSysnoList(set, fromDate, untilDate):
+ "Return the list of system numbers in OAI set 'set' (or, without a set, of all records carrying oaiidfield) modified from 'fromDate' until 'untilDate'."
+
+ out_dict = {} # dict to hold list of out sysnos as its keys
+ # NOTE(review): 'set' and both dates are interpolated into the SQL text unescaped -- confirm callers sanitise them.
+ if set:
+ query = "SELECT DISTINCT bibx.id_bibrec FROM bib90x AS bx LEFT JOIN bibrec_bib90x AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s' AND bx.value='%s'" % (oaisetfield,set)
+ else:
+ query = "SELECT DISTINCT bibx.id_bibrec FROM bib90x AS bx LEFT JOIN bibrec_bib90x AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag='%s'" % (oaiidfield)
+ # NOTE(review): the callers visible in this file pass the raw request values, not the local-time strings produced by check_date -- confirm this is intended.
+ if untilDate:
+ query = query + " AND b.modification_date <= '%s'" % untilDate
+ if fromDate:
+ query = query + " AND b.modification_date >= '%s'" % fromDate
+
+ res = run_sql(query)
+
+ for row in res:
+ out_dict[row[0]] = 1
+
+ return out_dict.keys()
+
+
+def OAIGenResumptionToken():
+ "Return a resumption token: the hexadecimal MD5 digest of the current time."
+ # The token is derived from time.time() alone.
+ return md5.new(str(time.time())).hexdigest()
+
+
+def OAICacheIn(resumptionToken, sysnos):
+ "Pickle 'sysnos' into the cache file <logdir>/RTdata/<resumptionToken>, overwriting it; always returns 1."
+ # The callers in this file pass a list, not a comma-separated string.
+ filename = "%s/RTdata/%s" % (logdir, resumptionToken)
+
+ fil = open(filename,"w")
+ cPickle.dump(sysnos,fil)
+ fil.close()
+ return 1
+
+
+def OAICacheOut(resumptionToken):
+ "Return the object pickled in the cache file of 'resumptionToken', or 0 when OAICacheStatus reports the cache as empty."
+
+ sysnos = []
+ # NOTE(review): 'resumptionToken' goes into the file name unchanged and the file is unpickled -- confirm it can never be attacker-chosen.
+ filename = "%s/RTdata/%s" % (logdir, resumptionToken)
+
+ if OAICacheStatus(resumptionToken):
+ fil = open(filename,"r")
+ sysnos = cPickle.load(fil)
+ fil.close()
+ else:
+ return 0
+ return sysnos
+
+
+def OAICacheClean():
+ "Delete every file in <logdir>/RTdata that was last modified more than oai_rt_expire seconds ago; always returns 1."
+
+ # NOTE(review): the assignment below has no right-hand side here (its value was presumably substituted from the WML config) -- confirm.
+ oai_rt_expire =
+
+
+ directory = "%s/RTdata" % logdir
+
+ files = os.listdir(directory)
+
+ for f in files:
+ filename = directory + "/" + f
+ # cache entry expires when not modified during a specified period of time
+ if ((time.time() - os.path.getmtime(filename)) > oai_rt_expire):
+ os.remove(filename)
+
+ return 1
+
+
+def OAICacheStatus(resumptionToken):
+    "Report whether the cache file of 'resumptionToken' exists and is non-empty: 1 if so, 0 otherwise."
+
+    filename = "%s/RTdata/%s" % (logdir, resumptionToken)
+
+    # The size is only looked at once the file is known to exist,
+    # exactly as in the nested form of this test.
+    if os.path.exists(filename) and os.path.getsize(filename) > 0:
+        return 1
+
+    # Missing file or zero-length file.
+    return 0
+
+
+def get_sets():
+    "Return one [setSpec, setName, setDescription] list per row of the oaiset table."
+
+    query = "SELECT setSpec,setName,setDescription FROM oaiset"
+    rows = run_sql (query)
+
+    # Copy the first three columns of every row into a fresh list,
+    # keeping the order in which the rows came back; an empty result
+    # simply yields an empty list.
+    sets = [[entry[0], entry[1], entry[2]] for entry in rows]
+
+
+    return sets
+
+
+def parse_args(args=""):
+    "Split the query string 'args' on '&' and '=' and return a dictionary of OAI arguments; unknown names are kept, known ones default to ''."
+
+    # Every known OAI argument starts out empty.
+    known = (
+        "verb",
+        "metadataPrefix",
+        "from",
+        "until",
+        "set",
+        "identifier",
+        "resumptionToken",
+    )
+    parsed = {}
+    for name in known:
+        parsed[name] = ""
+
+    # An empty or missing query string leaves the defaults untouched.
+    if args is None or args == "":
+        return parsed
+
+    for pair in args.split('&'):
+        parts = pair.split('=')
+        if len(parts) == 2: parsed[parts[0]] = parts[1]
+        else: parsed['verb'] = "" # anything that is not exactly name=value blanks the verb
+
+    return parsed
+
+def check_args(arguments):
+ "Validate the parsed OAI 'arguments' and return the concatenated error entries as a string ('' when nothing is wrong)."
+
+ out = ""
+
+## principal argument required
+# the verb has to be one of the keys of 'verbs'
+#
+ if verbs.has_key(arguments['verb']):
+ pass
+ else:
+ out = out + oai_error("badArgument","Malformed request")
+
+## resumptionToken exclusive
+# a resumptionToken may not be combined with any other argument tested here
+#
+ if ((arguments['from']!="" or arguments['until']!="" or arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="") and arguments['resumptionToken']!=""):
+
+ out = out + oai_error("badArgument","The request includes illegal arguments")
+
+## datestamp formats
+# NOTE(review): both halves of the 'from' test below (and of the 'until' test) are the same comparison -- confirm what the second one was meant to check
+#
+ if arguments['from']!="" and arguments['from']!="":
+ from_length = len(arguments['from'])
+ if check_date(arguments['from'],"T00:00:00Z") == "":
+ out = out + oai_error("badArgument","Bad datestamp format in from")
+ else:
+ from_length = 0
+
+ if arguments['until']!="" and arguments['until']!="":
+ until_length = len(arguments['until'])
+ if check_date(arguments['until'],"T23:59:59Z") == "":
+ out = out + oai_error("badArgument","Bad datestamp format in until")
+ else:
+ until_length = 0
+ # The two datestamps are compared by length and then as plain strings.
+ if from_length <> 0:
+ if until_length <> 0:
+ if from_length <> until_length:
+ out = out + oai_error("badArgument","Bad datestamp format")
+
+ if arguments['from'] > arguments['until']:
+ out = out + oai_error("badArgument", "Wrong date")
+
+
+## Identify exclusive
+# Identify takes no argument besides the verb
+#
+ if (arguments['verb']=="Identify" and (arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="" or arguments['from']!="" or arguments['until']!="" or arguments['resumptionToken']!="")):
+ out = out + oai_error("badArgument","The request includes illegal arguments")
+
+## parameters for GetRecord
+# GetRecord needs both an identifier and a metadataPrefix
+#
+ if arguments['verb']=="GetRecord" and arguments['identifier'] == "":
+ out = out + oai_error("badArgument","Record identifier missing")
+
+ if arguments['verb']=="GetRecord" and arguments['metadataPrefix'] == "":
+ out = out + oai_error("badArgument","Missing metadataPrefix")
+
+
+## parameters for ListRecords and ListIdentifiers
+# either a metadataPrefix or a resumptionToken has to be present
+#
+ if (arguments['verb']=="ListRecords" or arguments['verb']=="ListIdentifiers") and (arguments['metadataPrefix'] == "" and arguments['resumptionToken'] == ""):
+ out = out + oai_error("badArgument","Missing metadataPrefix")
+
+ return out
+
+
\ No newline at end of file
diff --git a/modules/bibharvest/lib/oai_repository.py.wml b/modules/bibharvest/lib/oai_repository.py.wml
new file mode 100644
index 000000000..1c28a236e
--- /dev/null
+++ b/modules/bibharvest/lib/oai_repository.py.wml
@@ -0,0 +1,862 @@
+## $Id$
+## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+## read config variables:
+#include "config.wml"
+#include "configbis.wml"
+
+## start Python:
+#!
+## $Id$
+## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0"""
+
+## fill config variables:
+pylibdir = "/python"  # NOTE(review): a directory prefix looks to be missing in front of "/python" -- confirm against the WML configuration
+
+
+## OAI config variables
+oaiidprefix = ""
+oaisampleidentifier = ""
+oaiidentifydescription = """"""
+oaiidfield = "909COo"  # tag whose values are looked up as OAI identifiers
+oaisetfield = "909COp"  # tag whose values are looked up as OAI set names
+
+## Import the standard and CDSware modules; a failed import is reported on stderr and ends the process with status 1.
+try:
+ import cPickle
+ import string
+ from string import split
+ import os
+ import re
+ import sys
+ import time
+ import md5
+
+ sys.path.append('%s' % pylibdir)  # extend the module search path before the two imports below
+ from config import *  # presumably supplies the site settings (logdir, weburl, ...) used by the functions of this module -- verify
+ from dbquery import run_sql
+
+except ImportError, e:
+ import sys
+ sys.stderr.write("Error: %s" % e)
+ sys.exit(1)
+## Table of the OAI verbs known to this module.
+verbs = {
+ "Identify" : [""],
+ "ListSets" : ["resumptionToken"],
+ "ListMetadataFormats" : ["resumptionToken"],
+ "ListRecords" : ["resumptionToken"],
+ "ListIdentifiers" : ["resumptionToken"],
+ "GetRecord" : [""]
+}
+
+
+
+def encode_for_xml(s):
+    "Return 's' with '&' and '<' replaced by the XML entities '&amp;' and '&lt;'."
+    # '&' is escaped first so that the '&' introduced by '&lt;' is left alone.
+    s = s.replace('&', '&amp;')
+    s = s.replace('<', '&lt;')
+    return s
+
+def encode_for_url(s):
+    "Return 's' with '%', ' ', '?', '#', '=' and '&' percent-encoded for use in a URL."
+    # '%' has to go first: otherwise the '%' of every escape produced below would itself become '%25' (' ' -> '%2520').
+    s = s.replace('%', '%25')
+    s = s.replace(' ', '%20')
+    s = s.replace('?', '%3F')
+    s = s.replace('#', '%23')
+    s = s.replace('=', '%3D')
+    s = s.replace('&', '%26')
+
+    return s
+
+def oai_header(args, verb):
+ "Return (as a string, nothing is printed) the start of an OAI response: the response date, the request URL and, for a non-empty 'verb', a line holding '<verb>'."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup (one of them gets two values for a single '%s') -- confirm.
+ out = out + "" + "\n"
+ out = out + "\n"
+
+ out = out + " " + OAIGetResponseDate() + "\n"
+
+ if verb:
+ out = out + " %s\n" % (verb,OAIGetRequestURL(args))
+ out = out + " <%s>\n" % verb
+ else:
+ out = out + " %s\n" % (OAIGetRequestURL(args))
+
+ return out
+
+def oai_footer(verb):
+ "Return the end of an OAI response as a string; 'verb' is mentioned in it only when it is non-empty."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup -- confirm.
+ if verb:
+ out = "%s %s>\n" % (out, verb)
+ out = out + "\n"
+
+ return out
+
+def oai_error_header(args, verb):
+ "Return the start of an OAI error response as a string: the response date followed by a line built from 'verb' and the request URL."
+
+ out = ""
+ # NOTE(review): the literals below look like they lost their XML markup (two values are given for a single '%s') -- confirm.
+### out = "Content-Type: text/xml\n\n"
+ out = out + "" + "\n"
+ out = out + "\n"
+
+ out = out + " " + OAIGetResponseDate() + "\n"
+ out = out + " %s\n" % (verb,OAIGetRequestURL(args))
+
+ return out
+
+def oai_error_footer(verb):
+ "Return the end of an OAI error response as a string; 'verb' is accepted but not used."
+
+ out = ""
+ out = out + "\n"  # NOTE(review): the closing markup looks to be missing from this literal -- confirm
+
+ return out
+
+def get_field(sysno, field):
+ "Return the list of values stored under tag 'field' (e.g. '245__a') for the record whose system number is 'sysno'."
+
+ out = []
+ digit = field[0:2]
+ # The first two characters of the tag select the bibXXx / bibrec_bibXXx table pair to query.
+ bx = "bib%sx" % digit
+ bibx = "bibrec_bib%sx" % digit
+ query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" % (bx, bibx, sysno, field)
+ # NOTE(review): 'sysno' and 'field' are interpolated into the SQL text unescaped -- callers must only pass trusted values.
+ res = run_sql(query)
+
+ for row in res:
+
+ out.append(row[0])
+
+ return out
+
+def UTC_to_localtime(date):
+ "Convert a 'YYYY-MM-DDThh:mm:ssZ' datestamp into a 'YYYY-MM-DD hh:mm:ss' string shifted by the local UTC offset."
+ # Split the datestamp into its date and time halves, then into single fields.
+ ldate = date.split("T")[0]
+ ltime = date.split("T")[1]
+
+ lhour = ltime.split(":")[0]
+ lminute = ltime.split(":")[1]
+ lsec = ltime.split(":")[2]
+
+ lyear = ldate.split("-")[0]
+ lmonth = ldate.split("-")[1]
+ lday = ldate.split("-")[2]
+ # lsec[:-1] drops the last character of the seconds part (expected to be the trailing 'Z').
+ timetoconvert = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec[:-1]),0,0,-1)) - time.timezone + (time.daylight)*3600))
+ # NOTE(review): an hour is added whenever time.daylight is non-zero, whatever the date being converted -- confirm this is intended.
+ return timetoconvert
+
+def localtime_to_UTC(date):
+ "Convert a local 'YYYY-MM-DD hh:mm:ss' string into a UTC 'YYYY-MM-DDThh:mm:ssZ' datestamp."
+ # Split the input on the blank into its date and time halves, then into single fields.
+ ldate = date.split(" ")[0]
+ ltime = date.split(" ")[1]
+
+ lhour = ltime.split(":")[0]
+ lminute = ltime.split(":")[1]
+ lsec = ltime.split(":")[2]
+
+ lyear = ldate.split("-")[0]
+ lmonth = ldate.split("-")[1]
+ lday = ldate.split("-")[2]
+ # time.mktime() reads the fields as local time (DST flag -1); time.gmtime() turns the result into UTC fields.
+ timetoconvert = time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(time.mktime((string.atoi(lyear),string.atoi(lmonth),string.atoi(lday),string.atoi(lhour),string.atoi(lminute),string.atoi(lsec),0,0,-1))))
+
+ return timetoconvert
+
+def get_creation_date(sysno):
+ "Return the creation date of record 'sysno' from bibrec, converted by localtime_to_UTC."
+
+ out = ""
+ # '%%' is a literal '%' once Python has filled in the id, so MySQL receives '%Y-%m-%d %H:%i:%s'.
+ query = "SELECT DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno)
+
+ res = run_sql(query)
+ for row in res:
+ out = row[0]
+ return localtime_to_UTC(out)
+
+def get_modification_date(sysno):
+ "Return the last modification date of record 'sysno' from bibrec, converted by localtime_to_UTC."
+
+ out = ""
+ # '%%' is a literal '%' once Python has filled in the id, so MySQL receives '%Y-%m-%d %H:%i:%s'.
+ query = "SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id='%s'" % (sysno)
+
+ res = run_sql(query)
+ for row in res:
+ out = row[0]
+ return localtime_to_UTC(out)
+
+def get_earliest_datestamp():
+ "Return the smallest creation date found in bibrec, converted by localtime_to_UTC."
+
+ out = ""  # not read again in this function
+ # No Python formatting is applied to this query, hence the single '%' signs.
+ query = "SELECT MIN(DATE_FORMAT(creation_date,'%Y-%m-%d %H:%i:%s')) FROM bibrec"
+
+ res = run_sql(query)
+ return localtime_to_UTC(res[0][0])
+
+def check_date(date, time="T00:00:00Z"):
+ "Return 'date' converted by UTC_to_localtime, or '' when it is not acceptable; a 10-character date first gets 'time' appended."
+ # The pattern only rejects characters other than digits, '-', ':', 'T' and 'Z'; the layout itself is judged by length alone.
+ if(re.sub("[0123456789\-:TZ]","",date) == ""):
+ if len(date) == 10:
+ date = date + time  # 'time' is the parameter here; it hides the time module inside this function
+ if len(date) == 20:
+ date = UTC_to_localtime(date)
+ else:
+ date = ""
+ else:
+ date = ""
+
+ return date
+
+def record_exists(sysno):
+    "Tell whether bibrec holds a row whose id is 'sysno': 1 if it does, 0 if not."
+
+    lookup = "SELECT id FROM bibrec WHERE id='%s'" % (sysno)
+
+    # A single row with a non-empty id settles the answer, so the
+    # scan can stop at the first one.
+    for found in run_sql(lookup):
+        if found[0] != "":
+            return 1
+
+    # No usable row came back.
+    return 0
+
+def print_record(sysno, format='marcxml'):
+ """Returns the OAI markup for record 'sysno' formatted according to 'format'.
+
+ Despite the name nothing is printed: the text is accumulated in 'out'
+ and returned.  'format' may be 'marcxml' (the default) or one of
+ 'dc', 'oai_dc', 'xd' for Dublin Core.  When record_exists(sysno) is
+ false the function leaves through a bare 'return', i.e. yields None.
+
+ NOTE(review): as they appear here, several format strings below take
+ more arguments than they have '%s' placeholders and the literals hold
+ only whitespace -- it looks like the XML markup was stripped from
+ them; confirm against the real source before relying on this text.
+ """
+
+ out = ""
+
+ # sanity check:
+ if not record_exists(sysno):
+ return
+
+ # both Dublin Core spellings are handled by the internal "xd" branch
+ if (format == "dc") or (format == "oai_dc"):
+ format = "xd"
+
+ # print record opening tags:
+
+ # header part: identifier(s) read from 'oaiidfield', the modification
+ # date, then the set(s) read from 'oaisetfield'
+ out = out + " \n"
+ out = out + " \n"
+ for id in get_field(sysno,oaiidfield):
+ out = "%s %s\n" % (out, encode_for_url(id))
+ out = "%s %s\n" % (out, get_modification_date(sysno))
+ for set in get_field(sysno,oaisetfield):
+ out = "%s %s\n" % (out, set)
+ out = out + " \n"
+ out = out + " \n"
+
+ if format == "marcxml":
+ out = out + " "
+ # presumably the MARC leader -- TODO confirm
+ out = out + " 00000coc 2200000uu 4500"
+ ## MARC21 and XML formats, possibly OAI -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables:
+
+ if format == "marcxml":
+
+ out = "%s %d\n" % (out, int(sysno))
+
+ # one query per table pair bib00x/bibrec_bib00x ... bib99x/bibrec_bib99x
+ for digit1 in range(0,10):
+ for digit2 in range(0,10):
+ bx = "bib%d%dx" % (digit1, digit2)
+ bibx = "bibrec_bib%d%dx" % (digit1, digit2)
+ query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
+ "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\
+ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, sysno, str(digit1)+str(digit2))
+ res = run_sql(query)
+ # -999 is the "nothing opened yet" marker for this table pair
+ field_number_old = -999
+ field_old = ""
+ for row in res:
+ field, value, field_number = row[0], row[1], row[2]
+ # 'field' is indexed as tag (chars 0-2), two indicators (chars 3-4)
+ # and a final subfield code; "_" stands for a blank indicator
+ ind1, ind2 = field[3], field[4]
+ if ind1 == "_":
+ ind1 = " "
+ if ind2 == "_":
+ ind2 = " "
+ # print field tag
+ # a new field starts when the field number changes or when
+ # anything but the trailing subfield code differs
+ if field_number != field_number_old or field[:-1] != field_old[:-1]:
+ if format == "marcxml":
+
+ # NOTE(review): 'fieldid' is assigned but never read in this function
+ fieldid = encode_for_xml(field[0:3])
+
+ if field_number_old != -999:
+ out = out + " \n"
+
+ out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower())
+
+ field_number_old = field_number
+ field_old = field
+ # print subfield value
+ if format == "marcxml":
+ value = encode_for_xml(value)
+ out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value)
+
+ # fetch next subfield
+ # all fields/subfields printed in this run, so close the tag:
+ if (format == "marcxml") and field_number_old != -999:
+ out = out + " \n"
+ out = out + " \n"
+
+ elif format == "xd":
+ # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
+ out = out + " \n"
+
+ # NOTE(review): unlike the fields below, 041__a is emitted without
+ # encode_for_xml() -- confirm whether that is intended
+ for f in get_field(sysno, "041__a"):
+ out = "%s %s\n" % (out, f)
+
+ for f in get_field(sysno, "100__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "700__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "245__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "65017a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ for f in get_field(sysno, "8564_u"):
+ out = "%s %s\n" % (out, encode_for_xml(encode_for_url(f)))
+
+ for f in get_field(sysno, "520__a"):
+ out = "%s %s\n" % (out, encode_for_xml(f))
+
+ date = get_creation_date(sysno)
+
+ out = "%s %s\n" % (out, date)
+ out = out + " \n"
+
+ # print record closing tags:
+
+ out = out + " \n"
+ out = out + " \n"
+
+ return out
+
+def OAIListMetadataFormats(args):
+ """Generates response to OAIListMetadataFormats verb.
+
+ 'args' is the raw request argument string; it is parsed with
+ parse_args().  Two formats are advertised: 'oai_dc' and 'marcxml'.
+ When an 'identifier' argument is given and record_exists() is false
+ for it, an error response is returned instead.
+ """
+
+
+
+ # NOTE(review): the two assignments below have no right-hand side as
+ # they appear here; the values were presumably filled in from
+ # configuration -- confirm.  Neither name is read later in this function.
+ oai_rt_expire =
+ nb_formats_in_resume =
+
+
+
+ arg = parse_args(args)
+
+ out = ""
+
+ flag = 1 # list or not depending on identifier
+
+ if arg['identifier'] != "":
+
+ flag = 0
+
+ sysno = OAIGetSysno(arg['identifier'])
+
+ if record_exists(sysno):
+
+ flag = 1
+
+ else:
+
+ # NOTE(review): OAI-PMH 2.0 defines 'idDoesNotExist' for an unknown
+ # identifier; 'badArgument' is what is sent here
+ out = out + oai_error("badArgument","invalid record Identifier")
+ out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats")
+ return out
+
+ if flag:
+ # first entry: oai_dc (prefix, schema location, namespace)
+ out = out + " \n"
+ out = out + " oai_dc\n"
+ out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n"
+ out = out + " http://purl.org/dc/elements/1.1/\n"
+ out = out + " \n"
+ # second entry: marcxml (prefix, schema location, namespace)
+ out = out + " \n"
+ out = out + " marcxml\n"
+ out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n"
+ out = out + " http://www.loc.gov/MARC21/slim\n"
+ out = out + " \n"
+
+ out = oai_header(args,"ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
+ return out
+
+
+def OAIListRecords(args):
+ """Generates response to OAIListRecords verb.
+
+ 'args' is the raw request argument string.  The record ids come either
+ from the cache entry named by 'resumptionToken' or from
+ OAIGetSysnoList(set, from, until).  Up to 'nb_records_in_resume'
+ records are rendered with print_record(); the remaining ids are
+ collected in 'sysno' and stored with OAICacheIn() under a freshly
+ generated resumption token.
+ """
+
+
+ # NOTE(review): no right-hand side is present on the two assignments
+ # below as they appear here; the values presumably come from
+ # configuration -- confirm.
+ oai_rt_expire =
+ nb_records_in_resume =
+
+
+ arg = parse_args(args)
+
+ out = ""
+
+ sysnos = []
+ sysno = []
+
+ # check if the resumptionToken did not expire
+ if arg['resumptionToken']:
+ filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken'])
+ if os.path.exists(filename) == 0:
+ out = oai_error("badResumptionToken","ResumptionToken expired")
+ out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
+ return out
+
+ if arg['resumptionToken'] != "":
+ sysnos = OAICacheOut(arg['resumptionToken'])
+ # the metadataPrefix was appended as the last cache element (see the
+ # sysno.append() before OAICacheIn() below), so pop it back off here
+ # NOTE(review): OAICacheOut() returns 0 for an empty cache file, on
+ # which .pop() would fail
+ arg['metadataPrefix'] = sysnos.pop()
+ else:
+ sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until'])
+
+ if len(sysnos) == 0: # noRecordsMatch error
+
+ out = out + oai_error("noRecordsMatch","no records correspond to the request")
+ out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
+ return out
+
+ # 'i' counts the non-empty ids seen so far
+ i = 0
+ for s in sysnos:
+ if s:
+ i = i + 1
+ if i > nb_records_in_resume: # cache or write?
+ if i == nb_records_in_resume + 1: # resumptionToken?
+ # first id beyond the page size: create the token and emit it,
+ # with an expiry date when OAIGetResponseDate() returns one
+ arg['resumptionToken'] = OAIGenResumptionToken()
+ extdate = OAIGetResponseDate(oai_rt_expire)
+ if extdate:
+ out = "%s %s\n" % (out,extdate,arg['resumptionToken'])
+ else:
+ out = "%s %s\n" % (out, arg['resumptionToken'])
+ sysno.append(s)
+ else:
+ # NOTE(review): 'done' is never set to 1 here (OAIListIdentifiers
+ # does set it), so a record carrying several 245__a values would
+ # presumably be rendered once per value -- confirm and fix
+ done = 0
+ for f in get_field(s, "245__a"):
+ if done == 0:
+ out = out + print_record(s, arg['metadataPrefix'])
+
+ if i > nb_records_in_resume:
+ OAICacheClean()
+ # remember the requested format together with the ids left over
+ sysno.append(arg['metadataPrefix'])
+ OAICacheIn(arg['resumptionToken'],sysno)
+
+ out = oai_header(args,"ListRecords") + out + oai_footer("ListRecords")
+ return out
+
+def OAIListSets(args):
+ """Lists available sets for OAI metadata harvesting.
+
+ 'args' is the raw request argument string.  Every entry returned by
+ get_sets() is emitted as one block: element 0, element 1 and, when it
+ is non-empty, element 2.  The whole list goes out in a single response.
+ """
+
+
+ # NOTE(review): the right-hand sides of these two assignments are
+ # missing as they appear here, and neither name is read in this
+ # function -- confirm against the real source.
+ oai_rt_expire =
+ nb_sets_in_resume =
+
+
+ # NOTE(review): 'arg' is parsed but not read afterwards in this function
+ arg = parse_args(args)
+
+ out = ""
+
+ # note: no flow control in ListSets
+
+ sets = get_sets()
+
+ for s in sets:
+
+ out = out + " \n"
+ out = "%s %s\n" % (out, s[0])
+ out = "%s %s\n" % (out, s[1])
+ # the third element is optional
+ if s[2]:
+ out = "%s %s\n" % (out, s[2])
+ out = out + " \n"
+
+ out = oai_header(args,"ListSets") + out + oai_footer("ListSets")
+
+ return out
+
+
+def OAIGetRecord(args):
+    """Return record 'identifier' in 'metadataPrefix' format for OAI harvesting.
+
+    'args' is the raw request argument string; 'identifier' and
+    'metadataPrefix' are taken from it with parse_args().  The result is
+    the complete response text: either the record wrapped in the
+    GetRecord header/footer, or an error document when no record matches
+    the identifier.
+    """
+
+    arg = parse_args(args)
+    sysno = OAIGetSysno(arg['identifier'])
+
+    if not record_exists(sysno):
+        # NOTE(review): OAI-PMH 2.0 defines 'idDoesNotExist' for this case;
+        # the code sent has been left unchanged here.
+        out = oai_error("badArgument","invalid record Identifier")
+        return oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord")
+
+    # The modification date is not fetched here: print_record() obtains it
+    # itself, so a separate lookup only cost an extra database query.
+    out = print_record(sysno, arg['metadataPrefix'])
+
+    return oai_header(args,"GetRecord") + out + oai_footer("GetRecord")
+
+
+def OAIListIdentifiers(args):
+ """Prints OAI response to the ListIdentifiers verb.
+
+ 'args' is the raw request argument string.  The response text is built
+ and returned rather than printed.  Record ids come either from the
+ cache entry named by 'resumptionToken' or from
+ OAIGetSysnoList(set, from, until).  Up to 'nb_identifiers_in_resume'
+ headers are emitted; the ids left over are cached with OAICacheIn()
+ under a newly generated resumption token.
+ """
+
+
+ # NOTE(review): both assignments below lack a right-hand side as they
+ # appear here; presumably configuration values belong there -- confirm.
+ oai_rt_expire =
+ nb_identifiers_in_resume =
+
+
+ arg = parse_args(args)
+
+ out = ""
+
+ sysno = []
+ sysnos = []
+
+ # a token whose cache file is gone is reported as expired
+ if arg['resumptionToken']:
+ filename = "%s/RTdata/%s" % (logdir, arg['resumptionToken'])
+ if os.path.exists(filename) == 0:
+ out = out + oai_error("badResumptionToken","ResumptionToken expired")
+ out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
+ return out
+
+ if arg['resumptionToken']:
+ sysnos = OAICacheOut(arg['resumptionToken'])
+ else:
+ sysnos = OAIGetSysnoList(arg['set'], arg['from'], arg['until'])
+
+ if len(sysnos) == 0: # noRecordsMatch error
+ out = out + oai_error("noRecordsMatch","no records correspond to the request")
+ out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
+ return out
+
+ # 'i' counts the non-empty ids seen so far
+ i = 0
+ for s in sysnos:
+ if s:
+ i = i + 1
+ if i > nb_identifiers_in_resume: # cache or write?
+ if i == nb_identifiers_in_resume + 1: # resumptionToken?
+ # first id beyond the page size: create the token and emit it,
+ # with an expiry date when OAIGetResponseDate() returns one
+ arg['resumptionToken'] = OAIGenResumptionToken()
+ extdate = OAIGetResponseDate(oai_rt_expire)
+ if extdate:
+ out = "%s %s\n" % (out, extdate,arg['resumptionToken'])
+ else:
+ out = "%s %s\n" % (out, arg['resumptionToken'])
+ sysno.append(s)
+ else:
+ # 'done' keeps a record with several 245__a values from being
+ # listed more than once
+ done = 0
+ for f in get_field(s, "245__a"):
+ if done == 0:
+ for id in get_field(s,oaiidfield):
+ out = out + " \n"
+ out = "%s %s\n" % (out, encode_for_url(id))
+ out = "%s %s\n" % (out, get_modification_date(OAIGetSysno(id)))
+ # NOTE(review): the loop variable 'set' is not used; the
+ # requested arg['set'] is written for every set value of
+ # the record (print_record() writes the loop variable) --
+ # confirm which is intended
+ for set in get_field(s,oaisetfield):
+ out = "%s %s\n" % (out, arg['set'])
+ out = out + " \n"
+ done = 1
+
+ if i > nb_identifiers_in_resume:
+ OAICacheClean() # clean cache from expired resumptionTokens
+ OAICacheIn(arg['resumptionToken'],sysno)
+
+ out = oai_header(args,"ListIdentifiers") + out + oai_footer("ListIdentifiers")
+
+ return out
+
+
+def OAIIdentify(args):
+    """Generate the response to the OAI Identify verb.
+
+    'args' is the raw request argument string and is only forwarded to
+    oai_header().  The body lists, in this order: repository name, base
+    URL, protocol version, administrator e-mail, earliest datestamp, two
+    fixed lines (" no" and the datestamp granularity pattern) and finally
+    the configured 'oaiidentifydescription' text.
+    """
+
+    parts = [
+        " " + cdsname + "\n",                  # repository name
+        " %s/oai2d.py/\n" % weburl,            # base URL
+        " 2.0\n",                              # protocol version
+        " mailto:%s\n" % supportemail,         # administrator e-mail
+        " %s\n" % get_earliest_datestamp(),    # earliest datestamp
+        " no\n",                               # presumably the deleted-record policy -- TODO confirm
+        " YYYY-MM-DDThh:mm:ssZ\n",             # datestamp granularity
+        oaiidentifydescription,
+    ]
+
+    out = "".join(parts)
+
+    return oai_header(args,"Identify") + out + oai_footer("Identify")
+
+
+def OAIGetRequestURL(args):
+    """Generate the request URL reported in OAI responses.
+
+    Only the base address of the service ('weburl' + "/oai2d.py/") is
+    returned.  'args' is accepted for interface compatibility but the
+    query string is deliberately not appended.
+    """
+
+    return weburl + "/" + "oai2d.py/"
+
+def OAIGetResponseDate(delay=0):
+    """Return the current UTC time as 'YYYY-MM-DDThh:mm:ssZ'.
+
+    'delay' is a number of seconds added to the current time before
+    formatting, which lets callers ask for a moment in the future.
+    """
+
+    moment = time.gmtime(time.time() + delay)
+    return time.strftime("%Y-%m-%dT%H:%M:%SZ", moment)
+
+
+def oai_error(code, msg):
+    """Return the OAI-PMH error element for error 'code' with text 'msg'.
+
+    The result is a single line of the form
+    <error code="CODE">MSG</error> terminated by a newline.
+
+    NOTE(review): the format string found here had one placeholder for
+    two arguments, which raises TypeError on every call; the element
+    markup was restored from the OAI-PMH 2.0 specification.  'code' and
+    'msg' are inserted as given, without XML escaping.
+    """
+
+    return "<error code=\"%s\">%s</error>\n" % (code, msg)
+
+
+def OAIGetSysno(identifier):
+    """Return the MySQL BIB ID of the record carrying OAI identifier 'identifier'.
+
+    The lookup goes through the bib90x tables using the tag configured in
+    'oaiidfield'.  None is returned for an empty identifier or when
+    nothing matches; if several records match, the id of the last row
+    returned by the database is the one kept.
+    """
+    sysno = None
+    if identifier:
+        # 'identifier' comes straight from the harvester's request, so it
+        # is bound as a query parameter instead of being pasted into the
+        # SQL text.
+        # NOTE(review): relies on run_sql() accepting a parameter tuple as
+        # its second argument -- confirm against cdsware.dbquery.
+        query = "SELECT DISTINCT(bb.id_bibrec) FROM bib90x AS bx, bibrec_bib90x AS bb WHERE bx.tag=%s AND bb.id_bibxxx=bx.id AND bx.value=%s"
+        res = run_sql(query, (oaiidfield, identifier))
+        for row in res:
+            sysno = row[0]
+    return sysno
+
+
+def OAIGetSysnoList(set, fromDate, untilDate):
+    """Return the list of system numbers matching an OAI selective harvest.
+
+    'set'       -- OAI set name; when empty, every record that has an OAI
+                   identifier (tag 'oaiidfield') is a candidate
+    'fromDate'  -- lower bound on modification_date, ignored when empty
+    'untilDate' -- upper bound on modification_date, ignored when empty
+
+    Each record id appears once in the result; the order is unspecified.
+    """
+
+    out_dict = {} # dict to hold list of out sysnos as its keys
+
+    # All three arguments originate from the harvester's request, so they
+    # are bound as query parameters rather than pasted into the SQL text.
+    # NOTE(review): relies on run_sql() accepting a parameter tuple as its
+    # second argument -- confirm against cdsware.dbquery.
+    if set:
+        query = "SELECT DISTINCT bibx.id_bibrec FROM bib90x AS bx LEFT JOIN bibrec_bib90x AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag=%s AND bx.value=%s"
+        params = [oaisetfield, set]
+    else:
+        query = "SELECT DISTINCT bibx.id_bibrec FROM bib90x AS bx LEFT JOIN bibrec_bib90x AS bibx ON bx.id=bibx.id_bibxxx LEFT JOIN bibrec AS b ON b.id=bibx.id_bibrec WHERE bx.tag=%s"
+        params = [oaiidfield]
+
+    if untilDate:
+        query = query + " AND b.modification_date <= %s"
+        params.append(untilDate)
+    if fromDate:
+        query = query + " AND b.modification_date >= %s"
+        params.append(fromDate)
+
+    res = run_sql(query, tuple(params))
+
+    for row in res:
+        out_dict[row[0]] = 1
+
+    return out_dict.keys()
+
+
+def OAIGenResumptionToken():
+    """Return a fresh identifier for resumption token management.
+
+    The token is the hexadecimal MD5 digest of the current clock reading
+    (time.time() rendered with str()).
+    """
+
+    seed = str(time.time())
+    digest = md5.new(seed)
+    return digest.hexdigest()
+
+
+def OAICacheIn(resumptionToken, sysnos):
+    """Store 'sysnos' in the cache entry named after 'resumptionToken'.
+
+    'sysnos' is pickled as given (callers pass a list) into the file
+    <logdir>/RTdata/<resumptionToken>, replacing any previous content.
+    Always returns 1; I/O errors propagate to the caller.
+    """
+
+    filename = "%s/RTdata/%s" % (logdir, resumptionToken)
+
+    fil = open(filename,"w")
+    try:
+        cPickle.dump(sysnos,fil)
+    finally:
+        # release the handle even when pickling fails
+        fil.close()
+    return 1
+
+
+def OAICacheOut(resumptionToken):
+    """Restore the list stored by OAICacheIn() under 'resumptionToken'.
+
+    Returns the unpickled content of <logdir>/RTdata/<resumptionToken>,
+    or an empty list when the entry is missing or empty.  (An integer 0
+    used to be returned in that case, on which the callers' len() and
+    .pop() would fail.)
+    """
+
+    # SECURITY: the token is taken from the request and becomes part of a
+    # file name whose content is fed to cPickle.load().  Refuse anything
+    # that is not a plain file name so the lookup cannot leave the cache
+    # directory.
+    # NOTE(review): unpickling remains unsafe should anybody else be able
+    # to write into that directory.
+    if os.path.basename(resumptionToken) != resumptionToken:
+        return []
+
+    if not OAICacheStatus(resumptionToken):
+        return []
+
+    filename = "%s/RTdata/%s" % (logdir, resumptionToken)
+
+    fil = open(filename,"r")
+    try:
+        sysnos = cPickle.load(fil)
+    finally:
+        # release the handle even when unpickling fails
+        fil.close()
+    return sysnos
+
+
+def OAICacheClean():
+ """Removes cached resumptionTokens older than 'oai_rt_expire'.
+
+ Every file found in <logdir>/RTdata whose modification time lies
+ more than 'oai_rt_expire' seconds in the past is deleted.
+ Always returns 1.
+ """
+
+
+ # NOTE(review): the right-hand side of this assignment is missing as it
+ # appears here; the expiry period presumably comes from configuration
+ # -- confirm.
+ oai_rt_expire =
+
+
+ directory = "%s/RTdata" % logdir
+
+ files = os.listdir(directory)
+
+ for f in files:
+ filename = directory + "/" + f
+ # cache entry expires when not modified during a specified period of time
+ if ((time.time() - os.path.getmtime(filename)) > oai_rt_expire):
+ os.remove(filename)
+
+ return 1
+
+
+def OAICacheStatus(resumptionToken):
+    """Report whether a usable cache entry exists for 'resumptionToken'.
+
+    Returns 1 when <logdir>/RTdata/<resumptionToken> exists and is not
+    empty, 0 in every other case.
+    """
+
+    filename = "%s/RTdata/%s" % (logdir, resumptionToken)
+
+    # the size is only looked at once the file is known to exist
+    if os.path.exists(filename) and os.path.getsize(filename) > 0:
+        return 1
+    return 0
+
+
+def get_sets():
+    """Return the OAI sets defined in table 'oaiset'.
+
+    The result is a list with one [setSpec, setName, setDescription]
+    list per row, in the order the database returns them.
+    """
+
+    query = "SELECT setSpec,setName,setDescription FROM oaiset"
+    res = run_sql(query)
+
+    # copy each row into a fresh three-element list
+    return [[row[0], row[1], row[2]] for row in res]
+
+
+def parse_args(args=""):
+    """Parse a raw 'key=value&key=value' request string into a dictionary.
+
+    The result always holds the keys 'verb', 'metadataPrefix', 'from',
+    'until', 'set', 'identifier' and 'resumptionToken' (empty string when
+    absent); any other key present in the request is added as well.
+    Values are percent-decoded, so that e.g. 'oai%3Acds%3A1' is seen as
+    'oai:cds:1' by the rest of the code.  An item without '=' marks the
+    request as malformed by blanking 'verb'.
+
+    'args' may be None or empty, in which case only the defaults are
+    returned.
+    """
+
+    # imported here because this module's import block is not changed
+    import urllib
+
+    out_args = {
+        "verb"             : "",
+        "metadataPrefix"   : "",
+        "from"             : "",
+        "until"            : "",
+        "set"              : "",
+        "identifier"       : "",
+        "resumptionToken"  : ""
+    }
+
+    if args == "" or args is None:
+        return out_args
+
+    for item in args.split('&'):
+        # split on the first '=' only: a value may itself contain '='
+        keyvalue = item.split('=', 1)
+        if len(keyvalue) == 2:
+            out_args[keyvalue[0]] = urllib.unquote_plus(keyvalue[1])
+        else:
+            out_args['verb'] = ""
+
+    return out_args
+
+def check_args(arguments):
+ """Check OAI arguments.
+
+ 'arguments' is the dictionary produced by parse_args().  Returns the
+ concatenation of one oai_error() text per problem found, or the empty
+ string when the request passes every check below.
+ """
+
+ out = ""
+
+## principal argument required
+#
+#
+ # 'verbs' is not defined in this function; presumably a module-level
+ # table of the supported verbs -- TODO confirm
+ if verbs.has_key(arguments['verb']):
+ pass
+ else:
+ out = out + oai_error("badArgument","Malformed request")
+
+## resumptionToken exclusive
+#
+#
+ if ((arguments['from']!="" or arguments['until']!="" or arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="") and arguments['resumptionToken']!=""):
+
+ out = out + oai_error("badArgument","The request includes illegal arguments")
+
+## datestamp formats
+#
+#
+ # NOTE(review): both halves of the condition below are the same test
+ if arguments['from']!="" and arguments['from']!="":
+ from_length = len(arguments['from'])
+ # only the validity verdict of check_date() is used; the converted
+ # date it returns is dropped
+ if check_date(arguments['from'],"T00:00:00Z") == "":
+ out = out + oai_error("badArgument","Bad datestamp format in from")
+ else:
+ from_length = 0
+
+ # NOTE(review): both halves of the condition below are the same test
+ if arguments['until']!="" and arguments['until']!="":
+ until_length = len(arguments['until'])
+ if check_date(arguments['until'],"T23:59:59Z") == "":
+ out = out + oai_error("badArgument","Bad datestamp format in until")
+ else:
+ until_length = 0
+
+ # when both bounds are given they must have the same length, i.e. the
+ # same granularity
+ if from_length <> 0:
+ if until_length <> 0:
+ if from_length <> until_length:
+ out = out + oai_error("badArgument","Bad datestamp format")
+
+ # plain string comparison of the two datestamps
+ # NOTE(review): presumably only meant to run when both bounds are
+ # present -- confirm the nesting in the real source
+ if arguments['from'] > arguments['until']:
+ out = out + oai_error("badArgument", "Wrong date")
+
+
+## Identify exclusive
+#
+#
+ if (arguments['verb']=="Identify" and (arguments['metadataPrefix']!="" or arguments['identifier']!="" or arguments['set']!="" or arguments['from']!="" or arguments['until']!="" or arguments['resumptionToken']!="")):
+ out = out + oai_error("badArgument","The request includes illegal arguments")
+
+## parameters for GetRecord
+#
+#
+ if arguments['verb']=="GetRecord" and arguments['identifier'] == "":
+ out = out + oai_error("badArgument","Record identifier missing")
+
+ if arguments['verb']=="GetRecord" and arguments['metadataPrefix'] == "":
+ out = out + oai_error("badArgument","Missing metadataPrefix")
+
+
+## parameters for ListRecords and ListIdentifiers
+#
+#
+ # metadataPrefix may only be left out when a resumptionToken is supplied
+ if (arguments['verb']=="ListRecords" or arguments['verb']=="ListIdentifiers") and (arguments['metadataPrefix'] == "" and arguments['resumptionToken'] == ""):
+ out = out + oai_error("badArgument","Missing metadataPrefix")
+
+ return out
+
+
\ No newline at end of file
diff --git a/modules/bibharvest/web/.cvsignore b/modules/bibharvest/web/.cvsignore
new file mode 100644
index 000000000..4e66d5c3e
--- /dev/null
+++ b/modules/bibharvest/web/.cvsignore
@@ -0,0 +1,7 @@
+Makefile
+Makefile.in
+z_*
+*.O
+*~
+*.py
+*.shtml
\ No newline at end of file
diff --git a/modules/bibharvest/Makefile.am b/modules/bibharvest/web/Makefile.am
similarity index 71%
copy from modules/bibharvest/Makefile.am
copy to modules/bibharvest/web/Makefile.am
index 47ad17ba4..d015c43e5 100644
--- a/modules/bibharvest/Makefile.am
+++ b/modules/bibharvest/web/Makefile.am
@@ -1,22 +1,33 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-SUBDIRS = bin
+webappdir = $(WEBDIR)
-CLEANFILES = *~
\ No newline at end of file
+webapp_DATA=oai2d.py
+
+EXTRA_DIST = $(wildcard *.wml)
+
+CLEANFILES = $(bin_SCRIPTS) $(webapp_DATA) *~ *.tmp search.pyc
+
+%.py: %.py.wml ../../../config/config.wml ../../../config/configbis.wml
+ $(WML) -o $@ $<
+
+%: %.wml ../../../config/config.wml ../../../config/configbis.wml
+ $(WML) -o $@ $<
+ chmod u+x $@
diff --git a/modules/bibharvest/web/oai2d.py b/modules/bibharvest/web/oai2d.py
new file mode 100644
index 000000000..1213249b4
--- /dev/null
+++ b/modules/bibharvest/web/oai2d.py
@@ -0,0 +1,138 @@
+## $Id$
+## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+## read config variables:
+#include "config.wml"
+#include "configbis.wml"
+
+#!
+## $Id$
+## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0"""
+
+# NOTE(review): the value below still holds a '<: ... :>' template
+# expression; it is presumably replaced by the build date when the file
+# is generated -- confirm.
+__lastupdated__ = """<: print `date +"%d %b %Y %H:%M:%S %Z"`; :>"""
+__version__ = "$Id$"
+
+
+## fill config variables:
+# directory appended to sys.path below so that the 'cdsware' package
+# can be imported
+pylibdir = "/python"
+
+## OAI config variables
+# NOTE(review): none of these five names is read by index() in this
+# file; presumably kept for reference or used elsewhere -- confirm.
+oaiidprefix = ""
+oaisampleidentifier = ""
+oaiidentifydescription = """"""
+oaiidfield = "909COo"
+oaisetfield = "909COp"
+
+# Import everything the handler needs; on failure the error is written
+# to stderr and the process exits with status 1.
+try:
+ import sys
+ import urllib
+ sys.path.append('%s' % pylibdir)
+ from cdsware.config import *
+ from cdsware.dbquery import run_sql
+ from cdsware import oai_repository
+except ImportError, e:
+ import sys
+ sys.stderr.write("Error: %s" % e)
+ sys.exit(1)
+
+
+def index (req):
+    """OAI repository interface: answer one OAI request.
+
+    'req' is the web request object; it must provide 'method', 'args',
+    'form', 'content_type', send_http_header() and write().  The request
+    arguments are checked with oai_repository.check_args() and the reply
+    is written to 'req' as text/xml.  The function itself returns a
+    single newline.
+    """
+
+    # rebuild the raw argument string: taken as-is for GET, re-encoded
+    # from the submitted form for POST
+    args = ""
+    method = req.method
+    if method == "GET":
+        args = req.args
+    elif method == "POST":
+        params = {}
+        for key in req.form.keys():
+            params[key] = req.form[key]
+        args = urllib.urlencode(params)
+
+    arg = oai_repository.parse_args(args)
+
+    # check request for OAI compliancy
+    oai_error = oai_repository.check_args(arg)
+
+    req.content_type = "text/xml"
+    req.send_http_header()
+
+    # non-compliant request: header, collected errors, footer
+    if oai_error != "":
+        req.write(oai_repository.oai_header(args,""))
+        req.write(oai_error)
+        req.write(oai_repository.oai_footer(""))
+        return "\n"
+
+    # verb -> name of the oai_repository function producing the reply
+    handler_names = {
+        "Identify"            : "OAIIdentify",
+        "ListSets"            : "OAIListSets",
+        "ListIdentifiers"     : "OAIListIdentifiers",
+        "ListRecords"         : "OAIListRecords",
+        "GetRecord"           : "OAIGetRecord",
+        "ListMetadataFormats" : "OAIListMetadataFormats"
+    }
+
+    verb = arg['verb']
+    if handler_names.has_key(verb):
+        handler = getattr(oai_repository, handler_names[verb])
+        req.write(handler(args))
+    else:
+        # unknown verb
+        req.write(oai_repository.oai_error("badVerb","Illegal OAI verb"))
+
+    return "\n"
diff --git a/modules/bibharvest/web/oai2d.py.wml b/modules/bibharvest/web/oai2d.py.wml
new file mode 100644
index 000000000..1213249b4
--- /dev/null
+++ b/modules/bibharvest/web/oai2d.py.wml
@@ -0,0 +1,138 @@
+## $Id$
+## OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+## read config variables:
+#include "config.wml"
+#include "configbis.wml"
+
+#!
+## $Id$
+## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+"""OAI interface for CDSware/MySQL written in Python compliant with OAI-PMH2.0"""
+
+# The '<: ... :>' expression below is a template construct; presumably
+# it is replaced by the build date when the .py file is generated from
+# this source -- TODO confirm.
+__lastupdated__ = """<: print `date +"%d %b %Y %H:%M:%S %Z"`; :>"""
+__version__ = "$Id$"
+
+
+## fill config variables:
+# directory appended to sys.path below so that the 'cdsware' package
+# can be imported
+pylibdir = "/python"
+
+## OAI config variables
+# NOTE(review): none of these five names is read by index() in this
+# file; presumably kept for reference or used elsewhere -- confirm.
+oaiidprefix = ""
+oaisampleidentifier = ""
+oaiidentifydescription = """"""
+oaiidfield = "909COo"
+oaisetfield = "909COp"
+
+# Import everything the handler needs; on failure the error is written
+# to stderr and the process exits with status 1.
+try:
+ import sys
+ import urllib
+ sys.path.append('%s' % pylibdir)
+ from cdsware.config import *
+ from cdsware.dbquery import run_sql
+ from cdsware import oai_repository
+except ImportError, e:
+ import sys
+ sys.stderr.write("Error: %s" % e)
+ sys.exit(1)
+
+
+def index (req):
+    """OAI repository interface: answer one OAI request.
+
+    'req' is the web request object; it must provide 'method', 'args',
+    'form', 'content_type', send_http_header() and write().  The request
+    arguments are checked with oai_repository.check_args() and the reply
+    is written to 'req' as text/xml.  The function itself returns a
+    single newline.
+    """
+
+    # rebuild the raw argument string: taken as-is for GET, re-encoded
+    # from the submitted form for POST
+    args = ""
+    method = req.method
+    if method == "GET":
+        args = req.args
+    elif method == "POST":
+        params = {}
+        for key in req.form.keys():
+            params[key] = req.form[key]
+        args = urllib.urlencode(params)
+
+    arg = oai_repository.parse_args(args)
+
+    # check request for OAI compliancy
+    oai_error = oai_repository.check_args(arg)
+
+    req.content_type = "text/xml"
+    req.send_http_header()
+
+    # non-compliant request: header, collected errors, footer
+    if oai_error != "":
+        req.write(oai_repository.oai_header(args,""))
+        req.write(oai_error)
+        req.write(oai_repository.oai_footer(""))
+        return "\n"
+
+    # verb -> name of the oai_repository function producing the reply
+    handler_names = {
+        "Identify"            : "OAIIdentify",
+        "ListSets"            : "OAIListSets",
+        "ListIdentifiers"     : "OAIListIdentifiers",
+        "ListRecords"         : "OAIListRecords",
+        "GetRecord"           : "OAIGetRecord",
+        "ListMetadataFormats" : "OAIListMetadataFormats"
+    }
+
+    verb = arg['verb']
+    if handler_names.has_key(verb):
+        handler = getattr(oai_repository, handler_names[verb])
+        req.write(handler(args))
+    else:
+        # unknown verb
+        req.write(oai_repository.oai_error("badVerb","Illegal OAI verb"))
+
+    return "\n"