Page MenuHomec4science

oaiarchive.in
No OneTemporary

File Metadata

Created
Sat, Jul 6, 16:02

oaiarchive.in

#!@PYTHON@
## -*- mode: python; coding: utf-8; -*-
##
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
OAI repository archive and management tool
Usage: oaiarchive [options]
Options:
-o --oaiset= Specify setSpec
-h --help Print this help
-V --version Print version information and exit
-a --add Add records to OAI repository
-d --delete Remove records from OAI repository
-r --report Print OAI repository status
-i --info Give info about OAI set (default)
-p --upload Upload records
-n --no-process Do not upload records (default)
Examples:
Expose set -setname- via OAI gateway
oaiarchive --oaiset='setname' --add --upload
oaiarchive -apo 'setname'
Remove records defined by set -setname- from OAI repository
oaiarchive --oaiset='setname' --delete --upload
oaiarchive -dpo 'setname'
Expose entire repository via OAI gateway
oaiarchive --oaiset=global --add --upload
oaiarchive -apo global
Print OAI repository status
oaiarchive -r
"""
__revision__ = "$Id$"
try:
import sys
from invenio.oaiarchive_engine import oaiarchive_task
from invenio.oaiarchive_engine import printInfo
from invenio.dbquery import run_sql, escape_string
from invenio.config import *
from invenio.search_engine import perform_request_search
from invenio.search_engine import print_record
from invenio.access_control_engine import acc_authorize_action
import getopt
import getpass
import string
import marshal
import signal
import time
import re
except ImportError, e:
print "Error: %s" % e
sys.exit(1)
# Module-level state shared by the functions below.
# `options` is filled from the command line when a task is submitted and is
# reloaded from the schTASK table when a task is run.
options = {} # global variable to hold task options
# Stored as-is in the schTASK.sleeptime column on submission; may be
# overridden with -s/--sleeptime.
sleeptime = "" # default sleeptime
# Defaults to the moment this module is loaded; may be overridden with
# -t/--time.
sched_time = time.strftime("%Y-%m-%d %H:%M:%S") # scheduled execution time in the date/time format
### BibSched compatibility procedures
###
def get_date(var, format_string="%Y-%m-%d %H:%M:%S"):
    """Return a date string laid out according to `format_string`.

    `var` is either
      * a shift relative to the current time: an optional sign, a number
        and a unit (d = days, h = hours, m = minutes, s = seconds), for
        example "+2h" or "-30m"; or
      * an absolute date already written according to `format_string`.

    Only the beginning of `var` has to look like a shift (the pattern is
    applied with match(), not anchored at the end).

    Raises ValueError (from time.strptime) when `var` is neither a shift
    nor a date in the given format.
    """
    # Raw string: the pattern text is unchanged, but "\+" and "\d" are no
    # longer parsed as (invalid) string escapes by the Python compiler.
    shift_re = re.compile(r"([-\+]{0,1})([\d]+)([dhms])")
    seconds_per_unit = {"d": 24 * 3600, "h": 3600, "m": 60, "s": 1}

    m = shift_re.match(var)
    if m is None:
        # Absolute date: the parse/format round trip validates the input.
        return time.strftime(format_string, time.strptime(var, format_string))

    sign_text, amount_text, unit = m.groups()
    if sign_text == "-":
        sign = -1
    else:
        sign = 1
    offset = sign * seconds_per_unit[unit] * float(amount_text)
    return time.strftime(format_string, time.localtime(time.time() + offset))
def write_message(msg, stream=sys.stdout):
    """Write a timestamped message to `stream` and flush it.

    `stream` must be sys.stdout or sys.stderr; for anything else a
    complaint is written to sys.stderr and `msg` is not printed.
    Each message is prefixed with the local time followed by " --> ".
    """
    if stream not in (sys.stdout, sys.stderr):
        sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream)
        return

    prefix = time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())
    stream.write(prefix)
    try:
        stream.write("%s\n" % msg)
    except UnicodeEncodeError:
        # The stream cannot encode the text: fall back to an ASCII
        # rendering with backslash escapes.
        stream.write("%s\n" % msg.encode('ascii', 'backslashreplace'))
    stream.flush()
def task_sig_sleep(sig, frame):
    """Handler for the 'sleep' signal sent by BibSched.

    Logs the event, marks the task SLEEPING in the task table and then
    blocks in signal.pause().  `sig` and `frame` are the arguments passed
    to signal handlers; they are only used for the verbose trace.
    """
    trace_enabled = options["verbose"] >= 9
    if trace_enabled:
        write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame))
    write_message("sleeping...")
    task_update_status("SLEEPING")
    # wait for the wake-up signal
    signal.pause()
def task_sig_wakeup(sig, frame):
    """Handler for the 'wakeup' signal sent by BibSched.

    Logs the event and marks the task CONTINUING in the task table.
    `sig` and `frame` are only used for the verbose trace.
    """
    trace_enabled = options["verbose"] >= 9
    if trace_enabled:
        write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame))
    write_message("continuing...")
    task_update_status("CONTINUING")
def task_sig_stop(sig, frame):
    """Handler for the 'stop' signal sent by BibSched.

    Marks the task STOPPING, goes through two placeholder shutdown steps
    (a log line followed by a fixed pause each), marks the task STOPPED
    and exits the process with status 0.
    """
    if options["verbose"] >= 9:
        write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame))
    write_message("stopping...")
    task_update_status("STOPPING")
    shutdown_steps = (
        ("flushing cache or whatever...", 3),
        ("closing tables or whatever...", 1),
    )
    for note, pause_seconds in shutdown_steps:
        write_message(note)
        time.sleep(pause_seconds)
    write_message("stopped")
    task_update_status("STOPPED")
    sys.exit(0)
def task_sig_suicide(sig, frame):
    """Handler for the 'suicide' signal sent by BibSched.

    Logs and records the SUICIDING and then the SUICIDED status, and exits
    the process with status 0.
    """
    if options["verbose"] >= 9:
        write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame))
    transitions = (
        ("suiciding myself now...", "SUICIDING"),
        ("suicided", "SUICIDED"),
    )
    for note, status in transitions:
        write_message(note)
        task_update_status(status)
    sys.exit(0)
def task_sig_unknown(sig, frame):
    """Handler for any other signal sent by the shell or the user.

    The signal is only reported through write_message(); nothing else
    happens.
    """
    notice = "unknown signal %d (frame %s) ignored" % (sig, frame)
    write_message(notice)
def authenticate(user, header="OAI Archive Task Submission", action="runoaiarchive"):
"""Authenticate the user against the user database.
Check for its password, if it exists.
Check for action access rights.
Return user name upon authorization success,
do system exit upon authorization failure.
"""
print header
print "=" * len(header)
if user == "":
print >> sys.stdout, "\rUsername: ",
user = string.strip(string.lower(sys.stdin.readline()))
else:
print >> sys.stdout, "\rUsername:", user
## first check user pw:
res = run_sql("select id,password from user where email=%s", (user,), 1) + \
run_sql("select id,password from user where nickname=%s", (user,), 1)
if not res:
print "Sorry, %s does not exist." % user
sys.exit(1)
else:
(uid_db, password_db) = res[0]
if password_db:
password_entered = getpass.getpass()
if password_db == password_entered:
pass
else:
print "Sorry, wrong credentials for %s." % user
sys.exit(1)
## secondly check authorization for the action:
(auth_code, auth_message) = acc_authorize_action(uid_db, action)
if auth_code != 0:
print auth_message
sys.exit(1)
return user
def task_submit():
    """Submit the task to the BibSched task queue.

    This is what people invoke via the command line.  The global
    `options` are marshalled into a new schTASK row for the 'oaiarchive'
    procedure with status WAITING, together with the global `sleeptime`
    and `sched_time`.  The id returned by the INSERT is then stored in
    options["task"] and the row's arguments are rewritten so that the
    stored options carry their own task id.

    Returns the new task id.  The process exits (inside authenticate())
    when the user cannot be authenticated or authorized.
    """
    # The module-level variable is `sleeptime`; the previous declaration
    # named a non-existent `sleep_time`.
    global options, sched_time, sleeptime
    ## sanity check: remove eventual "task" option:
    if "task" in options:
        del options["task"]
    ## authenticate user:
    user = authenticate(options.get("user", ""))
    ## submit task:
    # NOTE(review): sched_time is passed through escape_string() and then
    # also bound as a query parameter -- kept as in the original; confirm
    # whether the extra escaping is wanted.
    task_id = run_sql("""INSERT INTO schTASK (id,proc,user,status,arguments,sleeptime,runtime) VALUES (NULL,'oaiarchive',%s,'WAITING',%s,%s,%s)""",
                      (user, marshal.dumps(options), sleeptime, escape_string(sched_time)))
    ## update task number:
    options["task"] = task_id
    run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""",
            (marshal.dumps(options), task_id))
    write_message("Task #%d submitted." % task_id)
    return task_id
def task_update_progress(msg):
    """Store `msg` as the progress text of the current task.

    The schTASK row is selected by options["task"]; the value returned by
    run_sql() is handed back unchanged.
    """
    query = "UPDATE schTASK SET progress=%s where id=%s"
    params = (msg, options["task"])
    return run_sql(query, params)
def task_update_status(val):
    """Store `val` as the status of the current task.

    The schTASK row is selected by options["task"]; the value returned by
    run_sql() is handed back unchanged.
    """
    query = "UPDATE schTASK SET status=%s where id=%s"
    params = (val, options["task"])
    return run_sql(query, params)
def task_read_status(task_id):
    """Read the status of task `task_id` from the BibSched task table.

    Returns the first column of the first row returned by the query, or
    the string 'UNKNOWN' when the query yields no usable row.
    """
    res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1)
    try:
        return res[0][0]
    except (IndexError, TypeError):
        # No row (or no result object at all).  Only lookup failures are
        # treated as "unknown"; the previous bare except also swallowed
        # SystemExit and KeyboardInterrupt.
        return 'UNKNOWN'
def task_get_options(id):
    """Return the options of task `id` read from the BibSched task table.

    Only rows whose proc is 'oaiarchive' are considered.  The stored
    arguments are decoded with marshal.loads().  When there is no such
    row, or its arguments cannot be decoded, an error is reported through
    write_message() and the process exits with status 1.
    """
    res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='oaiarchive'", (id,))
    try:
        # NOTE(review): marshal data comes straight from the database;
        # marshal is not safe against maliciously constructed input.
        return marshal.loads(res[0][0])
    except (IndexError, TypeError, ValueError, EOFError):
        # IndexError/TypeError: no row; ValueError/EOFError/TypeError are
        # what marshal.loads() raises on undecodable data.
        write_message("Error: OAIarchive task %d does not seem to exist." % id)
        sys.exit(1)
def task_run(task_id):
    """Run the task `task_id`.

    Loads the task's options from the BibSched task table into the global
    `options`, refuses to continue unless the options carry a "task" key
    and the task status is WAITING, then marks the task RUNNING, installs
    the signal handlers, calls oaiarchive_task(options) and finally marks
    the task DONE.  Returns None in every case.
    """
    global options
    options = task_get_options(task_id) # get options from BibSched task table
    ## check task id:
    if "task" not in options:
        write_message("Error: The task #%d does not seem to be a OAI archive task." % task_id)
        return
    # NOTE(review): an `if options.has_key("option"):` test used to sit
    # here, followed only by comments.  Nothing in this file ever stores
    # an "option" key, so the test could never be true; it has been
    # dropped so that the steps below always execute.  Confirm that no
    # other producer of task arguments relies on it.
    ## check task status:
    task_status = task_read_status(task_id)
    if task_status != "WAITING":
        write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status))
        return
    ## update task status:
    task_update_status("RUNNING")
    ## initialize signal handler:
    handlers = (
        (signal.SIGUSR1, task_sig_sleep),
        (signal.SIGTERM, task_sig_stop),
        (signal.SIGABRT, task_sig_suicide),
        (signal.SIGCONT, task_sig_wakeup),
        (signal.SIGINT, task_sig_unknown),  # Ctrl-C is logged and ignored
    )
    for signum, handler in handlers:
        signal.signal(signum, handler)
    ## run the task:
    oaiarchive_task(options)
    ## we are done:
    task_update_status("DONE")
    return
#########################
def main():
    """Main function that analyzes command line input and calls whatever is appropriate.
    Useful for learning on how to write BibSched tasks.

    Two invocation forms are recognised:
      A - a single all-digit argument: run the task with that id;
      B - anything else: parse the options and submit a new task.

    In form B, unknown options or invalid option values print the usage
    text (printInfo) and exit with status 1; -h and -V exit with status 0.

    options["mode"] values: 0 = info (default), 1 = add, 2 = delete,
    3 = report.  options["upload"] is 1 with -p/--upload, otherwise 0.
    """
    global options, sched_time, sleeptime
    ## parse command line:
    if len(sys.argv) == 2 and sys.argv[1].isdigit():
        ## A - run the task
        task_run(int(sys.argv[1]))
        return

    ## B - submit the task
    options = {} # will hold command-line options
    options["verbose"] = 1
    try:
        opts, dummy_args = getopt.getopt(
            sys.argv[1:],
            "hVv:u:s:t:ado:pirn",
            ["help", "version", "verbose=", "user=", "sleeptime=", "time=",
             "add", "delete", "oaiset=", "upload", "info", "report",
             "no-process"])
    except getopt.GetoptError:
        printInfo()
        sys.exit(1)
    ## set defaults
    options["upload"] = 0
    options["mode"] = 0
    options["oaiset"] = ""
    options["nice"] = 0
    try:
        for name, value in opts:
            if name in ("-h", "--help"):
                printInfo()
                sys.exit(0)
            elif name in ("-V", "--version"):
                sys.stdout.write("%s\n" % __revision__)
                sys.exit(0)
            elif name in ("-u", "--user"):
                options["user"] = value
            elif name in ("-v", "--verbose"):
                options["verbose"] = int(value)
            elif name in ("-s", "--sleeptime"):
                get_date(value) # see if it is a valid shift
                sleeptime = value
            elif name in ("-t", "--time"):
                sched_time = get_date(value)
            elif name in ("-o", "--oaiset"):
                options["oaiset"] = value
            elif name in ("-a", "--add"):
                options["mode"] = 1
            elif name in ("-d", "--delete"):
                options["mode"] = 2
            elif name in ("-p", "--upload"):
                options["upload"] = 1
            elif name in ("-i", "--info"):
                options["mode"] = 0
            elif name in ("-r", "--report"):
                options["mode"] = 3
            elif name in ("-n", "--no-process"):
                # -n used to be intercepted by an earlier "-n/--nice"
                # branch, which stored an empty string in options["nice"]
                # and left "upload" untouched.  "--nice" is not registered
                # with getopt, so that branch had no other effect and has
                # been removed; -n now does what the usage text says.
                options["upload"] = 0
    except (ValueError, OverflowError):
        # non-numeric verbosity, or a date/shift get_date() cannot handle
        printInfo()
        sys.exit(1)
    task_submit()
    return
### script entry point: only act when executed directly, not when imported
if __name__ == "__main__":
    main()

Event Timeline