diff --git a/modules/bibrank/bin/bibrank.in b/modules/bibrank/bin/bibrank.in index a27d2def5..11e54179e 100644 --- a/modules/bibrank/bin/bibrank.in +++ b/modules/bibrank/bin/bibrank.in @@ -1,467 +1,467 @@ ##Ranking of records using different parameters and methods. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## read config variables: #include "config.wml" #include "configbis.wml" #include "cdswmllib.wml" ## start Python: #! # -*- coding: utf-8 -*- ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """ BibRank ranking daemon. Usage: %s [options] Ranking examples: %s -wjif -a --id=0-30000,30001-860000 --verbose=9 %s -wjif -d --modified='2002-10-27 13:57:26' %s -wjif --rebalance --collection=Articles %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --rebalance rebalancing rank data: does complete update. if not used: quick update Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ __version__ = "<: print generate_pretty_version_string('$Id$'); :>" ## fill config variables: pylibdir = "/python" try: from marshal import loads,dumps from zlib import compress,decompress from string import split,translate,lower,upper import getopt import getpass import string import os import sre import sys import time import MySQLdb import urllib import signal import tempfile import traceback import cStringIO import re import copy import types import ConfigParser import cdsware.search_engine except ImportError, e: import sys try: sys.path.append('%s' % pylibdir) from cdsware.dbquery import run_sql from cdsware.bibrank_tag_based_indexer import * from cdsware.bibrank_word_indexer import * from cdsware.access_control_engine import acc_authorize_action from cdsware.search_engine import perform_request_search except ImportError, e: import sys task_id = -1 # the task id nb_char_in_line = 50 # for verbose pretty printing chunksize = 1000 # default size of chunks that the records will be treated by base_process_size = 4500 # process base size options = {} # will hold task options def serialize_via_numeric_array_dumps(arr): return Numeric.dumps(arr) def serialize_via_numeric_array_compr(str): return compress(str) def serialize_via_numeric_array_escape(str): return MySQLdb.escape_string(str) def serialize_via_numeric_array(arr): """Serialize Numeric array into a compressed string.""" return serialize_via_numeric_array_escape(serialize_via_numeric_array_compr(serialize_via_numeric_array_dumps(arr))) def deserialize_via_numeric_array(string): """Decompress and deserialize string into a Numeric array.""" return Numeric.loads(decompress(string)) def serialize_via_marshal(obj): """Serialize Python object via marshal into a compressed string.""" return MySQLdb.escape_string(compress(dumps(obj))) def deserialize_via_marshal(string): """Decompress and deserialize string into a Python object via marshal.""" return loads(decompress(string)) def authenticate(user, header="BibRank Task Submission", action="runbibrank"): print header print "=" * len(header) if user == "": print>> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print>> sys.stdout, "\rUsername: ", user res = run_sql("select id,password from user where email=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) if not acc_authorize_action(uid_db, action): print "Sorry, %s has no right to %s." % (user, action) sys.exit(1) return user def usage(code, msg=''): "Prints usage for this module." if msg: sys.stderr.write("Error: %s.\n" % msg) print >> sys.stderr, \ """ Usage: %s [options] Ranking examples: %s -wjif -a --id=0-30000,30001-860000 --verbose=9 %s -wjif -d --modified='2002-10-27 13:57:26' %s -wjif --rebalance --collection=Articles %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --rebalance rebalancing rank data: does complete update. if not used: quick update Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ % ((sys.argv[0],) * 5) sys.exit(code) def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("stopping...") task_update_status("STOPPING") errcode = 0 try: task_sig_stop_commands() write_message("stopped") task_update_status("STOPPED") except StandardError, err: write_message("Error during stopping! %e" % err) task_update_status("STOPPINGFAILED") errcode = 1 sys.exit(errcode) def task_sig_stop_commands(): """Do all the commands necessary to stop the task before quitting. Useful for task_sig_stop() handler. """ write_message("stopping commands started") write_message("stopping commands ended") def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("unknown signal %d ignored" % sig) # do nothing for other signals def task_update_progress(msg): """Updates progress information in the BibSched task table.""" query = "UPDATE schTASK SET progress='%s' where id=%d" % (MySQLdb.escape_string(msg), task_id) if options["verbose"]>= 9: write_message(query) run_sql(query) return def task_update_status(val): """Updates state information in the BibSched task table.""" query = "UPDATE schTASK SET status='%s' where id=%d" % (MySQLdb.escape_string(val), task_id) if options["verbose"]>= 9: write_message(query) run_sql(query) return def split_ranges(parse_string): recIDs = [] ranges = string.split(parse_string, ",") for range in ranges: tmp_recIDs = string.split(range, "-") if len(tmp_recIDs)==1: recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])]) else: if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check tmp = tmp_recIDs[0] tmp_recIDs[0] = tmp_recIDs[1] tmp_recIDs[1] = tmp recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])]) return recIDs def get_date_range(var): "Returns the two dates contained as a low,high tuple" limits = string.split(var, ",") if len(limits)==1: low = get_datetime(limits[0]) return low,None if len(limits)==2: low = get_datetime(limits[0]) high = get_datetime(limits[1]) return low,high def command_line(): """Storing the task together with the parameters given.""" global options long_flags = ["lastupdate","add","del","repair","maxmem", "flush","stat", "rebalance", "id=", "collection=", "check", "modified=", "update", "run=", "user=", "sleeptime=", "time=", "help", "version", "verbose="] short_flags = "ladSi:m:c:kUrRM:f:w:u:s:t:hVv:" format_string = "%Y-%m-%d %H:%M:%S" sleeptime = "" try: opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags) except getopt.GetoptError, err: write_message(err, sys.stderr) usage(1) if args: usage(1) options = {"quick":"yes","cmd":"","flush":10000,"validset":"", "collection":[], "id":[], "check": "", "stat":"", "modified":"", "last_updated":"","run":"", "verbose":1} sched_time = time.strftime(format_string) user = "" try: for opt in opts: if opt == ("-h","") or opt == ("--help",""): usage(1) elif opt == ("-V","") or opt == ("--version",""): print __version__ sys.exit(1) elif opt[0] in ["--verbose", "-v"]: options["verbose"] = int(opt[1]) elif opt == ("-a","") or opt == ("--add",""): options["cmd"] = "add" if ("-x","") in opts or ("--del","") in opts: usage(1) elif opt[0] in ["--run", "-w"]: options["run"] = [] run = split(opt[1],",") for key in range(0,len(run)): options["run"].append(run[key]) elif opt == ("-r","") or opt == ("--repair",""): options["cmd"] = "repair" elif opt == ("-d","") or opt == ("--del",""): options["cmd"]="del" elif opt[0] in [ "-u", "--user"]: user = opt[1] elif opt[0] in [ "-k", "--check"]: options["cmd"]= "check" elif opt[0] in [ "-S", "--stat"]: options["cmd"] = "stat" elif opt[0] in [ "-i", "--id" ]: options["id"] = options["id"] + split_ranges(opt[1]) elif opt[0] in [ "-c", "--collection" ]: options["collection"] = opt[1] elif opt[0] in [ "-R", "--rebalance"]: options["quick"] = "no" elif opt[0] in [ "-f", "--flush"]: options["flush"]=int(opt[1]) elif opt[0] in [ "-M", "--maxmem"]: options["maxmem"]=int(opt[1]) if options["maxmem"] < base_process_size + 1000: raise StandardError, "Memory usage should be higher than %d kB" % (base_process_size + 1000) elif opt[0] in [ "-m", "--modified" ]: options["modified"] = get_date_range(opt[1]) #2002-10-27 13:57:26 elif opt[0] in [ "-l", "--lastupdate" ]: options["last_updated"] = "last_updated" elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift sleeptime=opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time = get_datetime(opt[1]) else: usage(1) except StandardError, e: write_message(e, sys.stderr) sys.exit(1) if len(opts) == 0: options["last_updated"] = "last_updated" options["cmd"] = "add" options["quick"] = "no" options["flush"]= 100000 res = run_sql("SELECT name from rnkMETHOD") options["run"] = [] for (name,) in res: options["run"].append(name) - #user = authenticate(user) + user = authenticate(user) if options["verbose"]>=9: write_message("Storing task options %s" % options) query = """INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('runbibrank','%s',CAST('%s' AS DATE),'%s','%s','WAITING')""" % (MySQLdb.escape_string(user),MySQLdb.escape_string(sched_time), sleeptime, MySQLdb.escape_string(dumps(options))) new_task_id = run_sql(query) print "Task #%d was successfully scheduled for execution." % new_task_id return def task_run(row): """Run the indexing task. The row argument is the BibSched task queue row, containing if, arguments, etc. Return 1 in case of success and 0 in case of failure. """ try: options = marshal.loads(row[6]) for key in options["run"]: file = etcdir + "/bibrank/" + key + ".cfg" if options["verbose"] >= 9: write_message("Getting configuration from file: %s" % file) config = ConfigParser.ConfigParser() try: config.readfp(open(file)) except StandardError, e: write_message("Cannot find configurationfile: %s. The rankmethod may also not be registered using the BibRank Admin Interface." % file, sys.stderr) raise StandardError #Using the function variable to call the function related to the rank method cfg_function = config.get("rank_method", "function") func_object = globals().get(cfg_function) func_object(row, key) except StandardError, e: write_message("\nException caught: %s" % e, sys.stderr) traceback.print_tb(sys.exc_info()[2]) sys.exit(1) return 1 def main(): if len(sys.argv) == 2: try: id = int(sys.argv[1]) except StandardError, err: command_line() sys.exit() res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (id), None, 1) if not res: write_message("Selected task not found.", sys.stderr) sys.exit(1) try: if not task_run(res[0]): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:") traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.") task_update_status("ERROR") else: command_line() if __name__ == "__main__": main() diff --git a/modules/bibrank/bin/bibrank.wml b/modules/bibrank/bin/bibrank.wml index a27d2def5..11e54179e 100644 --- a/modules/bibrank/bin/bibrank.wml +++ b/modules/bibrank/bin/bibrank.wml @@ -1,467 +1,467 @@ ##Ranking of records using different parameters and methods. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## read config variables: #include "config.wml" #include "configbis.wml" #include "cdswmllib.wml" ## start Python: #! # -*- coding: utf-8 -*- ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """ BibRank ranking daemon. Usage: %s [options] Ranking examples: %s -wjif -a --id=0-30000,30001-860000 --verbose=9 %s -wjif -d --modified='2002-10-27 13:57:26' %s -wjif --rebalance --collection=Articles %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --rebalance rebalancing rank data: does complete update. if not used: quick update Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ __version__ = "<: print generate_pretty_version_string('$Id$'); :>" ## fill config variables: pylibdir = "/python" try: from marshal import loads,dumps from zlib import compress,decompress from string import split,translate,lower,upper import getopt import getpass import string import os import sre import sys import time import MySQLdb import urllib import signal import tempfile import traceback import cStringIO import re import copy import types import ConfigParser import cdsware.search_engine except ImportError, e: import sys try: sys.path.append('%s' % pylibdir) from cdsware.dbquery import run_sql from cdsware.bibrank_tag_based_indexer import * from cdsware.bibrank_word_indexer import * from cdsware.access_control_engine import acc_authorize_action from cdsware.search_engine import perform_request_search except ImportError, e: import sys task_id = -1 # the task id nb_char_in_line = 50 # for verbose pretty printing chunksize = 1000 # default size of chunks that the records will be treated by base_process_size = 4500 # process base size options = {} # will hold task options def serialize_via_numeric_array_dumps(arr): return Numeric.dumps(arr) def serialize_via_numeric_array_compr(str): return compress(str) def serialize_via_numeric_array_escape(str): return MySQLdb.escape_string(str) def serialize_via_numeric_array(arr): """Serialize Numeric array into a compressed string.""" return serialize_via_numeric_array_escape(serialize_via_numeric_array_compr(serialize_via_numeric_array_dumps(arr))) def deserialize_via_numeric_array(string): """Decompress and deserialize string into a Numeric array.""" return Numeric.loads(decompress(string)) def serialize_via_marshal(obj): """Serialize Python object via marshal into a compressed string.""" return MySQLdb.escape_string(compress(dumps(obj))) def deserialize_via_marshal(string): """Decompress and deserialize string into a Python object via marshal.""" return loads(decompress(string)) def authenticate(user, header="BibRank Task Submission", action="runbibrank"): print header print "=" * len(header) if user == "": print>> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print>> sys.stdout, "\rUsername: ", user res = run_sql("select id,password from user where email=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) if not acc_authorize_action(uid_db, action): print "Sorry, %s has no right to %s." % (user, action) sys.exit(1) return user def usage(code, msg=''): "Prints usage for this module." if msg: sys.stderr.write("Error: %s.\n" % msg) print >> sys.stderr, \ """ Usage: %s [options] Ranking examples: %s -wjif -a --id=0-30000,30001-860000 --verbose=9 %s -wjif -d --modified='2002-10-27 13:57:26' %s -wjif --rebalance --collection=Articles %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --rebalance rebalancing rank data: does complete update. if not used: quick update Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ % ((sys.argv[0],) * 5) sys.exit(code) def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("stopping...") task_update_status("STOPPING") errcode = 0 try: task_sig_stop_commands() write_message("stopped") task_update_status("STOPPED") except StandardError, err: write_message("Error during stopping! %e" % err) task_update_status("STOPPINGFAILED") errcode = 1 sys.exit(errcode) def task_sig_stop_commands(): """Do all the commands necessary to stop the task before quitting. Useful for task_sig_stop() handler. """ write_message("stopping commands started") write_message("stopping commands ended") def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" if options["verbose"]>= 9: write_message("got signal %d" % sig) write_message("unknown signal %d ignored" % sig) # do nothing for other signals def task_update_progress(msg): """Updates progress information in the BibSched task table.""" query = "UPDATE schTASK SET progress='%s' where id=%d" % (MySQLdb.escape_string(msg), task_id) if options["verbose"]>= 9: write_message(query) run_sql(query) return def task_update_status(val): """Updates state information in the BibSched task table.""" query = "UPDATE schTASK SET status='%s' where id=%d" % (MySQLdb.escape_string(val), task_id) if options["verbose"]>= 9: write_message(query) run_sql(query) return def split_ranges(parse_string): recIDs = [] ranges = string.split(parse_string, ",") for range in ranges: tmp_recIDs = string.split(range, "-") if len(tmp_recIDs)==1: recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])]) else: if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check tmp = tmp_recIDs[0] tmp_recIDs[0] = tmp_recIDs[1] tmp_recIDs[1] = tmp recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])]) return recIDs def get_date_range(var): "Returns the two dates contained as a low,high tuple" limits = string.split(var, ",") if len(limits)==1: low = get_datetime(limits[0]) return low,None if len(limits)==2: low = get_datetime(limits[0]) high = get_datetime(limits[1]) return low,high def command_line(): """Storing the task together with the parameters given.""" global options long_flags = ["lastupdate","add","del","repair","maxmem", "flush","stat", "rebalance", "id=", "collection=", "check", "modified=", "update", "run=", "user=", "sleeptime=", "time=", "help", "version", "verbose="] short_flags = "ladSi:m:c:kUrRM:f:w:u:s:t:hVv:" format_string = "%Y-%m-%d %H:%M:%S" sleeptime = "" try: opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags) except getopt.GetoptError, err: write_message(err, sys.stderr) usage(1) if args: usage(1) options = {"quick":"yes","cmd":"","flush":10000,"validset":"", "collection":[], "id":[], "check": "", "stat":"", "modified":"", "last_updated":"","run":"", "verbose":1} sched_time = time.strftime(format_string) user = "" try: for opt in opts: if opt == ("-h","") or opt == ("--help",""): usage(1) elif opt == ("-V","") or opt == ("--version",""): print __version__ sys.exit(1) elif opt[0] in ["--verbose", "-v"]: options["verbose"] = int(opt[1]) elif opt == ("-a","") or opt == ("--add",""): options["cmd"] = "add" if ("-x","") in opts or ("--del","") in opts: usage(1) elif opt[0] in ["--run", "-w"]: options["run"] = [] run = split(opt[1],",") for key in range(0,len(run)): options["run"].append(run[key]) elif opt == ("-r","") or opt == ("--repair",""): options["cmd"] = "repair" elif opt == ("-d","") or opt == ("--del",""): options["cmd"]="del" elif opt[0] in [ "-u", "--user"]: user = opt[1] elif opt[0] in [ "-k", "--check"]: options["cmd"]= "check" elif opt[0] in [ "-S", "--stat"]: options["cmd"] = "stat" elif opt[0] in [ "-i", "--id" ]: options["id"] = options["id"] + split_ranges(opt[1]) elif opt[0] in [ "-c", "--collection" ]: options["collection"] = opt[1] elif opt[0] in [ "-R", "--rebalance"]: options["quick"] = "no" elif opt[0] in [ "-f", "--flush"]: options["flush"]=int(opt[1]) elif opt[0] in [ "-M", "--maxmem"]: options["maxmem"]=int(opt[1]) if options["maxmem"] < base_process_size + 1000: raise StandardError, "Memory usage should be higher than %d kB" % (base_process_size + 1000) elif opt[0] in [ "-m", "--modified" ]: options["modified"] = get_date_range(opt[1]) #2002-10-27 13:57:26 elif opt[0] in [ "-l", "--lastupdate" ]: options["last_updated"] = "last_updated" elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift sleeptime=opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time = get_datetime(opt[1]) else: usage(1) except StandardError, e: write_message(e, sys.stderr) sys.exit(1) if len(opts) == 0: options["last_updated"] = "last_updated" options["cmd"] = "add" options["quick"] = "no" options["flush"]= 100000 res = run_sql("SELECT name from rnkMETHOD") options["run"] = [] for (name,) in res: options["run"].append(name) - #user = authenticate(user) + user = authenticate(user) if options["verbose"]>=9: write_message("Storing task options %s" % options) query = """INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('runbibrank','%s',CAST('%s' AS DATE),'%s','%s','WAITING')""" % (MySQLdb.escape_string(user),MySQLdb.escape_string(sched_time), sleeptime, MySQLdb.escape_string(dumps(options))) new_task_id = run_sql(query) print "Task #%d was successfully scheduled for execution." % new_task_id return def task_run(row): """Run the indexing task. The row argument is the BibSched task queue row, containing if, arguments, etc. Return 1 in case of success and 0 in case of failure. """ try: options = marshal.loads(row[6]) for key in options["run"]: file = etcdir + "/bibrank/" + key + ".cfg" if options["verbose"] >= 9: write_message("Getting configuration from file: %s" % file) config = ConfigParser.ConfigParser() try: config.readfp(open(file)) except StandardError, e: write_message("Cannot find configurationfile: %s. The rankmethod may also not be registered using the BibRank Admin Interface." % file, sys.stderr) raise StandardError #Using the function variable to call the function related to the rank method cfg_function = config.get("rank_method", "function") func_object = globals().get(cfg_function) func_object(row, key) except StandardError, e: write_message("\nException caught: %s" % e, sys.stderr) traceback.print_tb(sys.exc_info()[2]) sys.exit(1) return 1 def main(): if len(sys.argv) == 2: try: id = int(sys.argv[1]) except StandardError, err: command_line() sys.exit() res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (id), None, 1) if not res: write_message("Selected task not found.", sys.stderr) sys.exit(1) try: if not task_run(res[0]): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:") traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.") task_update_status("ERROR") else: command_line() if __name__ == "__main__": main()