diff --git a/modules/bibformat/bin/bibreformat.in b/modules/bibformat/bin/bibreformat.in index 5c271425b..859fd83dc 100644 --- a/modules/bibformat/bin/bibreformat.in +++ b/modules/bibformat/bin/bibreformat.in @@ -1,606 +1,608 @@ ## $Id$ ## BibReformat -- to reformat HTML brief (and other) formats for bibliographic records ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## read config variables: #include "config.wml" #include "configbis.wml" ## start Python: #! ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. ## import interesting modules: pylibdir = "/python" ## okay, rest of the Python code goes below ####### ## version number: __version__ = "$Id$" ## import interesting modules: try: import sys sys.path.append('%s' % pylibdir) from cdsware.dbquery import run_sql from cdsware.config import * from cdsware.search_engine import perform_request_search import getopt import urllib import marshal import signal import string import sys import os import time except ImportError, e: print "Error: %s" % e import sys sys.exit(1) sql_queries = [] mysql_queries = [] process_hb = 0 process = 1 fmt = "hb" def bibreformat_task(sql_queries, mysql_queries, process_hb): global process, fmt t1 = os.times()[4] ### Options, parameters ### if len(sql_queries)+len(mysql_queries) == 0: print_info() sys.exit() + print "Querying the database.." + if process_hb: without_hb = withouthb() recIDs = [] if (mysql_queries[0] != "") or (mysql_queries[1] != "") or (mysql_queries[2] != ""): res = perform_request_search(req=None, of='id', c=mysql_queries[0], p=mysql_queries[2], f=mysql_queries[1]).tolist() for item in res: recIDs.append(item) for sql_query in sql_queries: res = run_sql(sql_query) - for item in res: + for item in res: recIDs.append(item[0]) ### list of corresponding record IDs was retrieved ### bibformat the records selected if process_hb: print "Records to be processed: %d" % (len(recIDs)+len(without_hb)) print "Out of it records without created format: %d" % len(without_hb) else: print "Records to be processed: %d" % (len(recIDs)) ### Initialize main loop total_rec = 0 # Total number of records xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call # weburl = "" ### Iterate over all records prepared in lists I (option) if process: iterate_over(recIDs, weburl, fmt) ### Iterate over all records prepared in list II (no_hb) if process_hb and process: iterate_over(without_hb, weburl, fmt) ### Final statistics t2 = os.times()[4] elapsed = t2 - t1 message = "total records processed: %d" % total_rec print message message = "total processing time: %2f sec" % elapsed print message message = "Time spent on external call (os.system):" print message message = " bibformat: %2f sec" % tbibformat print message message = " bibupload: %2f sec" % tbibupload print message ### MySQL Search by recID ### def read_xml_input(weburl, query): "Read records to preprocess via http" url = "%s/search.py?%s" % (weburl, query) return urllib.urlopen(url).read() ### Result set operations ### def lhdiff(l1, l2): "Does list difference via intermediate hash." d = {} ld = [] for e in l2: d[e]=1 for e in l1: if not d.has_key(e): ld.append(e) return ld ### Result set operations ### def ldiff(l1, l2): "Returns l1 - l2." ld = [] for e in l1: if not e in l2: ld.append(e) return ld ### Identify recIDs of records with missing hb ### def withouthb(): "List of record IDs to be reformated, not having the hb format yet" global fmt xm1, xm2, hb1, hb2 = [],[],[],[] q1 = "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='xm'" q2 = "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt ## get complete recID list of xm formatted records xm1 = run_sql(q1) for item in xm1: xm2.append(item[0]) ## get complete recID list of hb formatted records hb1 = run_sql(q2) for item in hb1: hb2.append(item[0]) return lhdiff(xm2,hb2) ### Bibreformat all selected records ### def iterate_over(list, weburl, fmt): "Iterate odver list of IDs" n_rec = 0 n_max = 1000 total_rec = 0 # Total number of records xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call # tmpdir = "" # bindir = "" for record in list: n_rec = n_rec + 1 total_rec = total_rec + 1 message = "Processing record: %d" % (record) print message query = "id=%d&of=xm" % (record) xml_content = xml_content + read_xml_input(weburl, query) if xml_content: if n_rec >= n_max: filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s/rec_fmt.xml 2> %s/bibreformat.err" % (bindir,string.upper(fmt),tmpdir,tmpdir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s/rec_fmt.xml" % (bindir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) n_rec = 0 xml_content = '' ### Process the last re-formated chunk ### if n_rec > 0: print "Processing last record set (%d)" % n_rec filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s/rec_fmt.xml 2> %s/bibreformat.err" % (bindir,string.upper(fmt),tmpdir,tmpdir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s/rec_fmt.xml" % (bindir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) return ### Bibshed compatibility procedures ### def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" write_message("sleeping...") task_update_state("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" write_message("continuing...") task_update_state("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" write_message("stopping...") task_update_state("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_state("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" write_message("suiciding myself now...") task_update_state("SUICIDING") write_message("suicided") task_update_state("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" write_message("unknown signal %d ignored" % sig) # do nothing for other signals def getpass(prompt = "Password: "): """Prompts for a password without echoing it back to the screen""" import termios, sys fd = sys.stdin.fileno() old = termios.tcgetattr(fd) new = termios.tcgetattr(fd) new[3] = new[3] & ~termios.ECHO # lflags passwd = "" try: termios.tcsetattr(fd, termios.TCSADRAIN, new) passwd = raw_input(prompt) print finally: termios.tcsetattr(fd, termios.TCSADRAIN, old) return passwd def authenticate(user): """Authenticates a user against the user database. NOTE: Access might be more complex in the future""" print "BibReformat Task Submission" print "=========================" if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user res = run_sql("select password from user where email=%s", (user,), 1) if res: row = res[0] password_db = row[0] if password_db != '': # authentication needed password_entered = getpass() if password_db == password_entered: return user else: print "Sorry, you seem to be unauthorized user. Exiting." sys.exit(1) else: # no authentication needed return user else: print "Sorry, %s seems to be unauthorized user. Exiting." % user sys.exit(1) def task_submit(options): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: task_id = run_sql("""INSERT INTO schTASKS (id,proc,user,state,date,arguments) VALUES (NULL,'bibreformat',%s,'WAITING',NOW(),%s)""", (user, marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASKS SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASKS SET progress=%s where id=%s", (msg, task_id)) def task_update_state(val): """Updates state information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASKS SET state=%s where id=%s", (val, task_id)) def task_read_state(task_id): """Read state information in the BibSched task table.""" res = run_sql("SELECT state FROM schTASKS where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASKS WHERE id=%s AND proc='bibreformat'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: BibReformat task %d does not seem to exist." % id) sys.exit(1) return out def task_run(process_hb): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call.""" global task_id, process, fmt options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a BibReformat task." % task_id) return ## initialize parameters if options.has_key("all"): sql_queries.append("select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt) if options.has_key("new"): process_hb = 1 if options.has_key("noprocess"): process = 0 if options.has_key("last"): sql_queries.append("select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format='%s' and bf.last_updated < br.modification_date" % fmt) if options.has_key("collection"): mysql_queries.append(options["collection"]) else: mysql_queries.append("") if options.has_key("field"): mysql_queries.append(options["field"]) else: mysql_queries.append("") if options.has_key("phrase"): mysql_queries.append(options["phrase"]) else: mysql_queries.append("") if options.has_key("format"): fmt = options["format"] ## check task state: task_state = task_read_state(task_id) if task_state != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_state)) return ## update task state: task_update_state("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: bibreformat_task(sql_queries, mysql_queries, process_hb) ## we are done: task_update_state("DONE") return def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write(" -u, --user=USER \t\t User name to submit the task as, password needed.\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -d, --debug \t\t Print debugging information.\n") sys.stderr.write(" -a, --all \t\t All records\n") sys.stderr.write(" -c, --collection\t\t Select records by collection\n") sys.stderr.write(" -f, --field \t\t Select records by field.\n") sys.stderr.write(" -p, --phrase \t\t Select records by phrase.\n") sys.stderr.write(" -o, --format \t\t Specify output format to be (re-)created. (default HB)\n") sys.stderr.write(" -n, --noprocess \t\t Count records to be processed only (no processing done)\n") sys.stderr.write("\n") sys.stderr.write(" Example: bibreformat -n Show how many records are to be bibreformated.") sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. Useful for learning on how to write BibSched tasks.""" global task_id ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) process_hb = 0 task_run(process_hb) else: ## B - submit the task process_hb = 0 options = {} # will hold command-line options try: opts, args = getopt.getopt(sys.argv[1:], "hVdu:ac:f:p:lo:n", ["help", "version", "debug","user=","all","collection=","field=","phrase=","format=","noprocess"]) except getopt.GetoptError, err: usage(1, err) if len(sys.argv) == 1: # default options["new"] = 1 options["last"] = 1 try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __version__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-d", "--debug"]: options["debug"] = 1 elif opt[0] in ["-a", "--all"]: options["all"] = 1 options["new"] = 1 elif opt[0] in ["-c", "--collection"]: options["collection"]=opt[1] elif opt[0] in ["-n", "--noprocess"]: options["noprocess"] = 1 if len(sys.argv) == 2: options["new"] = 1 options["last"] = 1 elif opt[0] in ["-f", "--field"]: options["field"] = opt[1] elif opt[0] in ["-p","--phrase"]: options["phrase"] = opt[1] elif opt[0] in ["-o","--format"]: options["format"] = opt[1] except StandardError, e: usage(e) task_submit(options) return ### okay, here we go: if __name__ == '__main__': main() \ No newline at end of file diff --git a/modules/bibformat/bin/bibreformat.wml b/modules/bibformat/bin/bibreformat.wml index 5c271425b..859fd83dc 100644 --- a/modules/bibformat/bin/bibreformat.wml +++ b/modules/bibformat/bin/bibreformat.wml @@ -1,606 +1,608 @@ ## $Id$ ## BibReformat -- to reformat HTML brief (and other) formats for bibliographic records ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## read config variables: #include "config.wml" #include "configbis.wml" ## start Python: #! ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. ## import interesting modules: pylibdir = "/python" ## okay, rest of the Python code goes below ####### ## version number: __version__ = "$Id$" ## import interesting modules: try: import sys sys.path.append('%s' % pylibdir) from cdsware.dbquery import run_sql from cdsware.config import * from cdsware.search_engine import perform_request_search import getopt import urllib import marshal import signal import string import sys import os import time except ImportError, e: print "Error: %s" % e import sys sys.exit(1) sql_queries = [] mysql_queries = [] process_hb = 0 process = 1 fmt = "hb" def bibreformat_task(sql_queries, mysql_queries, process_hb): global process, fmt t1 = os.times()[4] ### Options, parameters ### if len(sql_queries)+len(mysql_queries) == 0: print_info() sys.exit() + print "Querying the database.." + if process_hb: without_hb = withouthb() recIDs = [] if (mysql_queries[0] != "") or (mysql_queries[1] != "") or (mysql_queries[2] != ""): res = perform_request_search(req=None, of='id', c=mysql_queries[0], p=mysql_queries[2], f=mysql_queries[1]).tolist() for item in res: recIDs.append(item) for sql_query in sql_queries: res = run_sql(sql_query) - for item in res: + for item in res: recIDs.append(item[0]) ### list of corresponding record IDs was retrieved ### bibformat the records selected if process_hb: print "Records to be processed: %d" % (len(recIDs)+len(without_hb)) print "Out of it records without created format: %d" % len(without_hb) else: print "Records to be processed: %d" % (len(recIDs)) ### Initialize main loop total_rec = 0 # Total number of records xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call # weburl = "" ### Iterate over all records prepared in lists I (option) if process: iterate_over(recIDs, weburl, fmt) ### Iterate over all records prepared in list II (no_hb) if process_hb and process: iterate_over(without_hb, weburl, fmt) ### Final statistics t2 = os.times()[4] elapsed = t2 - t1 message = "total records processed: %d" % total_rec print message message = "total processing time: %2f sec" % elapsed print message message = "Time spent on external call (os.system):" print message message = " bibformat: %2f sec" % tbibformat print message message = " bibupload: %2f sec" % tbibupload print message ### MySQL Search by recID ### def read_xml_input(weburl, query): "Read records to preprocess via http" url = "%s/search.py?%s" % (weburl, query) return urllib.urlopen(url).read() ### Result set operations ### def lhdiff(l1, l2): "Does list difference via intermediate hash." d = {} ld = [] for e in l2: d[e]=1 for e in l1: if not d.has_key(e): ld.append(e) return ld ### Result set operations ### def ldiff(l1, l2): "Returns l1 - l2." ld = [] for e in l1: if not e in l2: ld.append(e) return ld ### Identify recIDs of records with missing hb ### def withouthb(): "List of record IDs to be reformated, not having the hb format yet" global fmt xm1, xm2, hb1, hb2 = [],[],[],[] q1 = "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='xm'" q2 = "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt ## get complete recID list of xm formatted records xm1 = run_sql(q1) for item in xm1: xm2.append(item[0]) ## get complete recID list of hb formatted records hb1 = run_sql(q2) for item in hb1: hb2.append(item[0]) return lhdiff(xm2,hb2) ### Bibreformat all selected records ### def iterate_over(list, weburl, fmt): "Iterate odver list of IDs" n_rec = 0 n_max = 1000 total_rec = 0 # Total number of records xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call # tmpdir = "" # bindir = "" for record in list: n_rec = n_rec + 1 total_rec = total_rec + 1 message = "Processing record: %d" % (record) print message query = "id=%d&of=xm" % (record) xml_content = xml_content + read_xml_input(weburl, query) if xml_content: if n_rec >= n_max: filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s/rec_fmt.xml 2> %s/bibreformat.err" % (bindir,string.upper(fmt),tmpdir,tmpdir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s/rec_fmt.xml" % (bindir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) n_rec = 0 xml_content = '' ### Process the last re-formated chunk ### if n_rec > 0: print "Processing last record set (%d)" % n_rec filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s/rec_fmt.xml 2> %s/bibreformat.err" % (bindir,string.upper(fmt),tmpdir,tmpdir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s/rec_fmt.xml" % (bindir,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) return ### Bibshed compatibility procedures ### def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" write_message("sleeping...") task_update_state("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" write_message("continuing...") task_update_state("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" write_message("stopping...") task_update_state("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_state("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" write_message("suiciding myself now...") task_update_state("SUICIDING") write_message("suicided") task_update_state("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" write_message("unknown signal %d ignored" % sig) # do nothing for other signals def getpass(prompt = "Password: "): """Prompts for a password without echoing it back to the screen""" import termios, sys fd = sys.stdin.fileno() old = termios.tcgetattr(fd) new = termios.tcgetattr(fd) new[3] = new[3] & ~termios.ECHO # lflags passwd = "" try: termios.tcsetattr(fd, termios.TCSADRAIN, new) passwd = raw_input(prompt) print finally: termios.tcsetattr(fd, termios.TCSADRAIN, old) return passwd def authenticate(user): """Authenticates a user against the user database. NOTE: Access might be more complex in the future""" print "BibReformat Task Submission" print "=========================" if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user res = run_sql("select password from user where email=%s", (user,), 1) if res: row = res[0] password_db = row[0] if password_db != '': # authentication needed password_entered = getpass() if password_db == password_entered: return user else: print "Sorry, you seem to be unauthorized user. Exiting." sys.exit(1) else: # no authentication needed return user else: print "Sorry, %s seems to be unauthorized user. Exiting." % user sys.exit(1) def task_submit(options): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: task_id = run_sql("""INSERT INTO schTASKS (id,proc,user,state,date,arguments) VALUES (NULL,'bibreformat',%s,'WAITING',NOW(),%s)""", (user, marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASKS SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASKS SET progress=%s where id=%s", (msg, task_id)) def task_update_state(val): """Updates state information in the BibSched task table.""" global task_id return run_sql("UPDATE schTASKS SET state=%s where id=%s", (val, task_id)) def task_read_state(task_id): """Read state information in the BibSched task table.""" res = run_sql("SELECT state FROM schTASKS where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASKS WHERE id=%s AND proc='bibreformat'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: BibReformat task %d does not seem to exist." % id) sys.exit(1) return out def task_run(process_hb): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call.""" global task_id, process, fmt options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a BibReformat task." % task_id) return ## initialize parameters if options.has_key("all"): sql_queries.append("select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt) if options.has_key("new"): process_hb = 1 if options.has_key("noprocess"): process = 0 if options.has_key("last"): sql_queries.append("select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format='%s' and bf.last_updated < br.modification_date" % fmt) if options.has_key("collection"): mysql_queries.append(options["collection"]) else: mysql_queries.append("") if options.has_key("field"): mysql_queries.append(options["field"]) else: mysql_queries.append("") if options.has_key("phrase"): mysql_queries.append(options["phrase"]) else: mysql_queries.append("") if options.has_key("format"): fmt = options["format"] ## check task state: task_state = task_read_state(task_id) if task_state != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_state)) return ## update task state: task_update_state("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: bibreformat_task(sql_queries, mysql_queries, process_hb) ## we are done: task_update_state("DONE") return def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write(" -u, --user=USER \t\t User name to submit the task as, password needed.\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -d, --debug \t\t Print debugging information.\n") sys.stderr.write(" -a, --all \t\t All records\n") sys.stderr.write(" -c, --collection\t\t Select records by collection\n") sys.stderr.write(" -f, --field \t\t Select records by field.\n") sys.stderr.write(" -p, --phrase \t\t Select records by phrase.\n") sys.stderr.write(" -o, --format \t\t Specify output format to be (re-)created. (default HB)\n") sys.stderr.write(" -n, --noprocess \t\t Count records to be processed only (no processing done)\n") sys.stderr.write("\n") sys.stderr.write(" Example: bibreformat -n Show how many records are to be bibreformated.") sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. Useful for learning on how to write BibSched tasks.""" global task_id ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) process_hb = 0 task_run(process_hb) else: ## B - submit the task process_hb = 0 options = {} # will hold command-line options try: opts, args = getopt.getopt(sys.argv[1:], "hVdu:ac:f:p:lo:n", ["help", "version", "debug","user=","all","collection=","field=","phrase=","format=","noprocess"]) except getopt.GetoptError, err: usage(1, err) if len(sys.argv) == 1: # default options["new"] = 1 options["last"] = 1 try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __version__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-d", "--debug"]: options["debug"] = 1 elif opt[0] in ["-a", "--all"]: options["all"] = 1 options["new"] = 1 elif opt[0] in ["-c", "--collection"]: options["collection"]=opt[1] elif opt[0] in ["-n", "--noprocess"]: options["noprocess"] = 1 if len(sys.argv) == 2: options["new"] = 1 options["last"] = 1 elif opt[0] in ["-f", "--field"]: options["field"] = opt[1] elif opt[0] in ["-p","--phrase"]: options["phrase"] = opt[1] elif opt[0] in ["-o","--format"]: options["format"] = opt[1] except StandardError, e: usage(e) task_submit(options) return ### okay, here we go: if __name__ == '__main__': main() \ No newline at end of file