diff --git a/modules/bibformat/bin/bibreformat.in b/modules/bibformat/bin/bibreformat.in index 1fa299fe5..88c87f7a5 100644 --- a/modules/bibformat/bin/bibreformat.in +++ b/modules/bibformat/bin/bibreformat.in @@ -1,756 +1,757 @@ #!@PYTHON@ ## -*- mode: python; coding: utf-8; -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Call BibFormat engine and create HTML brief (and other) formats for bibliographic records. Upload formats via BibUpload.""" __revision__ = "$Id$" ## import interesting modules: try: import sys from invenio.dbquery import run_sql, escape_string from invenio.config import * from invenio.search_engine import perform_request_search from invenio.search_engine import print_record, encode_for_xml from invenio.access_control_engine import acc_authorize_action from invenio.bibformat import format_record from invenio.bibformat_utils import encode_for_xml from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.bibrecord import create_records import getopt import getpass import marshal import signal import string import sys import os import re import time except ImportError, e: print "Error: %s" % e import sys sys.exit(1) options = {} # global variable to hold task options sql_queries = [] # holds SQL queries to be executed cds_query = {} # holds CDS query parameters (fields, collection, pattern) process_format = 0 # flag, process records without created format process = 1 # flag, process records (unless count only) fmt = "hb" # default format to be processed sleeptime = "" # default sleeptime format_string = "%Y-%m-%d %H:%M:%S" # date/time format sched_time = time.strftime(format_string) # scheduled execution time in the date/time format ### run the bibreformat task bibsched scheduled ### def bibreformat_task(sql, sql_queries, cds_query, process_format): global process, fmt t1 = os.times()[4] ### Query the database ### if process_format: print "Querying database for records with missing format ..." without_format = without_fmt(sql) recIDs = [] if cds_query['field'] != "" or \ cds_query['collection'] != "" or \ cds_query['pattern'] != "": print "Querying database for records with old format (CDS query)..." res = perform_request_search(req=None, of='id', c=cds_query['collection'], p=cds_query['pattern'], f=cds_query['field']) for item in res: recIDs.append(item) for sql_query in sql_queries: print "Querying database for records with old format (SQL query) ..." 
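## --- hedged illustration (not part of the patch) ---------------------------
## A minimal standalone sketch of the record-selection step above: record IDs
## coming from a CDS query (perform_request_search) and from raw SQL queries
## are merged into one de-duplicated list.  The helper name collect_recids is
## illustrative only; perform_request_search and run_sql are used with the
## same signatures as in the surrounding code.
from invenio.search_engine import perform_request_search
from invenio.dbquery import run_sql

def collect_recids(cds_query, sql_queries):
    recids = {}
    if cds_query['field'] or cds_query['collection'] or cds_query['pattern']:
        for recid in perform_request_search(req=None, of='id',
                                            c=cds_query['collection'],
                                            p=cds_query['pattern'],
                                            f=cds_query['field']):
            recids[recid] = 1
    for query in sql_queries:
        for row in run_sql(query):
            recids[row[0]] = 1
    return recids.keys()
## ---------------------------------------------------------------------------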
res = run_sql(sql_query) for item in res: recIDs.append(item[0]) ### list of corresponding record IDs was retrieved ### bibformat the records selected if process_format: print "Records to be processed: %d" % (len(recIDs)+len(without_format)) print "Out of it records without created format: %d" % len(without_format) else: print "Records to be processed: %d" % (len(recIDs)) ### Initialize main loop total_rec = 0 # Total number of records xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call ### Iterate over all records prepared in lists I (option) if process: if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this when migration from php to python bibformat is done iterate_over_old(recIDs, weburl, fmt) else: iterate_over_new(recIDs, weburl, fmt) ### Iterate over all records prepared in list II (no_format) if process_format and process: if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this when migration from php to python bibformat is done iterate_over_old(without_format, weburl, fmt) else: iterate_over_new(without_format, weburl, fmt) ### Final statistics t2 = os.times()[4] elapsed = t2 - t1 message = "total records processed: %d" % total_rec print message message = "total processing time: %2f sec" % elapsed print message message = "Time spent on external call (os.system):" print message message = " bibformat: %2f sec" % tbibformat print message message = " bibupload: %2f sec" % tbibupload print message ### Result set operations ### def lhdiff(l1, l2): "Does list difference via intermediate hash." d = {} ld = [] for e in l2: d[e]=1 for e in l1: if not d.has_key(e): ld.append(e) return ld ### Result set operations ### def ldiff(l1, l2): "Returns l1 - l2." ld = [] for e in l1: if not e in l2: ld.append(e) return ld ### Identify recIDs of records with missing format ### def without_fmt(sql): "List of record IDs to be reformated, not having the specified format yet" global fmt xm1, xm2, format1, format2 = [],[],[],[] q1 = sql['q1'] q2 = sql['q2'] ## get complete recID list of xm formatted records xm1 = run_sql(q1) for item in xm1: xm2.append(item[0]) ## get complete recID list of formatted records format1 = run_sql(q2) for item in format1: format2.append(item[0]) return lhdiff(xm2,format2) ### Bibreformat all selected records (using new python bibformat) ### (see iterate_over_old further down) def iterate_over_new(list, weburl, fmt): "Iterate odver list of IDs" n_it_rec = 0 # Number of records for current iteration n_it_max = 10000 # Number of max records in one iteration n_rec = 0 # Number of formatted records total_rec = len(list) # Total number of records formatted_records = '' # (string-)List of formatted record of an iteration for recID in list: n_rec +=1 n_it_rec += 1 message = "Processing record %d with format %s (New BibFormat)" % (recID, fmt) print message t11 = os.times()[4] message = "START bibformat external call" print message ### bibformat external call ### formatted_record = format_record(recID, fmt, on_the_fly=True) # Encapsulate record in xml tags that bibupload understands prologue = ''' %s %s ''' % (recID, fmt) epilogue = ''' ''' t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message formatted_records += prologue + encode_for_xml(formatted_record) + epilogue # every n_max record, upload all formatted records. 
# also upload if recID is last one if n_it_rec > n_it_max or n_rec == total_rec: #Save formatted records to disk for bibupload finalfilename = "%s/rec_fmt_%s.xml" % (tmpdir,time.strftime('%Y%m%d_%H%M%S')) filehandle = open(finalfilename,"w") filehandle.write(formatted_records) filehandle.close() ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s" % (bindir, finalfilename) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message #Reset iteration state n_it_rec = 0 xml_content = '' def iterate_over_old(list, weburl, fmt): "Iterate odver list of IDs" n_rec = 0 n_max = 10000 total_rec = 0 # Total number of records xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call for record in list: n_rec = n_rec + 1 total_rec = total_rec + 1 message = "Processing record: %d" % (record) print message query = "id=%d&of=xm" % (record) count = 0 contents = print_record(record, 'xm') while (contents == "") and (count < 10): contents = print_record(record, 'xm') count = count + 1 time.sleep(10) if count == 10: sys.stderr.write("Failed to download %s from %s after 10 attempts... terminating" % (query, weburl)) sys.exit(0) xml_content = xml_content + contents if xml_content: if n_rec >= n_max: finalfilename = "%s/rec_fmt_%s.xml" % (tmpdir,time.strftime('%Y%m%d_%H%M%S')) filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (bindir,string.upper(fmt),tmpdir,finalfilename,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s" % (bindir,finalfilename) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) n_rec = 0 xml_content = '' ### Process the last re-formated chunk ### if n_rec > 0: print "Processing last record set (%d)" % n_rec finalfilename = "%s/rec_fmt_%s.xml" % (tmpdir,time.strftime('%Y%m%d_%H%M%S')) filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (bindir,string.upper(fmt),tmpdir,finalfilename,tmpdir) os.system(command) t22 = os.times()[4] message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] message = "START bibupload external call" print message command = "%s/bibupload -f %s" % (bindir,finalfilename) os.system(command) t22 = os.times()[4] message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) return ### Bibshed compatibility procedures ### def get_date(var, format_string = "%Y-%m-%d %H:%M:%S"): """Returns a date 
string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re=re.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) print value date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def authenticate(user, header="BibReformat Task Submission", action="runbibformat"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." 
% user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" global options, sched_time, sleep_time ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: task_id = run_sql("""INSERT INTO schTASK (id,proc,user,status,arguments,sleeptime,runtime) VALUES (NULL,'bibreformat',%s,'WAITING',%s,%s,%s)""", (user, marshal.dumps(options),sleeptime,escape_string(sched_time))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates status information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='bibreformat'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: BibReformat task %d does not seem to exist." % id) sys.exit(1) return out def task_run(task_id, process_format): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call.""" global options, process, fmt, sched_time options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a BibReformat task." 
% task_id) return ## initialize parameters if options.has_key("format"): fmt = options["format"] else: fmt = "hb" sql = { "all" : "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt, "last": "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format='%s' and bf.last_updated < br.modification_date" % fmt, "q1" : "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='xm'", "q2" : "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt } if options.has_key("all"): sql_queries.append(sql['all']) if options.has_key("without"): process_format = 1 if options.has_key("noprocess"): process = 0 if options.has_key("last"): sql_queries.append(sql['last']) if options.has_key("collection"): cds_query['collection'] = options['collection'] else: cds_query['collection'] = "" if options.has_key("field"): cds_query['field'] = options['field'] else: cds_query['field'] = "" if options.has_key("pattern"): cds_query['pattern'] = options['pattern'] else: cds_query['pattern'] = "" ### sql commands to be executed during the script run ### ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status)) return ## update task status: task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: bibreformat_task(sql, sql_queries, cds_query, process_format) ## we are done: task_update_status("DONE") return def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write(" -u, --user=USER \t\t User name to submit the task as, password needed.\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -v, --verbose=LEVEL \t\t Verbose level (0=min,1=normal,9=max).\n") sys.stderr.write(" -s, --sleeptime=SLEEP\t\t Time after which to repeat tasks (no)\n") sys.stderr.write(" -t, --time=DATE \t\t Moment for the task to be active (now).\n") sys.stderr.write(" -a, --all \t\t All records\n") sys.stderr.write(" -c, --collection \t\t Select records by collection\n") sys.stderr.write(" -f, --field \t\t Select records by field.\n") sys.stderr.write(" -p, --pattern \t\t Select records by pattern.\n") sys.stderr.write(" -o, --format \t\t Specify output format to be (re-)created. (default HB)\n") sys.stderr.write(" -n, --noprocess \t\t Count records to be processed only (no processing done)\n") sys.stderr.write("\n") sys.stderr.write(" Example: bibreformat -n Show how many records are to be bibreformated.") sys.stderr.write("\n") sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. 
Useful for learning on how to write BibSched tasks.""" global options, sched_time, sleeptime ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) process_format = 0 task_run(task_id, process_format) else: ## B - submit the task process_format = 0 options = {} # will hold command-line options options["verbose"] = 1 try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:ac:f:s:p:lo:nt:wl", ["help", "version", "verbose=","user=","all","collection=","field=","sleeptime=","pattern=","format=","noprocess","time=","without","last"]) except getopt.GetoptError, err: usage(1, err) clp = 0 # default parameters flag try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in ["-a", "--all"]: options["all"] = 1 options["without"] = 1 clp = 1 elif opt[0] in ["-c", "--collection"]: options["collection"]=opt[1] clp = 1 elif opt[0] in ["-n", "--noprocess"]: options["noprocess"] = 1 elif opt[0] in ["-f", "--field"]: options["field"] = opt[1] clp = 1 elif opt[0] in ["-p","--pattern"]: options["pattern"] = opt[1] clp = 1 elif opt[0] in ["-o","--format"]: options["format"] = opt[1] elif opt[0] in ["-s", "--sleeptime" ]: get_date(opt[1]) # see if it is a valid shift sleeptime = opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time = get_date(opt[1]) if clp == 0: # default options["without"] = 1 options["last"] = 1 except StandardError, e: usage(e) task_submit() return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibharvest/bin/oaiarchive.in b/modules/bibharvest/bin/oaiarchive.in index 4f8a40107..6303b779e 100644 --- a/modules/bibharvest/bin/oaiarchive.in +++ b/modules/bibharvest/bin/oaiarchive.in @@ -1,370 +1,371 @@ #!@PYTHON@ ## -*- mode: python; coding: utf-8; -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
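## --- hedged illustration (not part of the patch) ---------------------------
## The hunk applied to authenticate() in bibreformat.in above (and repeated in
## the files below) replaces the single "email=%s or nickname=%s" lookup with
## two separate run_sql() calls whose result rows are concatenated.  A
## standalone sketch of that pattern, assuming run_sql returns a sequence of
## rows as it does in the surrounding code; lookup_user is illustrative only.
from invenio.dbquery import run_sql

def lookup_user(user):
    rows = run_sql("select id,password from user where email=%s", (user,), 1) + \
           run_sql("select id,password from user where nickname=%s", (user,), 1)
    if not rows:
        return None        # unknown user
    return rows[0]         # (uid, password) of the first matching row
## ---------------------------------------------------------------------------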
""" OAI repository archive and management tool Usage: oaiarchive [options] Options: -o --oaiset= Specify setSpec -h --help Print this help -V --version Print version information and exit -a --add Add records to OAI repository -d --delete Remove records from OAI repository -r --report Print OAI repository status -i --info Give info about OAI set (default) -p --upload Upload records -n --no-process Do not upload records (default) Examples: Expose set -setname- via OAI gateway oaiarchive --oaiset='setname' --add --upload oaiarchive -apo 'setname' Remove records defined by set -setname- from OAI repository oaiarchive --oaiset='setname' --delete --upload oaiarchive -dpo 'setname' Expose entire repository via OAI gateway oaiarchive --oaiset=global --add --upload oaiarchive -apo global Print OAI repository status oaiarchive -r """ __revision__ = "$Id$" try: import sys from invenio.oaiarchive_engine import oaiarchive_task from invenio.oaiarchive_engine import printInfo from invenio.dbquery import run_sql, escape_string from invenio.config import * from invenio.search_engine import perform_request_search from invenio.search_engine import print_record from invenio.access_control_engine import acc_authorize_action import getopt import getpass import string import marshal import signal import time import re except ImportError, e: print "Error: %s" % e sys.exit(1) options = {} # global variable to hold task options sleeptime = "" # default sleeptime sched_time = time.strftime("%Y-%m-%d %H:%M:%S") # scheduled execution time in the date/time format ### Bibshed compatibility procedures ### def get_date(var, format_string = "%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re=re.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. 
[must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def authenticate(user, header="OAI Archive Task Submission", action="runoaiarchive"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" global options, sched_time, sleep_time ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: task_id = run_sql("""INSERT INTO schTASK (id,proc,user,status,arguments,sleeptime,runtime) VALUES (NULL,'oaiarchive',%s,'WAITING',%s,%s,%s)""", (user, marshal.dumps(options),sleeptime,escape_string(sched_time))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id)) write_message("Task #%d submitted." 
% task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates status information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='oaiarchive'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: OAIarchive task %d does not seem to exist." % id) sys.exit(1) return out def task_run(task_id): """Runs the task""" global options options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a OAI archive task." % task_id) return ## initialize parameters if options.has_key("option"): ### sql commands to be executed during the script run ### ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status)) return ## update task status: task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: oaiarchive_task(options) ## we are done: task_update_status("DONE") return ######################### def main(): """Main function that analyzes command line input and calls whatever is appropriate. 
Useful for learning on how to write BibSched tasks.""" global options, sched_time, sleeptime ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) task_run(task_id) else: ## B - submit the task options = {} # will hold command-line options options["verbose"] = 1 try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:s:t:ado:pirn", ["help", "version", "verbose=","user=","sleeptime=","time=","add","delete","oaiset=","upload","info","report","no-process"]) except getopt.GetoptError: printInfo() sys.exit(1) ## set defaults options["upload"] = 0 options["mode"] = 0 options["oaiset"] = "" options["nice"] = 0 try: for opt in opts: if opt[0] in ["-h", "--help"]: printInfo() sys.exit(0) elif opt[0] in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in ["-s", "--sleeptime" ]: get_date(opt[1]) # see if it is a valid shift sleeptime = opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time = get_date(opt[1]) elif opt[0] in ["-n", "--nice"]: options["nice"] = opt[1] elif opt[0] in ["-o", "--oaiset"]: options["oaiset"] = opt[1] elif opt[0] in ["-a", "--add"]: options["mode"] = 1 elif opt[0] in ["-d", "--delete"]: options["mode"] = 2 elif opt[0] in ["-p", "--upload"]: options["upload"] = 1 elif opt[0] in ["-i", "--info"]: options["mode"] = 0 elif opt[0] in ["-r", "--report"]: options["mode"] = 3 elif opt[0] in ["-n", "--no-process"]: options["upload"] = 0 except StandardError: printInfo() sys.exit(1) task_submit() return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibharvest/lib/oaiharvestlib.py b/modules/bibharvest/lib/oaiharvestlib.py index 5fd01eaae..55e8e8681 100644 --- a/modules/bibharvest/lib/oaiharvestlib.py +++ b/modules/bibharvest/lib/oaiharvestlib.py @@ -1,640 +1,641 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ oaiharvest implementation. See oaiharvest executable for entry point. 
""" __revision__ = "$Id$" import marshal import getopt import getpass import string import os import sre import sys import time import signal import traceback import calendar from invenio.config import \ bibconvert, \ bibupload, \ bindir, \ tmpdir, \ version from invenio.bibindex_engine_config import * from invenio.dbquery import run_sql, escape_string from invenio.access_control_engine import acc_authorize_action options = {} # global variable to hold task options ## precompile some often-used regexp for speed reasons: sre_subfields = sre.compile('\$\$\w'); sre_html = sre.compile("(?s)<[^>]*>|&#?\w+;") sre_datetime_shift = sre.compile("([-\+]{0,1})([\d]+)([dhms])") tmpHARVESTpath = tmpdir + '/oaiharvest' def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr).""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def usage(code, msg=''): "Prints usage for this module." if msg: sys.stderr.write("Error: %s.\n" % msg) usagetext = """ Usage: oaiharvest [options] Examples: oaiharvest -r arxiv -s 24h oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m Specific options: -r, --repository=REPOS_ONE,"REPOS TWO" name of the OAI repositories to be harvested (default=all) -d, --dates=yyyy-mm-dd:yyyy-mm-dd harvest repositories between specified dates (overrides repositories' last updated timestamps) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ sys.stderr.write(usagetext) sys.exit(code) def authenticate(user, header="oaiharvest Task Submission", action="runoaiharvest"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. 
It can handle normal date strings and shifts with respect to now.""" date = time.time() factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = sre_datetime_shift.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def task_run(row): """Run the harvesting task. The row argument is the Bibharvest task queue row, containing if, arguments, etc. Return 1 in case of success and 0 in case of failure. """ global options reposlist = [] datelist = [] dateflag = 0 # read from SQL row: task_id = row[0] task_proc = row[1] options = marshal.loads(row[6]) task_status = row[7] # sanity check: if task_proc != "oaiharvest": write_message("The task #%d does not seem to be a oaiharvest task." % task_id, sys.stderr) return 0 if task_status != "WAITING": write_message("The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr) return 0 # we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_starting_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) task_update_status("RUNNING") # install signal handlers signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ### go ahead: build up the reposlist if options["repository"] != None: ### user requests harvesting from selected repositories write_message("harvesting from selected repositories") for reposname in options["repository"]: row = get_row_from_reposname(reposname) if row==[]: write_message("source name " + reposname + " is not valid") continue else: reposlist.append(get_row_from_reposname(reposname)) else: ### user requests harvesting from all repositories write_message("harvesting from all repositories in the database") reposlist = get_all_rows_from_db() ### go ahead: check if user requested from-until harvesting if options["dates"]: ### for each repos simply perform a from-until date harvesting... no need to update anything dateflag = 1 for element in options["dates"]: datelist.append(element) for repos in reposlist: postmode = str(repos[0][9]) if postmode=="h" or postmode=="h-c" or postmode=="h-u" or postmode=="h-c-u": harvestpath = tmpdir + "/oaiharvest" + str(os.getpid()) if dateflag == 1: res = call_bibharvest(prefix=repos[0][2], baseurl=repos[0][1], harvestpath=harvestpath, fro=str(datelist[0]), until=str(datelist[1])) if res==0 : write_message("source " + str(repos[0][6]) + " was harvested from " + str(datelist[0]) + " to " + str(datelist[1])) else : write_message("an error occurred while harvesting from source " + str(repos[0][6]) + " for the dates chosen") continue elif dateflag != 1 and repos[0][7] == None and repos[0][8] != 0: write_message("source " + str(repos[0][6]) + " was never harvested before - harvesting whole repository") res = call_bibharvest(prefix=repos[0][2], baseurl=repos[0][1], harvestpath=harvestpath) if res==0 : update_lastrun(repos[0][0]) else : write_message("an error occurred while harvesting from source " + str(repos[0][6])) continue elif dateflag != 1 and repos[0][8] != 0: ### check that update is actually needed, i.e. 
lastrun+frequency>today timenow = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) lastrundate = sre.sub(r'\.[0-9]+$', '', str(repos[0][7])) # remove trailing .00 timeinsec = int(repos[0][8])*60*60 updatedue = add_timestamp_and_timelag(lastrundate, timeinsec) proceed = compare_timestamps_with_tolerance(updatedue, timenow) if proceed==0 or proceed==-1 : #update needed! write_message("source " + str(repos[0][6]) + " is going to be updated") fromdate = str(repos[0][7]) fromdate = fromdate.split()[0] # get rid of time of the day for the moment res = call_bibharvest(prefix=repos[0][2], baseurl=repos[0][1], harvestpath=harvestpath, fro=fromdate) if res==0 : update_lastrun(repos[0][0]) else : write_message("an error occurred while harvesting from source " + str(repos[0][6])) continue else: write_message("source " + str(repos[0][6]) + " does not need updating") continue elif dateflag != 1 and repos[0][8] == 0: write_message("source " + str(repos[0][6]) + " has frequency set to 'Never' so it will not be updated") continue if postmode=="h-u": res = call_bibupload(convertpath=harvestpath) if res==0 : write_message("material harvested from source " + str(repos[0][6]) + " was successfully uploaded") else : write_message("an error occurred while uploading harvest from " + str(repos[0][6])) continue if postmode=="h-c" or postmode=="h-c-u": convertpath = tmpdir + "/bibconvertrun" + str(os.getpid()) res = call_bibconvert(config=str(repos[0][5]), harvestpath=harvestpath, convertpath=convertpath) if res==0 : write_message("material harvested from source " + str(repos[0][6]) + " was successfully converted") else : write_message("an error occurred while converting from " + str(repos[0][6])) continue if postmode=="h-c-u": res = call_bibupload(convertpath=convertpath) if res==0 : write_message("material harvested from source " + str(repos[0][6]) + " was successfully uploaded") else : write_message("an error occurred while uploading harvest from " + str(repos[0][6])) continue elif postmode not in ["h", "h-c", "h-u", "h-c-u"] : ### this should not happen write_message("invalid postprocess mode: " + postmode + " skipping repository") continue task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." % task_id) return 1 def add_timestamp_and_timelag(timestamp, timelag): """ Adds a time lag in seconds to a given date (timestamp). Returns the resulting date. 
""" # remove any trailing .00 in timestamp: timestamp = sre.sub(r'\.[0-9]+$', '', timestamp) # first convert timestamp to Unix epoch seconds: timestamp_seconds = calendar.timegm(time.strptime(timestamp, "%Y-%m-%d %H:%M:%S")) # now add them: result_seconds = timestamp_seconds + timelag result = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(result_seconds)) return result def update_lastrun(index): """ A method that updates the lastrun of a repository successfully harvested """ try: today = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) sql = 'UPDATE oaiHARVEST SET lastrun="%s" WHERE id=%s' % (today, index) res = run_sql(sql) return 1 except StandardError, e: return (0,e) def call_bibharvest(prefix, baseurl, harvestpath, fro="", until=""): """ A method that calls bibharvest and writes harvested output to disk """ try: command = '%s/bibharvest -o %s -v ListRecords -p %s ' % (bindir, harvestpath, prefix) if fro!="": command += '-f %s ' % fro if until!="": command += '-u %s ' % until command += baseurl print "Start harvesting" #print command ret = os.system(command) print "Harvesting finished, merging files" harvestdir, filename = os.path.split(harvestpath) #print "get files" files = os.listdir(harvestdir) #print "sort file" files.sort() #print "open dest file" hf = file(harvestpath, 'w') for f in files: if f[:len(filename)] == filename: print "processing file %s"%f rf = file(os.path.join(harvestdir, f), 'r') hf.write(rf.read()) hf.write("\n") rf.close() #os.remove(os.path.join(harvestdir, f)) hf.close() print "Files merged" return 0 except StandardError, e: return (0,e) def call_bibconvert(config, harvestpath, convertpath): """ A method that reads from a file and converts according to a BibConvert Configuration file. Converted output is returned """ command = """%s/bibconvert -c %s < %s > %s """ % (bindir, config, harvestpath, convertpath) stdout = os.popen(command) return 0 def call_bibupload(convertpath): """ A method that uploads a file to the database - calls bibUpload """ command = '%s/bibupload -i %s ' % (bindir, convertpath) p=os.system(command) return p def get_row_from_reposname(reposname): """ Returns all information about a row (OAI source) from the source name """ try: sql = 'select * from oaiHARVEST where name="%s"' % escape_string(reposname) res = run_sql(sql) reposdata = [] for element in res: reposdata.append(element) return reposdata except StandardError, e: return (0,e) def get_all_rows_from_db(): """ This method retrieves the full database of repositories and returns a list containing (in exact order): | id | baseurl | metadataprefix | arguments | comment | bibconvertcfgfile | name | lastrun | frequency | postprocess | """ try: reposlist = [] sql = """select id from oaiHARVEST""" idlist = run_sql(sql) for index in idlist: sql = """select * from oaiHARVEST where id=%s""" % index reposelements = run_sql(sql) repos = [] for element in reposelements: repos.append(element) reposlist.append(repos) return reposlist except StandardError, e: return (0,e) def compare_timestamps_with_tolerance(timestamp1, timestamp2, tolerance=0): """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2 minus TOLERANCE, 0 if they are equal within TOLERANCE limit, and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE. 
""" # remove any trailing .00 in timestamps: timestamp1 = sre.sub(r'\.[0-9]+$', '', timestamp1) timestamp2 = sre.sub(r'\.[0-9]+$', '', timestamp2) # first convert timestamps to Unix epoch seconds: timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S")) timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S")) # now compare them: if timestamp1_seconds < timestamp2_seconds - tolerance: return -1 elif timestamp1_seconds > timestamp2_seconds + tolerance: return 1 else: return 0 def command_line(): global options long_flags =["repository=", "dates=" "user=","sleeptime=","time=", "help", "version", "verbose="] short_flags ="r:d:u:s:t:hVv:" format_string = "%Y-%m-%d %H:%M:%S" repositories = None dates = None sleeptime = "" try: opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags) except getopt.GetoptError, err: write_message(err, sys.stderr) usage(1) if args: usage(1) options={"sleeptime":0, "verbose":1, "repository":0, "dates":0} sched_time = time.strftime(format_string) user = "" # Check for key options try: for opt in opts: if opt == ("-h","") or opt == ("--help",""): usage(1) elif opt == ("-V","") or opt == ("--version",""): print __revision__ sys.exit(1) elif opt[0] in ["--verbose", "-v"]: options["verbose"] = int(opt[1]) elif opt[0] in [ "-r", "--repository" ]: repositories = opt[1] elif opt[0] in [ "-d", "--dates" ]: dates = opt[1] elif opt[0] in [ "-u", "--user"]: user = opt[1] elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift sleeptime= opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time= get_datetime(opt[1]) else: usage(1) except StandardError, e: write_message(e, sys.stderr) sys.exit(1) options["repository"]=get_repository_names(repositories) if dates != None: options["dates"]=get_dates(dates) if dates != None and options["dates"]==None: write_message("Date format not valid. Quitting task...") sys.exit(1) user = authenticate(user) if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] new_task_id = run_sql("""INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('oaiharvest',%s,%s,%s,%s,'WAITING')""", (user, sched_time, sleeptime, marshal.dumps(options))) ## update task number: options["task"] = new_task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options), new_task_id)) print "Task #%d was successfully scheduled for execution." % new_task_id return def get_dates(dates): """ A method to validate and process the dates input by the user at the command line """ twodates = [] if dates: datestring = string.split(dates, ":") if len(datestring)==2: for date in datestring: ### perform some checks on the date format datechunks = string.split(date, "-") if len(datechunks)==3: try: if int(datechunks[0]) and int(datechunks[1]) and int(datechunks[2]): twodates.append(date) except StandardError: write_message("Dates have invalid format, not 'yyyy-mm-dd:yyyy-mm-dd'") twodates=None return twodates else: write_message("Dates have invalid format, not 'yyyy-mm-dd:yyyy-mm-dd'") twodates=None return twodates ## final check.. 
date1 must me smaller than date2 date1 = str(twodates[0]) + " 01:00:00" date2 = str(twodates[1]) + " 01:00:00" if compare_timestamps_with_tolerance(date1, date2)!=-1: write_message("First date must be before second date.") twodates=None return twodates else: write_message("Dates have invalid format, not 'yyyy-mm-dd:yyyy-mm-dd'") twodates=None else: twodates=None return twodates def get_repository_names(repositories): """ A method to validate and process the repository names input by the user at the command line """ repository_names = [] if repositories: names = string.split(repositories, ",") for name in names: ### take into account both single word names and multiple word names (which get wrapped around "" or '') quote = "'" doublequote = '"' if name.find(quote)==0 and name.find(quote)==len(name): name = name.split(quote)[1] if name.find(doublequote)==0 and name.find(doublequote)==len(name): name = name.split(doublequote)[1] repository_names.append(name) else: repository_names=None return repository_names def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") errcode = 0 try: task_sig_stop_commands() write_message("stopped") task_update_status("STOPPED") except StandardError, err: write_message("Error during stopping! %e" % err) task_update_status("STOPPINGFAILED") errcode = 1 sys.exit(errcode) def task_sig_stop_commands(): """Do all the commands necessary to stop the task before quitting. Useful for task_sig_stop() handler. """ write_message("stopping commands started") pass write_message("stopping commands ended") def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task progress to %s." % msg) return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates state information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task status to %s." 
% val) return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def main(): """Reads arguments and either runs the task, or starts user-interface (command line).""" if len(sys.argv) == 2: try: task_id = int(sys.argv[1]) except StandardError: command_line() sys.exit() res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (task_id), None, 1) if not res: write_message("Selected task not found.", sys.stderr) sys.exit(1) try: if not task_run(res[0]): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) if options["verbose"] >= 9: traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.") task_update_status("ERROR") else: command_line() diff --git a/modules/bibindex/lib/bibindex_engine.py b/modules/bibindex/lib/bibindex_engine.py index dfade3868..f2f42fe79 100644 --- a/modules/bibindex/lib/bibindex_engine.py +++ b/modules/bibindex/lib/bibindex_engine.py @@ -1,1659 +1,1660 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## BibIndxes bibliographic data, reference and fulltext indexing utility. ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibIndex indexing engine implementation. See bibindex executable for entry point. """ __revision__ = "$Id$" import marshal from zlib import compress,decompress import string import getopt import getpass import os import sre import sys import time import Numeric import urllib2 import signal import tempfile import traceback import cStringIO from invenio.config import \ CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS, \ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY, \ CFG_BIBINDEX_MIN_WORD_LENGTH, \ CFG_BIBINDEX_REMOVE_HTML_MARKUP, \ CFG_BIBINDEX_STEMMER_DEFAULT_LANGUAGE, \ CFG_MAX_RECID, \ version, \ weburl from invenio.bibindex_engine_config import * from invenio.search_engine import perform_request_search, strip_accents from invenio.dbquery import run_sql, escape_string, DatabaseError from invenio.access_control_engine import acc_authorize_action from invenio.bibindex_engine_stopwords import is_stopword from invenio.bibindex_engine_stemmer import stem ## import optional modules: try: import psyco psyco.bind(get_words_from_phrase) psyco.bind(WordTable.merge_with_old_recIDs) psyco.bind(serialize_via_numeric_array) psyco.bind(serialize_via_marshal) psyco.bind(deserialize_via_numeric_array) psyco.bind(deserialize_via_marshal) except: pass def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr).""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. 
[must be sys.stdout or sys.stderr]\n" % stream) return ## precompile some often-used regexp for speed reasons: sre_subfields = sre.compile('\$\$\w'); sre_html = sre.compile("(?s)<[^>]*>|&#?\w+;") sre_block_punctuation_begin = sre.compile(r"^"+CFG_BIBINDEX_CHARS_PUNCTUATION+"+") sre_block_punctuation_end = sre.compile(CFG_BIBINDEX_CHARS_PUNCTUATION+"+$") sre_punctuation = sre.compile(CFG_BIBINDEX_CHARS_PUNCTUATION) sre_separators = sre.compile(CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS) sre_datetime_shift = sre.compile("([-\+]{0,1})([\d]+)([dhms])") nb_char_in_line = 50 # for verbose pretty printing chunksize = 1000 # default size of chunks that the records will be treated by wordTables = [] base_process_size = 4500 # process base size options = {} # will hold task options ## Dictionary merging functions def intersection(dict1, dict2): "Returns intersection of the two dictionaries." int_dict = {} if len(dict1) > len(dict2): for e in dict2: if dict1.has_key(e): int_dict[e] = 1 else: for e in dict1: if dict2.has_key(e): int_dict[e] = 1 return int_dict def union(dict1, dict2): "Returns union of the two dictionaries." union_dict = {} for e in dict1.keys(): union_dict[e] = 1 for e in dict2.keys(): union_dict[e] = 1 return union_dict def diff(dict1, dict2): "Returns dict1 - dict2." diff_dict = {} for e in dict1.keys(): if not dict2.has_key(e): diff_dict[e] = 1 return diff_dict def list_union(list1, list2): "Returns union of the two lists." union_dict = {} for e in list1: union_dict[e] = 1 for e in list2: union_dict[e] = 1 return union_dict.keys() ## safety function for killing slow DB threads: def kill_sleepy_mysql_threads(max_threads=CFG_MAX_MYSQL_THREADS, thread_timeout=CFG_MYSQL_THREAD_TIMEOUT): """Check the number of DB threads and if there are more than MAX_THREADS of them, lill all threads that are in a sleeping state for more than THREAD_TIMEOUT seconds. (This is useful for working around the the max_connection problem that appears during indexation in some not-yet-understood cases.) If some threads are to be killed, write info into the log file. """ res = run_sql("SHOW FULL PROCESSLIST") if len(res) > max_threads: for row in res: r_id,r_user,r_host,r_db,r_command,r_time,r_state,r_info = row if r_command == "Sleep" and int(r_time) > thread_timeout: run_sql("KILL %s", (r_id,)) if options["verbose"] >= 1: write_message("WARNING: too many DB threads, killing thread %s" % r_id) return ## MARC-21 tag/field access functions def get_fieldvalues(recID, tag): """Returns list of values of the MARC-21 'tag' fields for the record 'recID'.""" out = [] bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = "SELECT value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s'" \ % (bibXXx, bibrec_bibXXx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def get_associated_subfield_value(recID, tag, value, associated_subfield_code): """Return list of ASSOCIATED_SUBFIELD_CODE, if exists, for record RECID and TAG of value VALUE. Used by fulltext indexer only. Note: TAG must be 6 characters long (tag+ind1+ind2+sfcode), otherwise en empty string is returned. FIXME: what if many tag values have the same value but different associated_subfield_code? Better use bibrecord library for this. 
""" out = "" if len(tag) != 6: return out bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT bb.field_number, b.tag, b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s%%'""" % \ (bibXXx, bibrec_bibXXx, recID, tag[:-1]) res = run_sql(query) field_number = -1 for row in res: if row[1] == tag and row[2] == value: field_number = row[0] if field_number > 0: for row in res: if row[0] == field_number and row[1] == tag[:-1] + associated_subfield_code: out = row[2] break return out def get_field_tags(field): """Returns a list of MARC tags for the field code 'field'. Returns empty list in case of error. Example: field='author', output=['100__%','700__%'].""" out = [] query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f WHERE f.code='%s' AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" % field res = run_sql(query) for row in res: out.append(row[0]) return out ## Fulltext word extraction functions def get_fulltext_urls_from_html_page(htmlpagebody): """Parses htmlpagebody data looking for url_directs referring to probable fulltexts. Returns an array of (ext,url_direct) to fulltexts. Note: it looks for file format extensions as defined by global 'CONV_PROGRAMS'structure. """ out = [] for ext in CONV_PROGRAMS.keys(): expr = sre.compile( r"\"(http://[\w]+\.+[\w]+[^\"'><]*\." + \ ext + r")\"") match = expr.search(htmlpagebody) if match: out.append([ext,match.group(1)]) else: # FIXME: workaround for getfile, should use bibdoc tables expr_getfile = sre.compile(r"\"(http://.*getfile\.py\?.*format=" + ext + r"&version=.*)\"") match = expr_getfile.search(htmlpagebody) if match: out.append([ext,match.group(1)]) return out def get_words_from_fulltext(url_direct_or_indirect, split=string.split, lower=string.lower, force_file_extension=None): """Returns all the words contained in the document specified by URL_DIRECT_OR_INDIRECT with the words being split by various SRE_SEPARATORS regexp set earlier. If FORCE_FILE_EXTENSION is set (e.g. to "pdf", then treat URL_DIRECT_OR_INDIRECT as a PDF file. (This is interesting to index Indico for example.) Note also that URL_DIRECT_OR_INDIRECT may be either a direct URL to the fulltext file or an URL to a setlink-like page body that presents the links to be indexed. In the latter case the URL_DIRECT_OR_INDIRECT is parsed to extract actual direct URLs to fulltext documents, for all knows file extensions as specified by global CONV_PROGRAMS config variable. """ if CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY and string.find(url_direct_or_indirect, weburl) < 0: return [] if options["verbose"] >= 2: write_message("... reading fulltext files from %s started" % url_direct_or_indirect) fulltext_urls = None if not force_file_extension: url_direct = None fulltext_urls = None # check for direct link in url url_direct_or_indirect_ext = lower(split(url_direct_or_indirect,".")[-1]) if url_direct_or_indirect_ext in CONV_PROGRAMS.keys(): fulltext_urls = [(url_direct_or_indirect_ext,url_direct_or_indirect)] # Indirect url. Try to fetch the real fulltext(s) if not fulltext_urls: # read "setlink" data try: htmlpagebody = urllib2.urlopen(url_direct_or_indirect).read() except: sys.stderr.write("Error: Cannot read %s.\n" % url_direct_or_indirect) return [] fulltext_urls = get_fulltext_urls_from_html_page(htmlpagebody) if options["verbose"] >= 9: write_message("... 
fulltext_urls = %s" % fulltext_urls) else: fulltext_urls = [[force_file_extension, url_direct_or_indirect]] words = {} # process as many urls as they were found: for (ext,url_direct) in fulltext_urls: if options["verbose"] >= 2: write_message(".... processing %s from %s started" % (ext,url_direct)) # sanity check: if not url_direct: break; # read fulltext file: try: url = urllib2.urlopen(url_direct) except: sys.stderr.write("Error: Cannot read %s.\n" % url_direct) break # try other fulltext files... tmp_fd, tmp_name = tempfile.mkstemp('invenio.tmp') data_chunk = url.read(8*1024) while data_chunk: os.write(tmp_fd, data_chunk) data_chunk = url.read(8*1024) os.close(tmp_fd) # try all available conversion programs according to their order: bingo = 0 for conv_program in CONV_PROGRAMS[ext]: if os.path.exists(conv_program): # intelligence on how to run various conversion programs: cmd = "" # wil keep command to run bingo = 0 # had we success? if os.path.basename(conv_program) == "pdftotext": cmd = "%s -enc UTF-8 %s %s.txt" % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "pstotext": if ext == "ps.gz": # is there gzip available? if os.path.exists(CONV_PROGRAMS_HELPERS["gz"]): cmd = "%s -cd %s | %s > %s.txt" \ % (CONV_PROGRAMS_HELPERS["gz"], tmp_name, conv_program, tmp_name) else: cmd = "%s %s > %s.txt" \ % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "ps2ascii": if ext == "ps.gz": # is there gzip available? if os.path.exists(CONV_PROGRAMS_HELPERS["gz"]): cmd = "%s -cd %s | %s > %s.txt"\ % (CONV_PROGRAMS_HELPERS["gz"], tmp_name, conv_program, tmp_name) else: cmd = "%s %s %s.txt" \ % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "antiword": cmd = "%s %s > %s.txt" % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "catdoc": cmd = "%s %s > %s.txt" % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "wvText": cmd = "%s %s %s.txt" % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "ppthtml": # is there html2text available? if os.path.exists(CONV_PROGRAMS_HELPERS["html"]): cmd = "%s %s | %s > %s.txt"\ % (conv_program, tmp_name, CONV_PROGRAMS_HELPERS["html"], tmp_name) else: cmd = "%s %s > %s.txt" \ % (conv_program, tmp_name, tmp_name) elif os.path.basename(conv_program) == "xlhtml": # is there html2text available? if os.path.exists(CONV_PROGRAMS_HELPERS["html"]): cmd = "%s %s | %s > %s.txt" % \ (conv_program, tmp_name, CONV_PROGRAMS_HELPERS["html"], tmp_name) else: cmd = "%s %s > %s.txt" % \ (conv_program, tmp_name, tmp_name) else: sys.stderr.write("Error: Do not know how to handle %s conversion program.\n" % conv_program) # try to run it: try: if options["verbose"] >= 9: write_message("..... launching %s" % cmd) errcode = os.system(cmd) if errcode == 0 and os.path.exists("%s.txt" % tmp_name): bingo = 1 break # bingo! else: write_message("Error while running %s for %s.\n" % (cmd, url_direct), sys.stderr) except: write_message("Error running %s for %s.\n" % (cmd, url_direct), sys.stderr) # were we successful? 
if bingo: tmp_name_txt_file = open("%s.txt" % tmp_name) for phrase in tmp_name_txt_file.xreadlines(): for word in get_words_from_phrase(phrase): if not words.has_key(word): words[word] = 1; tmp_name_txt_file.close() else: if options["verbose"]: write_message("No conversion success for %s.\n" % (url_direct), sys.stderr) # delete temp files (they might not exist): try: os.unlink(tmp_name) os.unlink(tmp_name + ".txt") except StandardError: write_message("Error: Could not delete file. It didn't exist", sys.stderr) if options["verbose"] >= 2: write_message(".... processing %s from %s ended" % (ext,url_direct)) if options["verbose"] >= 2: write_message("... reading fulltext files from %s ended" % url_direct_or_indirect) return words.keys() # tagToFunctions mapping. It offers an indirection level necesary for # indexing fulltext. The default is get_words_from_phrase tagToWordsFunctions = {'8564_u':get_words_from_fulltext} def get_words_from_phrase(phrase, split=string.split): """Return list of words found in PHRASE. Note that the phrase is split into groups depending on the alphanumeric characters and punctuation characters definition present in the config file. """ words = {} if CFG_BIBINDEX_REMOVE_HTML_MARKUP and string.find(phrase, " -1: phrase = sre_html.sub(' ', phrase) phrase = string.lower(phrase) # 1st split phrase into blocks according to whitespace for block in split(strip_accents(phrase)): # 2nd remove leading/trailing punctuation and add block: block = sre_block_punctuation_begin.sub("", block) block = sre_block_punctuation_end.sub("", block) if block: block = apply_stemming_and_stopwords_and_length_check(block) if block: words[block] = 1 # 3rd break each block into subblocks according to punctuation and add subblocks: for subblock in sre_punctuation.split(block): subblock = apply_stemming_and_stopwords_and_length_check(subblock) if subblock: words[subblock] = 1 # 4th break each subblock into alphanumeric groups and add groups: for alphanumeric_group in sre_separators.split(subblock): alphanumeric_group = apply_stemming_and_stopwords_and_length_check(alphanumeric_group) if alphanumeric_group: words[alphanumeric_group] = 1 return words.keys() def apply_stemming_and_stopwords_and_length_check(word): """Return WORD after applying stemming and stopword and length checks. See the config file in order to influence these. """ # stem word, when configured so: if CFG_BIBINDEX_STEMMER_DEFAULT_LANGUAGE != "": word = stem(word, CFG_BIBINDEX_STEMMER_DEFAULT_LANGUAGE) # now check against stopwords: if is_stopword(word): return "" # finally check the word length: if len(word) < CFG_BIBINDEX_MIN_WORD_LENGTH: return "" return word def remove_subfields(s): "Removes subfields from string, e.g. 'foo $$c bar' becomes 'foo bar'." return sre_subfields.sub(' ', s) def get_index_id(indexname): """Returns the words/phrase index id for INDEXNAME. Returns empty string in case there is no words table for this index. Example: field='author', output=4.""" out = 0 query = """SELECT w.id FROM idxINDEX AS w WHERE w.name='%s' LIMIT 1""" % indexname res = run_sql(query, None, 1) if res: out = res[0][0] return out def get_index_tags(indexname): """Returns the list of tags that are indexed inside INDEXNAME. Returns empty list in case there are no tags indexed in this index. Note: uses get_field_tags() defined before. 
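## A simplified, self-contained sketch of the tokenisation cascade performed
## by get_words_from_phrase() above (whitespace blocks -> punctuation-trimmed
## blocks -> punctuation subblocks -> alphanumeric groups).  Stemming,
## stopwords and the configurable character classes are deliberately omitted.
def _tokenize_sketch(phrase):
    import re
    words = {}
    for block in phrase.lower().split():
        block = re.sub(r"^[^\w]+|[^\w]+$", "", block)           # trim punctuation
        if block:
            words[block] = 1
            for subblock in re.split(r"[^\w/-]+", block):       # punctuation split
                if subblock:
                    words[subblock] = 1
                    for group in re.split(r"[/-]+", subblock):  # separator split
                        if group:
                            words[group] = 1
    return words.keys()
## e.g. _tokenize_sketch("Ellis, J. hep-th/0101001") yields 'ellis', 'j',
## 'hep-th/0101001', 'hep', 'th' and '0101001' (in arbitrary order).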
Example: field='author', output=['100__%', '700__%'].""" out = [] query = """SELECT f.code FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE w.name='%s' AND w.id=wf.id_idxINDEX AND f.id=wf.id_field""" % indexname res = run_sql(query) for row in res: out.extend(get_field_tags(row[0])) return out def get_all_indexes(): """Returns the list of the names of all defined words indexes. Returns empty list in case there are no tags indexed in this index. Example: output=['global', 'author'].""" out = [] query = """SELECT name FROM idxINDEX""" res = run_sql(query) for row in res: out.append(row[0]) return out def usage(code, msg=''): "Prints usage for this module." if msg: sys.stderr.write("Error: %s.\n" % msg) print >> sys.stderr, \ """ Usage: %s [options] Examples: %s -a -i 234-250,293,300-500 -u admin@localhost %s -a -w author,fulltext -M 8192 -v3 %s -d -m +4d -A on --flush=10000 Indexing options: -a, --add add or update words for selected records -d, --del delete words for selected records -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -c, --collection=c1[,c2] select according to collection Repairing options: -k, --check check consistency for all records in the table(s) -r, --repair try to repair all records in the table(s) Specific options: -w, --windex=w1[,w2] word/phrase indexes to consider (all) -M, --maxmem=XXX maximum memory usage in kB (no limit) -f, --flush=NNN full consistent table flush after NNN records (10000) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ % ((sys.argv[0],) * 4) sys.exit(code) def authenticate(user, header="BibIndex Task Submission", action="runbibindex"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." 
% user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def split_ranges(parse_string): recIDs = [] ranges = string.split(parse_string, ",") for arange in ranges: tmp_recIDs = string.split(arange, "-") if len(tmp_recIDs)==1: recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])]) else: if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check tmp = tmp_recIDs[0] tmp_recIDs[0] = tmp_recIDs[1] tmp_recIDs[1] = tmp recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])]) return recIDs def get_word_tables(tables): global wordTables if tables: indexes = string.split(tables, ",") for index in indexes: index_id = get_index_id(index) if index_id: wordTables.append({"idxWORD%02dF" % index_id: \ get_index_tags(index)}) else: write_message("Error: There is no %s words table." % index, sys.stderr) else: for index in get_all_indexes(): index_id = get_index_id(index) wordTables.append({"idxWORD%02dF" % index_id: \ get_index_tags(index)}) return wordTables def get_date_range(var): "Returns the two dates contained as a low,high tuple" limits = string.split(var, ",") if len(limits)==1: low = get_datetime(limits[0]) return low,None if len(limits)==2: low = get_datetime(limits[0]) high = get_datetime(limits[1]) return low,high return None,None def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = sre_datetime_shift.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def create_range_list(res): """Creates a range list from a recID select query result contained in res. The result is expected to have ascending numerical order.""" if not res: return [] row = res[0] if not row: return [] else: range_list = [[row[0],row[0]]] for row in res[1:]: row_id = row[0] if row_id == range_list[-1][1] + 1: range_list[-1][1] = row_id else: range_list.append([row_id,row_id]) return range_list def beautify_range_list(range_list): """Returns a non overlapping, maximal range list""" ret_list = [] for new in range_list: found = 0 for old in ret_list: if new[0] <= old[0] <= new[1] + 1 or new[0] - 1 <= old[1] <= new[1]: old[0] = min(old[0], new[0]) old[1] = max(old[1], new[1]) found = 1 break if not found: ret_list.append(new) return ret_list def serialize_via_numeric_array(arr): """Serialize Numeric array into a compressed string.""" return compress(Numeric.dumps(arr)) def deserialize_via_numeric_array(string): """Decompress and deserialize string into a Numeric array.""" return Numeric.loads(decompress(string)) def serialize_via_marshal(obj): """Serialize Python object via marshal into a compressed string.""" return escape_string(compress(marshal.dumps(obj))) def deserialize_via_marshal(string): """Decompress and deserialize string into a Python object via marshal.""" return marshal.loads(decompress(string)) class WordTable: "A class to hold the words table." def __init__(self, tablename, fields_to_index, separators="[^\s]"): "Creates words table instance." 
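## Quick sanity sketch (illustrative values) of the recID range helpers
## defined above: -i style range strings, DB result rows, and overlapping
## range lists.
def _range_helpers_sketch():
    assert split_ranges("234-250,293,300-500") == [[234, 250], [293, 293],
                                                   [300, 500]]
    assert create_range_list([(1,), (2,), (3,), (7,)]) == [[1, 3], [7, 7]]
    assert beautify_range_list([[1, 5], [4, 10], [12, 12]]) == [[1, 10],
                                                                [12, 12]]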
self.tablename = tablename self.recIDs_in_mem = [] self.fields_to_index = fields_to_index self.separators = separators self.value = {} def get_field(self, recID, tag): """Returns list of values of the MARC-21 'tag' fields for the record 'recID'.""" out = [] bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s'""" % (bibXXx, bibrec_bibXXx, recID, tag); res = run_sql(query) for row in res: out.append(row[0]) return out def clean(self): "Cleans the words table." self.value={} def put_into_db(self, mode="normal"): """Updates the current words table in the corresponding DB idxFOO table. Mode 'normal' means normal execution, mode 'emergency' means words index reverting to old state. """ if options["verbose"]: write_message("%s %s wordtable flush started" % (self.tablename,mode)) write_message('...updating %d words into %s started' % \ (len(self.value), self.tablename)) task_update_progress("%s flushed %d/%d words" % (self.tablename, 0, len(self.value))) self.recIDs_in_mem = beautify_range_list(self.recIDs_in_mem) if mode == "normal": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='TEMPORARY' WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='CURRENT'""" % \ (self.tablename[:-1], group[0], group[1]) if options["verbose"] >= 9: write_message(query) run_sql(query) nb_words_total = len(self.value) nb_words_report = int(nb_words_total/10.0) nb_words_done = 0 for word in self.value.keys(): self.put_word_into_db(word) nb_words_done += 1 if nb_words_report!=0 and ((nb_words_done % nb_words_report) == 0): if options["verbose"]: write_message('......processed %d/%d words' % (nb_words_done, nb_words_total)) task_update_progress("%s flushed %d/%d words" % (self.tablename, nb_words_done, nb_words_total)) if options["verbose"] >= 9: write_message('...updating %d words into %s ended' % \ (nb_words_total, self.tablename)) if options["verbose"]: write_message('...updating reverse table %sR started' % self.tablename[:-1]) if mode == "normal": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='FUTURE'""" % \ (self.tablename[:-1], group[0], group[1]) if options["verbose"] >= 9: write_message(query) run_sql(query) query = """DELETE FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='TEMPORARY'""" % \ (self.tablename[:-1], group[0], group[1]) if options["verbose"] >= 9: write_message(query) run_sql(query) if options["verbose"] >= 9: write_message('End of updating wordTable into %s' % self.tablename) elif mode == "emergency": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='TEMPORARY'""" % \ (self.tablename[:-1], group[0], group[1]) if options["verbose"] >= 9: write_message(query) run_sql(query) query = """DELETE FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d' AND type='FUTURE'""" % \ (self.tablename[:-1], group[0], group[1]) if options["verbose"] >= 9: write_message(query) run_sql(query) if options["verbose"] >= 9: write_message('End of emergency flushing wordTable into %s' % self.tablename) if options["verbose"]: write_message('...updating reverse table %sR ended' % self.tablename[:-1]) self.clean() self.recIDs_in_mem = [] if options["verbose"]: write_message("%s %s wordtable flush ended" % (self.tablename, mode)) task_update_progress("%s flush ended" % (self.tablename)) def load_old_recIDs(self,word): """Load existing hitlist for 
the word from the database index files.""" query = "SELECT hitlist FROM %s WHERE term=%%s" % self.tablename res = run_sql(query, (word,)) if res: return deserialize_via_numeric_array(res[0][0]) else: return None def merge_with_old_recIDs(self,word,set): """Merge the system numbers stored in memory (hash of recIDs with value +1 or -1 according to whether to add/delete them) with those stored in the database index and received in set universe of recIDs for the given word. Return 0 in case no change was done to SET, return 1 in case SET was changed. """ set_changed_p = 0 for recID,sign in self.value[word].items(): if sign == -1 and set[recID]==1: # delete recID if existent in set and if marked as to be deleted set[recID] = 0 set_changed_p = 1 elif set[recID] == 0: # add recID if not existent in set and if marked as to be added set[recID] = 1 set_changed_p = 1 return set_changed_p def put_word_into_db(self, word): """Flush a single word to the database and delete it from memory""" set = self.load_old_recIDs(word) if set: # merge the word recIDs found in memory: if self.merge_with_old_recIDs(word,set) == 0: # nothing to update: if options["verbose"] >= 9: write_message("......... unchanged hitlist for ``%s''" % word) pass else: # yes there were some new words: if options["verbose"] >= 9: write_message("......... updating hitlist for ``%s''" % word) run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % self.tablename, (serialize_via_numeric_array(set), word)) else: # the word is new, will create new set: set = Numeric.zeros(CFG_MAX_RECID+1, Numeric.Int0) Numeric.put(set, self.value[word].keys(), 1) if options["verbose"] >= 9: write_message("......... inserting hitlist for ``%s''" % word) run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % self.tablename, (word, serialize_via_numeric_array(set))) if not set: # never store empty words run_sql("DELETE from %s WHERE term=%%s" % self.tablename, (word,)) del self.value[word] def display(self): "Displays the word table." keys = self.value.keys() keys.sort() for k in keys: if options["verbose"]: write_message("%s: %s" % (k, self.value[k])) def count(self): "Returns the number of words in the table." return len(self.value) def info(self): "Prints some information on the words table." if options["verbose"]: write_message("The words table contains %d words." % self.count()) def lookup_words(self, word=""): "Lookup word from the words table." if not word: done = 0 while not done: try: word = raw_input("Enter word: ") done = 1 except (EOFError, KeyboardInterrupt): return if self.value.has_key(word): if options["verbose"]: write_message("The word '%s' is found %d times." \ % (word, len(self.value[word]))) else: if options["verbose"]: write_message("The word '%s' does not exist in the word file."\ % word) def update_last_updated(self, starting_time=None): """Update last_updated column of the index table in the database. Puts starting time there so that if the task was interrupted for record download, the records will be reindexed next time.""" if starting_time is None: return None if options["verbose"] >= 9: write_message("updating last_updated to %s...", starting_time) return run_sql("UPDATE idxINDEX SET last_updated=%s WHERE id=%s", (starting_time, self.tablename[-3:-1],)) def add_recIDs(self, recIDs): """Fetches records which id in the recIDs range list and adds them to the wordTable. The recIDs range list is of the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]]. 
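## Minimal sketch of the per-word hitlist representation flushed above:
## a Numeric bit-vector indexed by recID, zlib-compressed before storage in
## the forward table (e.g. idxWORD01F).  merge_with_old_recIDs() applies the
## in-memory {recID: +1/-1} signs to such a vector.  The recIDs used here
## are purely illustrative.
def _hitlist_sketch():
    hits = Numeric.zeros(CFG_MAX_RECID + 1, Numeric.Int0)
    Numeric.put(hits, [12, 57, 4001], 1)         # word occurs in these records
    blob = serialize_via_numeric_array(hits)     # value stored for the word
    restored = deserialize_via_numeric_array(blob)
    return restored[12] == 1 and restored[13] == 0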
""" global chunksize flush_count = 0 records_done = 0 records_to_go = 0 for arange in recIDs: records_to_go = records_to_go + arange[1] - arange[0] + 1 time_started = time.time() # will measure profile time for arange in recIDs: i_low = arange[0] chunksize_count = 0 while i_low <= arange[1]: # calculate chunk group of recIDs and treat it: i_high = min(i_low+options["flush"]-flush_count-1,arange[1]) i_high = min(i_low+chunksize-chunksize_count-1, i_high) try: self.chk_recID_range(i_low, i_high) except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if options["verbose"] >= 9: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") task_sig_stop_commands() sys.exit(1) if options["verbose"]: write_message("%s adding records #%d-#%d started" % \ (self.tablename, i_low, i_high)) if CFG_CHECK_MYSQL_THREADS: kill_sleepy_mysql_threads() task_update_progress("%s adding recs %d-%d" % (self.tablename, i_low, i_high)) self.del_recID_range(i_low, i_high) just_processed = self.add_recID_range(i_low, i_high) flush_count = flush_count + i_high - i_low + 1 chunksize_count = chunksize_count + i_high - i_low + 1 records_done = records_done + just_processed if options["verbose"]: write_message("%s adding records #%d-#%d ended " % \ (self.tablename, i_low, i_high)) if chunksize_count >= chunksize: chunksize_count = 0 # flush if necessary: if flush_count >= options["flush"]: self.put_into_db() self.clean() if options["verbose"]: write_message("%s backing up" % (self.tablename)) flush_count = 0 self.log_progress(time_started,records_done,records_to_go) # iterate: i_low = i_high + 1 if flush_count > 0: self.put_into_db() self.log_progress(time_started,records_done,records_to_go) def add_recIDs_by_date(self, dates): """Add records that were modified between DATES[0] and DATES[1]. If DATES is not set, then add records that were modified since the last update of the index. """ if not dates: table_id = self.tablename[-3:-1] query = """SELECT last_updated FROM idxINDEX WHERE id='%s' """ % table_id res = run_sql(query) if not res: return if not res[0][0]: dates = ("0000-00-00", None) else: dates = (res[0][0], None) if dates[1] is None: res = run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s ORDER BY b.id ASC""", (dates[0],)) elif dates[0] is None: res = run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date <= %s ORDER BY b.id ASC""", (dates[1],)) else: res = run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s AND b.modification_date <= %s ORDER BY b.id ASC""", (dates[0], dates[1])) alist = create_range_list(res) if not alist: if options["verbose"]: write_message( "No new records added. %s is up to date" % self.tablename) else: self.add_recIDs(alist) def add_recID_range(self, recID1, recID2): empty_list_string = serialize_via_marshal([]) wlist = {} self.recIDs_in_mem.append([recID1,recID2]) # secondly fetch all needed tags: for tag in self.fields_to_index: if tag in tagToWordsFunctions.keys(): get_words_function = tagToWordsFunctions[tag] else: get_words_function = get_words_from_phrase bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec BETWEEN %d AND %d AND bb.id_bibxxx=b.id AND tag LIKE '%s'""" % (bibXXx, bibrec_bibXXx, recID1, recID2, tag) res = run_sql(query) for row in res: recID,phrase = row if not wlist.has_key(recID): wlist[recID] = [] if tag == "8564_u": # Special treatment for fulltext indexing. 
8564 # $$u contains URL, and $$y link name. If $$y is # actually a file name, that is if it ends with # something like .pdf or .ppt, then $$u is treated # as direct URL to the PDF file, and is indexed as # such. This is useful to index Indico files. # FIXME: this is a quick fix only. We should # rather download all 856 $$u files and analyze # content in order to decide how to index them # (directly for Indico, indirectly for Setlink). filename = get_associated_subfield_value(recID,'8564_u', phrase, 'y') filename_extension = string.lower(string.split(filename, ".")[-1]) if filename_extension in CONV_PROGRAMS.keys(): new_words = get_words_function(phrase, force_file_extension=filename_extension) # ,self.separators else: new_words = get_words_function(phrase) # ,self.separators else: new_words = get_words_function(phrase) # ,self.separators wlist[recID] = list_union(new_words,wlist[recID]) # were there some words for these recIDs found? if len(wlist) == 0: return 0 recIDs = wlist.keys() for recID in recIDs: # was this record marked as deleted? if "DELETED" in self.get_field(recID, "980__c"): wlist[recID] = [] if options["verbose"] >= 9: write_message("... record %d was declared deleted, removing its word list" % recID) if options["verbose"] >= 9: write_message("... record %d, termlist: %s" % (recID, wlist[recID])) # Using cStringIO for speed. query_factory = cStringIO.StringIO() qwrite = query_factory.write qwrite( "INSERT INTO %sR (id_bibrec,termlist,type) VALUES" % self.tablename[:-1]) qwrite( "('" ) qwrite( str(recIDs[0]) ) qwrite( "','" ) qwrite( serialize_via_marshal(wlist[recIDs[0]]) ) qwrite( "','FUTURE')" ) for recID in recIDs[1:]: qwrite(",('") qwrite(str(recID)) qwrite("','") qwrite(serialize_via_marshal(wlist[recID])) qwrite("','FUTURE')") query = query_factory.getvalue() query_factory.close() run_sql(query) query_factory = cStringIO.StringIO() qwrite = query_factory.write qwrite("INSERT INTO %sR (id_bibrec,termlist,type) VALUES" % self.tablename[:-1]) qwrite("('") qwrite(str(recIDs[0])) qwrite("','") qwrite(serialize_via_marshal(wlist[recIDs[0]])) qwrite("','CURRENT')") for recID in recIDs[1:]: qwrite( ",('" ) qwrite( str(recID) ) qwrite( "','" ) qwrite( empty_list_string ) qwrite( "','CURRENT')" ) query = query_factory.getvalue() query_factory.close() try: run_sql(query) except DatabaseError: # ok, we tried to add an existent record. No problem pass put = self.put for recID in recIDs: for w in wlist[recID]: put(recID, w, 1) return len(recIDs) def log_progress(self, start, done, todo): """Calculate progress and store it. start: start time, done: records processed, todo: total number of records""" time_elapsed = time.time() - start # consistency check if time_elapsed == 0 or done > todo: return time_recs_per_min = done/(time_elapsed/60.0) if options["verbose"]: write_message("%d records took %.1f seconds to complete.(%1.f recs/min)"\ % (done, time_elapsed, time_recs_per_min)) if time_recs_per_min: if options["verbose"]: write_message("Estimated runtime: %.1f minutes" % \ ((todo-done)/time_recs_per_min)) def put(self, recID, word, sign): "Adds/deletes a word to the word list." try: word = string.lower(word[:50]) if self.value.has_key(word): # the word 'word' exist already: update sign self.value[word][recID] = sign else: self.value[word] = {recID: sign} except: write_message("Error: Cannot put word %s with sign %d for recID %s." % (word, sign, recID)) def del_recIDs(self, recIDs): """Fetches records which id in the recIDs range list and adds them to the wordTable. 
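## Illustrative sketch (values invented) of the two row sets that
## add_recID_range() above writes into the reverse table (e.g. idxWORD01R):
## full termlists under type 'FUTURE', plus placeholder 'CURRENT' rows in
## which only the first recID of the chunk carries its termlist.
def _reverse_rows_sketch():
    future_rows = [(123, serialize_via_marshal(["ellis", "higgs"]), 'FUTURE'),
                   (124, serialize_via_marshal(["muon"]), 'FUTURE')]
    current_rows = [(123, serialize_via_marshal(["ellis", "higgs"]), 'CURRENT'),
                    (124, serialize_via_marshal([]), 'CURRENT')]
    return future_rows, current_rows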
The recIDs range list is of the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]]. """ count = 0 for arange in recIDs: self.del_recID_range(arange[0],arange[1]) count = count + arange[1] - arange[0] self.put_into_db() def del_recID_range(self, low, high): """Deletes records with 'recID' system number between low and high from memory words index table.""" if options["verbose"] > 2: write_message("%s fetching existing words for records #%d-#%d started" % \ (self.tablename, low, high)) self.recIDs_in_mem.append([low,high]) query = """SELECT id_bibrec,termlist FROM %sR as bb WHERE bb.id_bibrec BETWEEN '%d' AND '%d'""" % (self.tablename[:-1], low, high) recID_rows = run_sql(query) for recID_row in recID_rows: recID = recID_row[0] wlist = deserialize_via_marshal(recID_row[1]) for word in wlist: self.put(recID, word, -1) if options["verbose"] > 2: write_message("%s fetching existing words for records #%d-#%d ended" % \ (self.tablename, low, high)) def report_on_table_consistency(self): """Check reverse words index tables (e.g. idxWORD01R) for interesting states such as 'TEMPORARY' state. Prints small report (no of words, no of bad words). """ # find number of words: query = """SELECT COUNT(*) FROM %s""" % (self.tablename) res = run_sql(query, None, 1) if res: nb_words = res[0][0] else: nb_words = 0 # find number of records: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1]) res = run_sql(query, None, 1) if res: nb_records = res[0][0] else: nb_records = 0 # report stats: if options["verbose"]: write_message("%s contains %d words from %d records" % (self.tablename, nb_words, nb_records)) # find possible bad states in reverse tables: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]) res = run_sql(query) if res: nb_bad_records = res[0][0] else: nb_bad_records = 999999999 if nb_bad_records: write_message("EMERGENCY: %s needs to repair %d of %d records" % \ (self.tablename, nb_bad_records, nb_records)) else: if options["verbose"]: write_message("%s is in consistent state" % (self.tablename)) return nb_bad_records def repair(self): """Repair the whole table""" # find possible bad states in reverse tables: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]) res = run_sql(query, None, 1) if res: nb_bad_records = res[0][0] else: nb_bad_records = 0 if nb_bad_records == 0: return query = """SELECT id_bibrec FROM %sR WHERE type <> 'CURRENT' ORDER BY id_bibrec""" \ % (self.tablename[:-1]) res = run_sql(query) recIDs = create_range_list(res) flush_count = 0 records_done = 0 records_to_go = 0 for arange in recIDs: records_to_go = records_to_go + arange[1] - arange[0] + 1 time_started = time.time() # will measure profile time for arange in recIDs: i_low = arange[0] chunksize_count = 0 while i_low <= arange[1]: # calculate chunk group of recIDs and treat it: i_high = min(i_low+options["flush"]-flush_count-1,arange[1]) i_high = min(i_low+chunksize-chunksize_count-1, i_high) try: self.fix_recID_range(i_low, i_high) except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if options["verbose"] >= 9: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") task_sig_stop_commands() sys.exit(1) flush_count = flush_count + i_high - i_low + 1 chunksize_count = chunksize_count + i_high - i_low + 1 records_done = records_done + i_high - i_low + 1 if chunksize_count >= chunksize: chunksize_count = 0 # flush if necessary: if flush_count >= 
options["flush"]: self.put_into_db("emergency") self.clean() flush_count = 0 self.log_progress(time_started,records_done,records_to_go) # iterate: i_low = i_high + 1 if flush_count > 0: self.put_into_db("emergency") self.log_progress(time_started,records_done,records_to_go) write_message("%s inconsistencies repaired." % self.tablename) def chk_recID_range(self, low, high): """Check if the reverse index table is in proper state""" ## check db query = """SELECT COUNT(*) FROM %sR WHERE type <> 'CURRENT' AND id_bibrec BETWEEN '%d' AND '%d'""" % (self.tablename[:-1], low, high) res = run_sql(query, None, 1) if res[0][0]==0: if options["verbose"]: write_message("%s for %d-%d is in consistent state"%(self.tablename,low,high)) return # okay, words table is consistent ## inconsistency detected! write_message("EMERGENCY: %s inconsistencies detected..." % self.tablename) write_message("""EMERGENCY: Errors found. You should check consistency of the %s - %sR tables.\nRunning 'bibindex --repair' is recommended.""" \ % (self.tablename, self.tablename[:-1])) raise StandardError def fix_recID_range(self, low, high): """Try to fix reverse index database consistency (e.g. table idxWORD01R) in the low,high doc-id range. Possible states for a recID follow: CUR TMP FUT: very bad things have happened: warn! CUR TMP : very bad things have happened: warn! CUR FUT: delete FUT (crash before flushing) CUR : database is ok TMP FUT: add TMP to memory and del FUT from memory flush (revert to old state) TMP : very bad things have happened: warn! FUT: very bad things have happended: warn! """ state = {} query = "SELECT id_bibrec,type FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d'"\ % (self.tablename[:-1], low, high) res = run_sql(query) for row in res: if not state.has_key(row[0]): state[row[0]]=[] state[row[0]].append(row[1]) ok = 1 # will hold info on whether we will be able to repair for recID in state.keys(): if not 'TEMPORARY' in state[recID]: if 'FUTURE' in state[recID]: if 'CURRENT' not in state[recID]: write_message("EMERGENCY: Record %d is in inconsistent state. Can't repair it" % recID) ok = 0 else: write_message("EMERGENCY: Inconsistency in record %d detected" % recID) query = """DELETE FROM %sR WHERE id_bibrec='%d'""" % (self.tablename[:-1], recID) run_sql(query) write_message("EMERGENCY: Inconsistency in record %d repaired." % recID) else: if 'FUTURE' in state[recID] and not 'CURRENT' in state[recID]: self.recIDs_in_mem.append([recID,recID]) # Get the words file query = """SELECT type,termlist FROM %sR WHERE id_bibrec='%d'""" % (self.tablename[:-1], recID) if options["verbose"] >= 9: write_message(query) res = run_sql(query) for row in res: wlist = deserialize_via_marshal(row[1]) if options["verbose"] >= 9: write_message("Words are %s " % wlist) if row[0] == 'TEMPORARY': sign = 1 else: sign = -1 for word in wlist: self.put(recID, word, sign) else: write_message("EMERGENCY: %s for %d is in inconsistent state. Couldn't repair it." % (self.tablename, recID)) ok = 0 if not ok: write_message("""EMERGENCY: Unrepairable errors found. You should check consistency of the %s - %sR tables. Deleting affected records is recommended.""" % (self.tablename, self.tablename[:-1])) raise StandardError def task_run(row): """Run the indexing task. The row argument is the BibSched task queue row, containing if, arguments, etc. Return 1 in case of success and 0 in case of failure. 
""" global options, wordTables, stemmer, stopwords # read from SQL row: task_id = row[0] task_proc = row[1] options = marshal.loads(row[6]) task_status = row[7] # sanity check: if task_proc != "bibindex": write_message("The task #%d does not seem to be a BibIndex task." % task_id, sys.stderr) return 0 if task_status != "WAITING": write_message("The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr) return 0 # we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_starting_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) task_update_status("RUNNING") # install signal handlers signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## go ahead and treat each table : for table in options["windex"]: wordTable = WordTable(table.keys()[0], table.values()[0]) wordTable.report_on_table_consistency() try: if options["cmd"] == "del": if options["id"]: wordTable.del_recIDs(options["id"]) elif options["collection"]: l_of_colls = string.split(options["collection"], ",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.del_recIDs(recIDs_range) else: write_message("Missing IDs of records to delete from index %s." % wordTable.tablename, sys.stderr) raise StandardError elif options["cmd"] == "add": if options["id"]: wordTable.add_recIDs(options["id"]) elif options["collection"]: l_of_colls = string.split(options["collection"], ",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.add_recIDs(recIDs_range) else: wordTable.add_recIDs_by_date(options["modified"]) # only update last_updated if run via automatic mode: wordTable.update_last_updated(task_starting_time) elif options["cmd"] == "repair": wordTable.repair() else: write_message("Invalid command found processing %s" % \ wordTable.tablename, sys.stderr) raise StandardError except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) if options["verbose"] >= 9: traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") task_sig_stop_commands() sys.exit(1) wordTable.report_on_table_consistency() # We are done. State it in the database, close and quit task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." 
% task_id) return 1 def command_line(): global options long_flags =["add","del","id=","modified=","collection=", "windex=", "check","repair","maxmem=", "flush=","user=","sleeptime=", "time=","help", "version", "verbose="] short_flags ="adi:m:c:w:krM:f:u:s:t:hVv:" format_string = "%Y-%m-%d %H:%M:%S" tables = None sleeptime = "" try: opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags) except getopt.GetoptError, err: write_message(err, sys.stderr) usage(1) if args: usage(1) options={"cmd":"add", "id":[], "modified":[], "collection":[], "maxmem":0, "flush":10000, "sleeptime":0, "verbose":1 } sched_time = time.strftime(format_string) user = "" # Check for key options try: for opt in opts: if opt == ("-h","") or opt == ("--help",""): usage(1) elif opt == ("-V","") or opt == ("--version",""): print BIBINDEX_ENGINE_VERSION sys.exit(1) elif opt[0] in ["--verbose", "-v"]: options["verbose"] = int(opt[1]) elif opt == ("-a","") or opt == ("--add",""): options["cmd"] = "add" if ("-x","") in opts or ("--del","") in opts: usage(1) elif opt == ("-k","") or opt == ("--check",""): options["cmd"] = "check" elif opt == ("-r","") or opt == ("--repair",""): options["cmd"] = "repair" elif opt == ("-d","") or opt == ("--del",""): options["cmd"]="del" elif opt[0] in [ "-i", "--id" ]: options["id"] = options["id"] + split_ranges(opt[1]) elif opt[0] in [ "-m", "--modified" ]: options["modified"] = get_date_range(opt[1]) elif opt[0] in [ "-c", "--collection" ]: options["collection"] = opt[1] elif opt[0] in [ "-w", "--windex" ]: tables = opt[1] elif opt[0] in [ "-M", "--maxmem"]: options["maxmem"]=int(opt[1]) if options["maxmem"] < base_process_size + 1000: raise StandardError, "Memory usage should be higher than %d kB" % (base_process_size + 1000) elif opt[0] in [ "-f", "--flush"]: options["flush"]=int(opt[1]) elif opt[0] in [ "-u", "--user"]: user = opt[1] elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift sleeptime= opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time= get_datetime(opt[1]) else: usage(1) except StandardError, e: write_message(e, sys.stderr) sys.exit(1) options["windex"]=get_word_tables(tables) if options["cmd"] == "check": for table in options["windex"]: wordTable = WordTable(table.keys()[0], table.values()[0]) wordTable.report_on_table_consistency() return user = authenticate(user) if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] new_task_id = run_sql("""INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('bibindex',%s,%s,%s,%s,'WAITING')""", (user, sched_time, sleeptime, marshal.dumps(options))) ## update task number: options["task"] = new_task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options), new_task_id)) print "Task #%d was successfully scheduled for execution." 
% new_task_id return def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") errcode = 0 try: task_sig_stop_commands() write_message("stopped") task_update_status("STOPPED") except StandardError, err: write_message("Error during stopping! %e" % err) task_update_status("STOPPINGFAILED") errcode = 1 sys.exit(errcode) def task_sig_stop_commands(): """Do all the commands necessary to stop the task before quitting. Useful for task_sig_stop() handler. """ write_message("stopping commands started") for table in wordTables: table.put_into_db() write_message("stopping commands ended") def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task progress to %s." % msg) return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates state information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task status to %s." % val) return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def test_fulltext_indexing(): """Tests fulltext indexing programs on PDF, PS, DOC, PPT, XLS. Prints list of words and word table on the screen. Does not integrate anything into the database. Useful when debugging problems with fulltext indexing: call this function instead of main(). 
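## Sketch of how BibSched steers a running task through the signal handlers
## above; the signal-to-handler mapping is the one registered in task_run(),
## while the target PID is hypothetical.
def _send_bibsched_signal(task_pid, action):
    sigmap = {"sleep": signal.SIGUSR1,    # task_sig_sleep()   -> SLEEPING
              "wakeup": signal.SIGCONT,   # task_sig_wakeup()  -> CONTINUING
              "stop": signal.SIGTERM,     # task_sig_stop()    -> flush + STOPPED
              "suicide": signal.SIGABRT}  # task_sig_suicide() -> SUICIDED
    os.kill(task_pid, sigmap[action])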
""" global options options["verbose"] = 9 print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=atlnot&categ=Communication&id=com-indet-2002-012") # protected URL print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=agenda&categ=a00388&id=a00388s2t7") # XLS print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=agenda&categ=a02883&id=a02883s1t6/transparencies") # PPT print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=agenda&categ=a99149&id=a99149s1t10/transparencies") # DOC print get_words_from_fulltext("http://doc.cern.ch/cgi-bin/setlink?base=preprint&categ=cern&id=lhc-project-report-601") # PDF sys.exit(0) def test_word_separators(phrase="hep-th/0101001"): """Tests word separating policy on various input.""" print "%s:" % phrase for word in get_words_from_phrase(phrase): print "\t-> %s" % word def main(): """Reads arguments and either runs the task, or starts user-interface (command line).""" if len(sys.argv) == 2: try: task_id = int(sys.argv[1]) except StandardError: command_line() sys.exit() res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (task_id), None, 1) if not res: write_message("Selected task not found.", sys.stderr) sys.exit(1) try: if not task_run(res[0]): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) if options["verbose"] >= 9: traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.") task_update_status("ERROR") else: command_line() diff --git a/modules/bibrank/bin/bibrank.in b/modules/bibrank/bin/bibrank.in index 844b4dae7..60b37bd44 100644 --- a/modules/bibrank/bin/bibrank.in +++ b/modules/bibrank/bin/bibrank.in @@ -1,479 +1,480 @@ #!@PYTHON@ ## -*- mode: python; coding: utf-8; -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibRank ranking daemon. 
Usage: %s [options] Ranking examples: %s -wjif -a --id=0-30000,30001-860000 --verbose=9 %s -wjif -d --modified='2002-10-27 13:57:26' %s -wwrd --rebalance --collection=Articles %s -wwrd -a -i 234-250,293,300-500 -u admin Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --recalculate recalculate weigth data, used by word frequency method should be used if ca 1% of the document has been changed since last time -R was used Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ __revision__ = "$Id$" try: import marshal from zlib import compress,decompress from string import split,translate,lower,upper import getopt import getpass import string import os import sre import sys import time import urllib import signal import tempfile import traceback import cStringIO import re import copy import types import ConfigParser except ImportError, e: import sys try: from invenio.dbquery import run_sql, escape_string from invenio.bibrank_tag_based_indexer import * from invenio.bibrank_word_indexer import * from invenio.access_control_engine import acc_authorize_action from invenio.search_engine import perform_request_search except ImportError, e: import sys nb_char_in_line = 50 # for verbose pretty printing chunksize = 1000 # default size of chunks that the records will be treated by base_process_size = 4500 # process base size bibrank_options = {} # will hold task options def serialize_via_numeric_array_dumps(arr): return Numeric.dumps(arr) def serialize_via_numeric_array_compr(str): return compress(str) def serialize_via_numeric_array_escape(str): return escape_string(str) def serialize_via_numeric_array(arr): """Serialize Numeric array into a compressed string.""" return serialize_via_numeric_array_escape(serialize_via_numeric_array_compr(serialize_via_numeric_array_dumps(arr))) def deserialize_via_numeric_array(string): """Decompress and deserialize string into a Numeric array.""" return Numeric.loads(decompress(string)) def serialize_via_marshal(obj): """Serialize Python object via marshal into a compressed string.""" return escape_string(compress(marshal.dumps(obj))) def deserialize_via_marshal(string): """Decompress and deserialize string into a Python object via marshal.""" return marshal.loads(decompress(string)) def authenticate(user, header="BibRank Task Submission", action="runbibrank"): print header print "=" * len(header) if user == "": print>> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print>> sys.stdout, "\rUsername:", user - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select 
id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def usage(code, msg=''): "Prints usage for this module." if msg: sys.stderr.write("Error: %s.\n" % msg) print >> sys.stderr, \ """ Usage: %s [options] Ranking examples: %s -wjif -a --id=0-30000,30001-860000 --verbose=9 %s -wjif -d --modified='2002-10-27 13:57:26' %s -wjif --rebalance --collection=Articles %s -wsbr -a -i 234-250,293,300-500 -u admin Ranking options: -w, --run=r1[,r2] runs each rank method in the order given -c, --collection=c1[,c2] select according to collection -i, --id=low[-high] select according to doc recID -m, --modified=from[,to] select according to modification date -l, --lastupdate select according to last update -a, --add add or update words for selected records -d, --del delete words for selected records -S, --stat show statistics for a method -R, --recalculate recalculate weigth data, used by word frequency method should be used if ca 1%% of the document has been changed since last time -R was used Repairing options: -k, --check check consistency for all records in the table(s) check if update of ranking data is necessary -r, --repair try to repair all records in the table(s) Scheduling options: -u, --user=USER user name to store task, password needed -s, --sleeptime=SLEEP time after which to repeat tasks (no) e.g.: 1s, 30m, 24h, 7d -t, --time=TIME moment for the task to be active (now) e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26 General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ % ((sys.argv[0],) * 5) sys.exit(code) def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if bibrank_options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if bibrank_options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop_commands(): """Do all the commands necessary to stop the task before quitting. Useful for task_sig_stop() handler. 
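## Sketch of the split user lookup used by authenticate() above: two
## parametrised selects whose result tuples are simply concatenated, so a
## match on either email or nickname yields a non-empty result.
def _lookup_user_sketch(user):
    rows = (run_sql("select id,password from user where email=%s",
                    (user,), 1)
            + run_sql("select id,password from user where nickname=%s",
                      (user,), 1))
    if rows:
        return rows[0]          # (uid, password) of the first match
    return None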
""" write_message("stopping commands started") write_message("stopping commands ended") def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" global bibrank_options if bibrank_options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global bibrank_options query = "UPDATE schTASK SET progress='%s' where id=%d" % (escape_string(msg), bibrank_options["task"]) if bibrank_options["verbose"]>= 9: write_message(query) run_sql(query) return def task_update_status(val): """Updates state information in the BibSched task table.""" global bibrank_options query = "UPDATE schTASK SET status='%s' where id=%d" % (escape_string(val), bibrank_options["task"]) if bibrank_options["verbose"]>= 9: write_message(query) run_sql(query) return def split_ranges(parse_string): recIDs = [] ranges = string.split(parse_string, ",") for range in ranges: tmp_recIDs = string.split(range, "-") if len(tmp_recIDs)==1: recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])]) else: if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check tmp = tmp_recIDs[0] tmp_recIDs[0] = tmp_recIDs[1] tmp_recIDs[1] = tmp recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])]) return recIDs def get_date_range(var): "Returns the two dates contained as a low,high tuple" limits = string.split(var, ",") if len(limits)==1: low = get_datetime(limits[0]) return low,None if len(limits)==2: low = get_datetime(limits[0]) high = get_datetime(limits[1]) return low,high def command_line(): """Storing the task together with the parameters given.""" global bibrank_options long_flags = ["lastupdate","add","del","repair","maxmem", "flush","stat", "rebalance", "id=", "collection=", "check", "modified=", "update", "run=", "user=", "sleeptime=", "time=", "help", "version", "verbose="] short_flags = "ladSi:m:c:kUrRM:f:w:u:s:t:hVv:" format_string = "%Y-%m-%d %H:%M:%S" sleeptime = "" try: opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags) except getopt.GetoptError, err: write_message(err, sys.stderr) usage(1) if args: usage(1) bibrank_options = {"quick":"yes","cmd":"add","flush":100000,"validset":"", "collection":[], "id":[], "check": "", "stat":"", "modified":"", "last_updated":"last_updated","run":"", "verbose":1} res = run_sql("SELECT name from rnkMETHOD") bibrank_options["run"] = [] for (name,) in res: bibrank_options["run"].append(name) sched_time = time.strftime(format_string) user = "" try: for opt in opts: if opt == ("-h","") or opt == ("--help",""): usage(1) elif opt == ("-V","") or opt == ("--version",""): print __revision__ sys.exit(1) elif opt[0] in ["--verbose", "-v"]: bibrank_options["verbose"] = int(opt[1]) elif opt == ("-a","") or opt == ("--add",""): bibrank_options["cmd"] = "add" if ("-x","") in opts or ("--del","") in opts: usage(1) elif opt[0] in ["--run", "-w"]: bibrank_options["run"] = [] run = split(opt[1],",") for key in range(0,len(run)): bibrank_options["run"].append(run[key]) elif opt == ("-r","") or opt == ("--repair",""): bibrank_options["cmd"] = "repair" elif opt == ("-d","") or opt == 
("--del",""): bibrank_options["cmd"]="del" elif opt[0] in [ "-u", "--user"]: user = opt[1] elif opt[0] in [ "-k", "--check"]: bibrank_options["cmd"]= "check" elif opt[0] in [ "-S", "--stat"]: bibrank_options["cmd"] = "stat" elif opt[0] in [ "-i", "--id" ]: bibrank_options["id"] = bibrank_options["id"] + split_ranges(opt[1]) bibrank_options["last_updated"] = "" elif opt[0] in [ "-c", "--collection" ]: bibrank_options["collection"] = opt[1] elif opt[0] in [ "-R", "--rebalance"]: bibrank_options["quick"] = "no" elif opt[0] in [ "-f", "--flush"]: bibrank_options["flush"]=int(opt[1]) elif opt[0] in [ "-M", "--maxmem"]: bibrank_options["maxmem"]=int(opt[1]) if bibrank_options["maxmem"] < base_process_size + 1000: raise StandardError, "Memory usage should be higher than %d kB" % (base_process_size + 1000) elif opt[0] in [ "-m", "--modified" ]: bibrank_options["modified"] = get_date_range(opt[1]) #2002-10-27 13:57:26 bibrank_options["last_updated"] = "" elif opt[0] in [ "-l", "--lastupdate" ]: bibrank_options["last_updated"] = "last_updated" elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift sleeptime=opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time = get_datetime(opt[1]) else: usage(1) except StandardError, e: write_message(e, sys.stderr) sys.exit(1) user = authenticate(user) if bibrank_options["verbose"]>=9: write_message("Storing task options %s" % bibrank_options) ## sanity check: remove eventual "task" option: if bibrank_options.has_key("task"): del bibrank_options["task"] new_task_id = run_sql("""INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('bibrank',%s,%s,%s,%s,'WAITING')""", (user, sched_time, sleeptime, marshal.dumps(bibrank_options))) ## update task number: bibrank_options["task"] = new_task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(bibrank_options), new_task_id)) print "Task #%d was successfully scheduled for execution." % new_task_id return def task_run(row): """Run the indexing task. The row argument is the BibSched task queue row, containing if, arguments, etc. Return 1 in case of success and 0 in case of failure. """ global bibrank_options task_id = row[0] task_proc = row[1] bibrank_options = marshal.loads(row[6]) task_status = row[7] # install signal handlers signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) if task_proc != "bibrank": write_message("-The task #%d does not seem to be a BibRank task." % task_id, sys.stderr) return 0 if task_status != "WAITING": write_message("The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr) return 0 if bibrank_options["verbose"]: write_message("Task #%d started." % task_id) task_update_status("RUNNING") try: bibrank_options = marshal.loads(row[6]) for key in bibrank_options["run"]: write_message("") file = etcdir + "/bibrank/" + key + ".cfg" if bibrank_options["verbose"] >= 9: write_message("Getting configuration from file: %s" % file) config = ConfigParser.ConfigParser() try: config.readfp(open(file)) except StandardError, e: write_message("Cannot find configurationfile: %s. The rankmethod may also not be registered using the BibRank Admin Interface." 
% file, sys.stderr) raise StandardError #Using the function variable to call the function related to the rank method cfg_function = config.get("rank_method", "function") func_object = globals().get(cfg_function) if func_object: func_object(row, key) else: write_message("Cannot run method '%s', no function to call" % key) except StandardError, e: write_message("\nException caught: %s" % e, sys.stderr) traceback.print_tb(sys.exc_info()[2]) task_update_status("ERROR") sys.exit(1) task_update_status("DONE") if bibrank_options["verbose"]: write_message("Task #%d finished." % task_id) return 1 def main(): global bibrank_options if len(sys.argv) == 2: try: id = int(sys.argv[1]) except StandardError, err: command_line() sys.exit() res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (id), None, 1) if not res: write_message("Selected task not found.", sys.stderr) sys.exit(1) try: if not task_run(res[0]): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:") traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.") task_update_status("ERROR") else: command_line() if __name__ == "__main__": main() diff --git a/modules/bibsched/lib/bibtaskex.py b/modules/bibsched/lib/bibtaskex.py index 340b75fff..f430a6760 100644 --- a/modules/bibsched/lib/bibtaskex.py +++ b/modules/bibsched/lib/bibtaskex.py @@ -1,339 +1,340 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """CDS Invenio Bibliographic Task Example. Demonstrates BibTask <-> BibSched connectivity, signal handling, error handling, etc. """ __revision__ = "$Id$" import sys from invenio.dbquery import run_sql from invenio.access_control_engine import acc_authorize_action import getopt import getpass import marshal import signal import sre import string import time import traceback options = {} # global variable to hold task options cfg_n_default = 30 # how many Fibonacci numbers to calculate if none submitted? def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr). 
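    Each message is prefixed with a timestamp, so a call such as
    write_message("Task #123 started.") prints, for example (illustrative
    date):

        2006-06-13 12:00:00 --> Task #123 started.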
Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def fib(n): """Returns Fibonacci number for 'n'.""" out = 1 if n >= 2: out = fib(n-2) + fib(n-1) return out def authenticate(user, header="BibTaskEx Task Submission", action="runbibtaskex"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(): """Submits task to the BibSched task queue. 
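    A sketch of a typical submission (values are illustrative; the option
    names are the ones parsed in main() below):

        $ bibtaskex -n 10 -u admin -t +5m
        Task #42 submitted.

    which stores a WAITING row in schTASK whose arguments column holds
    marshal.dumps({'number': 10, 'user': 'admin', 'verbose': 1,
    'runtime': '2006-06-13 12:05:00', 'sleeptime': '', 'task': 42}).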
This is what people will be invoking via command line.""" global options ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) task_id = run_sql("""INSERT INTO schTASK (id,proc,user,runtime,sleeptime,status,arguments) VALUES (NULL,'bibtaskex',%s,%s,%s,'WAITING',%s)""", (user, options["runtime"], options["sleeptime"], marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options), task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task progress to %s." % msg) return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates status information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task status to %s." % val) return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='bibtaskex'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: BibTaskEx task %d does not seem to exist." % id, sys.stderr) sys.exit(1) return out def task_run(task_id): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task prints Fibonacci numbers for up to NUM on the stdout, and some messages on stderr. Return 1 in case of success and 0 in case of failure.""" global options options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a BibTaskEx task." % task_id, sys.stderr) return 0 ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr) return 0 ## we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: if options.has_key("number"): n = options["number"] else: n = cfg_n_default if options["verbose"] >= 9: write_message("Printing %d Fibonacci numbers." % n) for i in range(0, n): if i > 0 and i % 4 == 0: if options["verbose"] >= 3: write_message("Error: water in the CPU. Ignoring and continuing.", sys.stderr) elif i > 0 and i % 5 == 0: if options["verbose"]: write_message("Error: floppy drive dropped on the floor. 
Ignoring and continuing.", sys.stderr) if options["verbose"]: write_message("fib(%d)=%d" % (i, fib(i))) task_update_progress("Done %d out of %d." % (i, n)) time.sleep(1) ## we are done: task_update_progress("Done %d out of %d." % (n, n)) task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." % task_id) return 1 def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write("Command options:\n") sys.stderr.write(" -n, --number=NUM\t Print Fibonacci numbers for up to NUM. [default=%d]\n" % cfg_n_default) sys.stderr.write("Scheduling options:\n") sys.stderr.write(" -u, --user=USER \t User name to submit the task as, password needed.\n") sys.stderr.write(" -t, --runtime=TIME \t Time to execute the task (now), e.g.: +15s, 5m, 3h, 2002-10-27 13:57:26\n") sys.stderr.write(" -s, --sleeptime=SLEEP \t Sleeping frequency after which to repeat task (no), e.g.: 30m, 2h, 1d\n") sys.stderr.write("General options:\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -v, --verbose=LEVEL \t Verbose level (0=min, 1=default, 9=max).\n") sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. Useful for learning on how to write BibSched tasks.""" global options ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) try: if not task_run(task_id): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:", sys.stderr) traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.", sys.stderr) task_update_status("ERROR") else: ## B - submit the task # set default values: options = {} options["runtime"] = time.strftime("%Y-%m-%d %H:%M:%S") options["sleeptime"] = "" options["verbose"] = 1 # set user-defined options: try: opts, args = getopt.getopt(sys.argv[1:], "hVv:n:u:s:t:", ["help", "version", "verbose=", "number=", "user=", "sleep=", "time="]) except getopt.GetoptError, err: usage(1, err) try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in ["-n", "--number"]: options["number"] = int(opt[1]) elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift options["sleeptime"] = opt[1] elif opt[0] in [ "-t", "--runtime" ]: options["runtime"] = get_datetime(opt[1]) else: usage(1) except StandardError, e: usage(e) task_submit() return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibupload/lib/bibupload.py b/modules/bibupload/lib/bibupload.py index cc18a8bb7..828c4c592 100644 --- a/modules/bibupload/lib/bibupload.py +++ b/modules/bibupload/lib/bibupload.py @@ -1,1301 +1,1302 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable-msg=C0301 """ BibUpload: Receive MARC XML file and update the appropriate database tables according to options. Usage: bibupload [options] input.xml Examples: $ bibupload -i input.xml Options: -a, --append new fields are appended to the existing record -c, --correct fields are replaced by the new ones in the existing record -f, --format takes only the FMT fields into account. Does not update -i, --insert insert the new record in the database -r, --replace the existing record is entirely replaced by the new one -z, --reference update references (update only 999 fields) -s, --stage=STAGE stage to start from in the algorithm (0: always done; 1: FMT tags; 2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange do not change record last modification date when updating Scheduling options: -u, --user=USER user name to store task, password needed General options: -h, --help print this help and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) -V --version print the script version """ __revision__ = "$Id$" import os import sys import getopt import getpass import signal import string import marshal import time import traceback from zlib import compress import MySQLdb import re from invenio.bibupload_config import * from invenio.access_control_engine import acc_authorize_action from invenio.dbquery import run_sql, \ Error from invenio.bibrecord import create_records, \ create_record, \ record_add_field, \ record_delete_field, \ record_xml_output, \ record_get_field_instances, \ field_get_subfield_values from invenio.dateutils import convert_datestruct_to_datetext from invenio.search_engine import print_record from invenio.config import filedir, \ filedirsize, \ htdocsurl # Global variables options = {} options['mode'] = None options['verbose'] = 1 options['tag'] = None options['file_path'] = None options['notimechange'] = 0 options['stage_to_start_from'] = 1 #Statistic variables stat = {} stat['nb_records_to_upload'] = 0 stat['nb_records_updated'] = 0 stat['nb_records_inserted'] = 0 stat['nb_errors'] = 0 stat['exectime'] = time.localtime() ### bibsched task related functions: def write_message(msg, stream=sys.stdout, verbose=1): """Write message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff. Do not print anything if the global verbose option is lower than VERBOSE. """ if stream == sys.stdout or stream == sys.stderr: if options['verbose'] >= verbose: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. 
[must be sys.stdout or sys.stderr]\n" % stream) return def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def authenticate(user, header="BibUpload Task Submission", action="runbibupload"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. """ # FIXME: for the time being do not authenticate but always let the # tasks in, because of automated inserts. Maybe we shall design # an internal user here that will always be let in. return user print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(): """Submits task to the BibSched task queue. 
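    For illustration (all values invented), the marshalled arguments column
    for a plain insert run would decode to something like:

        {'mode': 'insert', 'file_path': '/tmp/input.xml', 'tag': None,
         'verbose': 1, 'stage_to_start_from': 1, 'notimechange': 0,
         'runtime': '2006-06-13 12:00:00', 'sleeptime': '', 'user': 'admin',
         'task': 43}

    The row is INSERTed with status WAITING first, then UPDATEd once the
    task id is known, so that the id can be stored inside the arguments
    under the 'task' key.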
This is what people will be invoking via command line.""" global options ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) task_id = run_sql("""INSERT INTO schTASK (id,proc,user,runtime,sleeptime,status,arguments) VALUES (NULL,'bibupload',%s,%s,%s,'WAITING',%s)""", (user, options["runtime"], options["sleeptime"], marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options), task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task progress to %s." % msg) return run_sql("UPDATE schTASK SET progress=%s WHERE id=%s", (msg, options["task"])) def task_update_status(val): """Updates status information in the BibSched task table.""" global options if options["verbose"] >= 9: write_message("Updating task status to %s." % val) return run_sql("UPDATE schTASK SET status=%s WHERE id=%s", (val, options["task"])) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(task_id): """Returns options for the task 'task_id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='bibupload'", (task_id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: BibUpload task %d does not seem to exist." % task_id, sys.stderr) sys.exit(1) return out def task_run(task_id): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task reads the MARC XML file given in the task arguments and updates the database tables accordingly. Return 1 in case of success and 0 in case of failure.""" global options, stat options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a BibUpload task." % task_id, sys.stderr) return 0 ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr) return 0 ## we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: error = 0 write_message("BibUpload Mode "+options['mode']+" has been chosen.", verbose=2) write_message("STAGE 0:", verbose=2) if options['file_path'] != None: recs = xml_marc_to_records(open_marc_file(options['file_path'])) stat['nb_records_to_upload'] = len(recs) write_message(" -Open XML marc: DONE", verbose=2) if recs != None: # We process the records one by one for record in recs: error = bibupload(record) if error[0] == 1: stat['nb_errors'] += 1 task_update_progress("Done %d out of %d."
% (stat['nb_records_inserted'] + stat['nb_records_updated'], stat['nb_records_to_upload'])) else: write_message(" Error bibupload failed: No record found", verbose=1, stream=sys.stderr) if options['verbose'] >= 1: #Print out the statistics print_out_bibupload_statistics() # Check if they were errors if stat['nb_errors'] >= 1: task_update_status("DONE WITH ERRORS") else: ## we are done: task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." % task_id) return 1 ### bibupload engine functions: def parse_command(): """Analyze the command line and retrieve arguments (xml file, mode, etc) into global options variable. Return 0 in case everything went well, 1 in case of errors, 2 in case only help or version number were asked for. """ # FIXME: add treatment of `time' try: opts, args = getopt.getopt(sys.argv[1:], "i:r:c:a:z:s:f:u:hv:Vn", [ "insert=", "replace=", "correct=", "append=", "reference=", "stage=", "format=", "user=", "help", "verbose", "version", "notimechange", ]) except getopt.GetoptError,erro: usage() write_message("Stage 0 error: %s" % erro, verbose=1, stream=sys.stderr) return 1 # set the proper mode depending on the argument value for opt, opt_value in opts: # Verbose mode option if opt in ["-v", "--verbose"]: try: options['verbose'] = int(opt_value) except ValueError: write_message("Failed: enter a valid number for verbose mode (between 0 and 9).", verbose=1, stream=sys.stderr) return 1 # stage mode option if opt in ["-s", "--stage"]: try: options['stage_to_start_from'] = int(opt_value) except ValueError: write_message("Failed: enter a valid number for the stage to start from(>0).", verbose=1, stream=sys.stderr) return 1 # No time change option if opt in ["-n", "--notimechange"]: options['notimechange'] = 1 # Insert mode option if opt in ["-i", "--insert"]: options['mode'] = 'insert' options['file_path'] = os.path.abspath(opt_value) # Replace mode option if opt in ["-r", "--replace"]: # We check if there is not the mode insert after replace if opt_value == '-i' or opt_value == '--insert': options['replace_insert'] = 1 options['mode'] = 'replace_insert' options['file_path'] = os.path.abspath(args[0]) else: # Creation of the records from the xml Marc in argument options['mode'] = 'replace' options['file_path'] = os.path.abspath(opt_value) # Correct mode option if opt in ["-c", "--correct"]: # We check if there is not just a special tag to correct try: if int(opt_value) > 0 and int(opt_value) < 999: options['mode'] = 'correct' options['tag'] = opt_value options['file_path'] = os.path.abspath(args[0]) except ValueError: if opt_value == 'FMT' or opt_value == 'FFT': options['mode'] = 'correct' options['tag'] = opt_value options['file_path'] = os.path.abspath(args[0]) else: options['mode'] = 'correct' options['file_path'] = os.path.abspath(opt_value) # Append mode option if opt in ["-a", "--append"]: # We check if there is not just a special tag to append try: if int(opt_value) > 0 and int(opt_value) < 999: options['mode'] = 'append' options['tag'] = opt_value options['file_path'] = os.path.abspath(args[0]) except ValueError: if opt_value == 'FMT' or opt_value == 'FFT': options['mode'] = 'append' options['tag'] = opt_value options['file_path'] = os.path.abspath(args[0]) else: options['mode'] = 'append' options['file_path'] = os.path.abspath(opt_value) # Reference mode option if opt in ["-z", "--reference"]: options['mode'] = 'reference' options['file_path'] = os.path.abspath(opt_value) # Format mode option if opt in ["-f", "--format"]: options['mode'] = 
'format' options['file_path'] = os.path.abspath(opt_value) # Detection of user if opt in ["-u", "--user"]: options['user'] = opt_value # Help mode option if opt in ["-h", "--help"]: usage() return 2 # Version mode option if opt in ["-V", "--version"]: write_message(__revision__, verbose=1) return 2 if options['mode'] == None: write_message("Please specify at least one update/insert mode!") return 1 if options['file_path'] == None: write_message("Missing filename! -h for help.") return 1 return 0 def bibupload(record): """Main function: process a record and fit it in the tables bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record metadata. Return (error_code, recID) of the processed record. """ error = None #If there are special tags to proceed check if it exists in the record if options['tag'] != None and not(record.has_key(options['tag'])): write_message(" Failed: Tag not found, enter a valid tag to update.", verbose=1, stream=sys.stderr) return (1, -1) #Extraction of the Record Id rec_id = retrieve_rec_id(record) if rec_id == -1: return (1, -1) else: write_message(" -Retrieve record Id (found %s): DONE." % rec_id, verbose=2) write_message(" -Check if the xml marc file is already in the database: DONE" , verbose=2) # Reference mode check if there are reference tag if options['mode'] == 'reference': error = extract_tag_from_record(record, CFG_BIBUPLOAD_REFERENCE_TAG) if error == None: write_message(" Failed: No reference tags has been found...", verbose=1, stream=sys.stderr) return (1, -1) else: error = None write_message(" -Check if reference tags exist: DONE", verbose=2) if options['mode'] == 'insert': # Insert the record into the bibrec databases to have a recordId rec_id = create_new_record() write_message(" -Creation of a new record id (%d): DONE" % rec_id, verbose=2) # we add the record Id control field to the record error = record_add_field(record, '001', '', '', rec_id, [], 0) if error == None: write_message(" Failed: " \ "Error during adding the 001 controlfield " \ "to the record", verbose=1, stream=sys.stderr) return (1, rec_id) else: error = None elif options['mode'] != 'insert' and options['mode'] != 'format' and options['stage_to_start_from'] != 5: # Update Mode #Retrieve the old record to update rec_old = create_record(print_record(int(rec_id),'xm'), 2)[0] if rec_old == None: write_message(" Failed during the creation of the old record!", verbose=1, stream=sys.stderr) return (1, rec_id) else: write_message(" -Retrieve the old record to update: DONE", verbose=2) #Delete tags to correct in the record if options['mode'] == 'correct' or options['mode'] == 'reference': delete_tags_to_correct(record, rec_old) write_message(" -Delete the old tags to correct in the old record: DONE", verbose=2) # Append new tag to the old record and update the new record with the old_record modified if options['mode'] == 'append' or options['mode'] == 'correct' or options['mode'] == 'reference': record = append_new_tag_to_old_record(record, rec_old) write_message(" -Append new tags to the old record: DONE", verbose=2) # now we clear all the rows from bibrec_bibxxx from the old record delete_bibrec_bibxxx(rec_old, rec_id) write_message(" -Clean bibrec_bibxxx: DONE", verbose=2) write_message(" -Stage COMPLETED", verbose=2) #Have a look if we have FMT tags write_message("Stage 1: Start (Insert of FMT tags if exist).", verbose=2) if options['stage_to_start_from'] <= 1 and extract_tag_from_record(record, 'FMT') != None: record = insert_fmt_tags(record, rec_id) if record == None: write_message(" Stage 
1 failed: Error while inserting FMT tags", verbose=1, stream=sys.stderr) return (1, rec_id) elif record == 0: #Mode format finished stat['nb_records_updated'] += 1 return (0, rec_id) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) #Have a look if we have FFT tags write_message("Stage 2: Start (Process FFT tags if exist).", verbose=2) if options['stage_to_start_from'] <= 2 and extract_tag_from_record(record, 'FFT') != None: if options['mode'] == 'insert' or options['mode'] == 'append': record = insert_fft_tags(record, rec_id) else: record = update_fft_tag(record, rec_id) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Update of the BibFmt write_message("Stage 3: Start (Update bibfmt).", verbose=2) if options['stage_to_start_from'] <= 3: # format the single record as xml rec_xml_new = record_xml_output(record) #Update bibfmt with the format xm of this record if options['mode'] != 'format': error = update_bibfmt_format(rec_id, rec_xml_new, 'xm') if error == 1: write_message(" Failed: error during update_bibfmt_format", verbose=1, stream=sys.stderr) return (1, rec_id) write_message(" -Stage COMPLETED", verbose=2) # Update the database MetaData write_message("Stage 4: Start (Update the database with the metadata).", verbose=2) if options['stage_to_start_from'] <= 4: update_database_with_metadata(record, rec_id) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Finally we update the bibrec table with the current date write_message("Stage 5: Start (Update bibrec table with current date).", verbose=2) if options['stage_to_start_from'] <= 5 and options['mode'] != 'insert' and options['notimechange'] == 0: now = convert_datestruct_to_datetext(time.localtime()) write_message(" -Retrieve current localtime: DONE", verbose=2) update_bibrec_modif_date(now, rec_id) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) #Increase statistics if options['mode'] == 'insert': stat['nb_records_inserted'] += 1 else: stat['nb_records_updated'] += 1 #Upload of this record finish write_message("Record "+str(rec_id)+" DONE", verbose=1) return (0, rec_id) def usage(): """Print help""" print """Receive MARC XML file and update appropriate database tables according to options. Usage: bibupload [options] input.xml Examples: $ bibupload -i input.xml Options: -a, --append new fields are appended to the existing record -c, --correct fields are replaced by the new ones in the existing record -f, --format takes only the FMT fields into account. Does not update -i, --insert insert the new record in the database -r, --replace the existing record is entirely replaced by the new one -z, --reference update references (update only 999 fields) -s, --stage=STAGE stage to start from in the algorithm (0: always done; 1: FMT tags; 2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange do not change record last modification date when updating Scheduling options: -u, --user=USER user name to store task, password needed General options: -h, --help print this help and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) -V --version print the script version """ def print_out_bibupload_statistics(): """Print the statistics of the process""" out = "Task stats: %(nb_input)d input records, %(nb_updated)d updated, " \ "%(nb_inserted)d inserted, %(nb_errors)d errors. Time %(nb_sec).2f sec." 
% { \ 'nb_input': stat['nb_records_to_upload'], 'nb_updated': stat['nb_records_updated'], 'nb_inserted': stat['nb_records_inserted'], 'nb_errors': stat['nb_errors'], 'nb_sec': time.time() - time.mktime(stat['exectime']) } write_message(out) def open_marc_file(path): """Open a file and return the data""" try: # open the file containing the marc document marc_file = open(path,'r') marc = marc_file.read() marc_file.close() except IOError, erro: write_message("Error: %s" % erro, verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") sys.exit(1) return marc def xml_marc_to_records(xml_marc): """create the records""" # Creation of the records from the xml Marc in argument recs = create_records(xml_marc, 1, 1) if recs == []: write_message("Error: Cannot parse MARCXML file.", verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") sys.exit(1) elif recs[0][0] == None: write_message("Error: MARCXML file has wrong format: %s" % recs[0][2], verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") sys.exit(1) else: recs = map((lambda x:x[0]), recs) return recs def find_record_bibrec(rec_id): """ receives the record ID and returns if this record exist in bibrec """ query = """SELECT id FROM bibrec WHERE id = %s""" params = (rec_id,) try: res = run_sql(query, params) except Error, error: write_message(" Error during find_record_bibrec function : %s " % error, verbose=1, stream=sys.stderr) if len(res): return res else: return None def find_record_format(rec_id, format): """Look whether record REC_ID is formatted in FORMAT, i.e. whether FORMAT exists in the bibfmt table for this record. Return the number of times it is formatted: 0 if not, 1 if yes, 2 if found more than once (should never occur). 
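    For example (illustrative record id): find_record_format(123, 'hb')
    returns 0 while record 123 has no cached 'hb' format in bibfmt, and 1
    once one has been stored, which is roughly how update_bibfmt_format()
    below decides between an SQL UPDATE and insert_bibfmt():

        if find_record_format(rec_id, 'xm') == 0:
            insert_bibfmt(rec_id, marc, 'xm')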
""" out = 0 query = """SELECT COUNT(id) FROM bibfmt WHERE id_bibrec=%s AND format=%s""" params = (rec_id, format) res = [] try: res = run_sql(query, params) out = res[0][0] except Error, error: write_message(" Error during find_record_format() : %s " % error, verbose=1, stream=sys.stderr) return out def find_record_bibfmt(marc): """ receives the xmlmarc containing a record and returns the id in bibrec if the record exists in bibfmt""" # compress the marc value pickled_marc = MySQLdb.escape_string(compress(marc)) query = """SELECT id_bibrec FROM bibfmt WHERE value = %s""" # format for marc xml is xm params = (pickled_marc,) try: res = run_sql(query, params) except Error, error: write_message(" Error during find_record_bibfmt function : %s " % error, verbose=1, stream=sys.stderr) if len(res): return res else: return None def find_record_from_sysno(sysno): """receive the sysno number and return the record id""" table_name = 'bib'+CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2]+'x' query = """SELECT DISTINCT id FROM `%s` WHERE value = '%s'""" params = (table_name, sysno) try: res = run_sql(query % params) except Error, error: write_message(" Error during find_record_from_sysno function 1st query : %s " % error, verbose=1, stream=sys.stderr) if len(res): table_name = 'bibrec_bib'+CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2]+'x' query = """SELECT DISTINCT id_bibrec FROM `%s` WHERE id_bibxxx = '%s'""" params = (table_name, res[0][0]) try: res = run_sql(query % params) except Error, error: write_message(" Error during find_record_from_sysno function 2nd query : %s " % error, verbose=1, stream=sys.stderr) if len(res): return res[0][0] else: return None else: return None def extract_tag_from_record(record, tag_number): """ Extract the tag_number for record.""" # first step verify if the record is not already in the database if record: return record.get(tag_number, None) return None def retrieve_rec_id(record): """Retrieve the record Id from a record by using tag 001 or SYSNO""" rec_id = None tag = None #1st step: we look for the tag 001 tag = extract_tag_from_record(record, '001') if tag != None: #We exctract the record Id from the Tag rec_id = tag[0][3] #if we are in insert mode => error if options['mode'] == 'insert': write_message(" Failed : Error tag 001 found in the xml submitted"\ ", you should use the option replace, correct or append"\ " to replace an existing record. -h for help.", verbose=1, stream=sys.stderr) return -1 #if we found the rec id and we are not in insert mode => continue elif options['mode'] != 'insert': #we try to find the rec_id in the table bibrec if find_record_bibrec(rec_id) != None: return rec_id else: #The record doesn't exist yet. We will try to check the SYSNO id rec_id = None else: write_message(" -Tag 001 not found in the xml marc file.", verbose=9) if rec_id == None: #2nd step we look for the sysno code sysno = extract_tag_from_record(record, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG) if sysno != None: #retrieve the SYSNO code from the tuple SYSNO sysno = sysno[0][0][0][1] write_message(" Check if the SYSNO id "+sysno+" exist in the database", verbose=9) #Retrieve the rec id from the database rec_id = find_record_from_sysno(sysno) if rec_id != None and options['mode'] == 'insert': write_message(" Failed : Record id found in the database: Please choose another mode than insert. 
-h for help.", verbose=1, stream=sys.stderr) return -1 elif rec_id == None and options['mode'] != 'insert': if options['mode'] != 'replace_insert': write_message(" Failed : Record Id not found even with SYSNO id..."\ "Please insert the file before updating it."\ " -h for help", verbose=1, stream=sys.stderr) return -1 else: options['mode'] = 'insert' else: return rec_id if sysno == None and options['mode'] != 'insert': if options['mode'] != 'replace_insert': write_message(" Failed : SYSNO tag not found in the xml marc file."\ "Please insert the file before updating it."\ " -h for help", verbose=1, stream=sys.stderr) return -1 else: options['mode'] = 'insert' return rec_id ### Insert functions def create_new_record(): """Create new record in the database""" now = convert_datestruct_to_datetext(time.localtime()) query = """INSERT INTO bibrec (creation_date, modification_date) VALUES (%s, %s)""" params = (now, now) try: rec_id = run_sql(query, params) return rec_id except Error, error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_bibfmt(id_bibrec, marc, format): """Insert the format in the table bibfmt""" # compress the marc value #pickled_marc = MySQLdb.escape_string(compress(marc)) pickled_marc = compress(marc) # get the current time now = convert_datestruct_to_datetext(time.localtime()) query = """INSERT INTO bibfmt (id_bibrec, format, last_updated, value) VALUES (%s, %s, %s, %s)""" try: row_id = run_sql(query, (id_bibrec, format, now, pickled_marc)) return row_id except Error, error: write_message(" Error during the insert_bibfmt function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_record_bibxxx(tag, value): """Insert the record into bibxxx""" #determine into which table one should insert the record table_name = 'bib'+tag[0:2]+'x' # check if the tag, value combination exists in the table query = """SELECT id FROM %s """ % table_name query += """ WHERE tag=%s AND value=%s""" params = (tag, value) try: res = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) if len(res): # get the id of the row, if it exists row_id = res[0][0] return (table_name, row_id) else: # necessary to insert the tag, value into bibxxx table query = """INSERT INTO %s """ % table_name query += """ (tag, value) values (%s , %s)""" params = (tag, value) try: row_id = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) return (table_name, row_id) def insert_record_bibrec_bibxxx(table_name, id_bibxxx, field_number, id_bibrec): """Insert the record into bibrec_bibxxx""" #determine into which table one should insert the record full_table_name = 'bibrec_'+ table_name # insert the proper row into the table query = """INSERT INTO %s """ % full_table_name query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)""" params = (id_bibrec, id_bibxxx, field_number) try: res = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibrec_bibxxx function 2nd query : %s " % error, verbose=1, stream=sys.stderr) return res def insert_fft_tags(record, rec_id): """Process and insert FFT tags""" tuple_list = None tuple_list = extract_tag_from_record(record, 'FFT') #If there is a FFT TAG :) if tuple_list != None: for single_tuple in tuple_list: # Get the inside of the FFT file docpath = 
single_tuple[0][0][1] docname = re.sub("\..*", "", os.path.basename(docpath)) extension = re.sub("^[^\.]*.", "", os.path.basename(docpath)).lower() #Create a new docId try: bib_doc_id = run_sql("insert into bibdoc (docname,creation_date,modification_date) values(%s,NOW(),NOW())", (docname,)) write_message(" -Insert of the file %s into bibdoc : DONE" % docname, verbose=2) except Error, error: write_message(" Error during the insert_fft_tags function : %s " % error, verbose=1, stream=sys.stderr) if bib_doc_id != None: #we link the document to the record if a rec_id was specified if rec_id != "": #TO FIX doc_type : main or additional, fron where the information come from? doc_type = "" try: res = run_sql("insert into bibrec_bibdoc values(%s,%s,%s)", (rec_id, bib_doc_id, doc_type)) if res == None: write_message(" Failed during creation of link between doc Id and rec Id).", verbose=1, stream=sys.stderr) else: write_message(" -Insert of the link bibrec bibdoc for %s : DONE" % docname, verbose=2) except Error, error: write_message(" Error during the insert_fft_tags function : %s " % error, verbose=1, stream=sys.stderr) else: write_message(" Failed during creation of the new doc Id.", verbose=1, stream=sys.stderr) #Move the file to the correct place # Variables from the config file archivepath = filedir archivesize = filedirsize url_path = None group = "g"+str(int(int(bib_doc_id)/archivesize)) basedir = "%s/%s/%s" % (archivepath, group, bib_doc_id) # we create the corresponding storage directory if not os.path.exists(basedir): try: os.makedirs(basedir) write_message(" -Create a new directory %s : DONE" % basedir, verbose=2) except OSError, error: write_message(" Error making the directory : %s " % error, verbose=1, stream=sys.stderr) # and save the father record id if it exists if rec_id != "": try: filep = open("%s/.recid" % basedir, "w") filep.write(str(bib_doc_id)) filep.close() except IOError, error: write_message(" Error writing the file : %s " % error, verbose=1, stream=sys.stderr) #Move the file to the good directory try: os.system("mv %s %s" % (docpath, basedir)) write_message(" -Move the file %s : DONE" % docname, verbose=2) except OSError, error: write_message(" Error moving the file : %s " % error, verbose=1, stream=sys.stderr) #Create the Url Path url_path = htdocsurl+"/record/"+str(rec_id)+"/files/"+docname+"."+extension #add tag 856 to the xml marc to proceed subfield_list = [('u', url_path), ('z', 'Access to Fulltext')] newfield_number = record_add_field(record, "856", "4", "", "", subfield_list) if newfield_number == None: write_message(" Error when adding the field"+ single_tuple, verbose=1, stream=sys.stderr) else: write_message(" -Add the new tag 856 to the record for %s : DONE" % docname, verbose=2) #Delete FFT tag :) record_delete_field(record, 'FFT', '', '') write_message(" -Delete FFT tag from source : DONE", verbose=2) return record def insert_fmt_tags(record, rec_id): """Process and insert FMT tags""" fmt_fields = record_get_field_instances(record, 'FMT') if fmt_fields: for fmt_field in fmt_fields: # Get the f, g subfields of the FMT tag try: f_value = field_get_subfield_values(fmt_field, "f")[0] except IndexError: f_value = "" try: g_value = field_get_subfield_values(fmt_field, "g")[0] except IndexError: g_value = "" # Update the format res = update_bibfmt_format(rec_id, g_value, f_value) if res == 1: write_message(" Failed: Error during update_bibfmt", verbose=1, stream=sys.stderr) # If we are in format mode, we only care about the FMT tag if options['mode'] == 'format': 
return 0 # We delete the FMT tag of the record record_delete_field(record, 'FMT') write_message(" -Delete field FMT from record : DONE", verbose=2) return record elif options['mode'] == 'format': write_message(" Failed: Format update failed: no FMT tag found", verbose=1, stream=sys.stderr) return None else: return record ### Update functions def update_bibrec_modif_date(now, bibrec_id): """Update the date of the record in the bibrec table""" query = """UPDATE bibrec SET modification_date=%s WHERE id=%s""" params = (now, bibrec_id) try: res = run_sql(query, params) if res != 1: write_message(" Failed: SQL error during the update of the bibrec modification_date", verbose=1, stream=sys.stderr) else: write_message(" -Update record modification date : DONE", verbose=2) except Error, error: write_message(" Error during update_bibrec_modif_date function : %s" % error, verbose=1, stream=sys.stderr) def update_bibfmt_format(id_bibrec, format_value, format_name): """Update the format in the table bibfmt""" # We check if the format is already in bibfmt nb_found = find_record_format(id_bibrec, format_name) if nb_found == 1: # Update the format # get the current time now = convert_datestruct_to_datetext(time.localtime()) # compress the format_value pickled_format_value = compress(format_value) query = """UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s""" params = (now, pickled_format_value, id_bibrec, format_name) try: row_id = run_sql(query, params) if row_id == None: write_message(" Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Update the format %s in bibfmt : DONE" % format_name, verbose=2) return 0 except Error, error: write_message(" Error during the update_bibfmt_format function : %s " % error, verbose=1, stream=sys.stderr) elif nb_found > 1: write_message(" Failed: Same format %s found several times in bibfmt for the same record."
% format_name, verbose=1, stream=sys.stderr) return 1 else: # Insert the format information in BibFMT res = insert_bibfmt(id_bibrec, format_value, format_name) if res == None: write_message(" Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Insert the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 def update_database_with_metadata(record, rec_id): """Update the database tables with the record and the record id given in parameter""" for tag in record.keys(): # check if tag is not a control field : tag not in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each tag there is a list of tuples representing datafields tuple_list = record[tag] # this list should contain the elements of a full tag [tag, ind1, ind2, subfield_code] tag_list = [] tag_list.append(tag) for single_tuple in tuple_list: # these are the contents of a single tuple subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # append the ind's to the full tag if (ind1 == ''): tag_list.append('_') else: tag_list.append(ind1) if (ind2 == ''): tag_list.append('_') else: tag_list.append(ind2) datafield_number = single_tuple[4] if tag in CFG_BIBUPLOAD_SPECIAL_TAGS: # nothing to do for special tags (FFT, FMT) pass elif tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001": value = single_tuple[3] # get the full tag full_tag = ''.join(tag_list) # Others modes : update all the tag if options['mode'] == 'insert' or options['mode'] == 'replace' or options['mode'] == 'append': write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value) #print 'tname, bibrow', table_name, bibxxx_row_id; if table_name == None or bibxxx_row_id == None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id) if res == None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) else: # get the tag and value from the content of each subfield for subfield in subfield_list: subtag = subfield[0] value = subfield[1] tag_list.append(subtag) # get the full tag full_tag = ''.join(tag_list) # Others modes : update all the tag if options['mode'] == 'insert' or options['mode'] == 'replace' or options['mode'] == 'append': write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value) if table_name == None or bibxxx_row_id == None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id) if res == None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) # remove the subtag from the list tag_list.pop() tag_list.pop() tag_list.pop() tag_list.pop() write_message(" -Update the database with metadata : DONE", verbose=2) def append_new_tag_to_old_record(record, rec_old): """Append new tags to a old record""" if options['tag'] != None: tag = options['tag'] if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS: if tag == '001': pass else: # if it is a controlfield,just access the value 
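# (Illustrative shape only, not taken from a real record: each controlfield
# instance here is a tuple of the form (subfields, ind1, ind2, value, field_number),
# e.g. ([], '', '', '20060613120000.0', 3) for a 005 tag, so the value we
# need sits in position 3.)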
for single_tuple in record[tag]: controlfield_value = single_tuple[3] # add the field to the old record newfield_number = record_add_field(rec_old, tag, "", "", controlfield_value) if newfield_number == None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: # For each tag there is a list of tuples representing datafields for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # We add the datafield to the old record if options['verbose'] == 9: print " Adding tag: ", tag, " ind1=", ind1, " ind2=", ind2, " code=", subfield_list newfield_number = record_add_field(rec_old, tag, ind1, ind2, "", subfield_list) if newfield_number == None: write_message("Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: # Go through each tag in the appended record for tag in record.keys(): # Reference mode append only reference tag if options['mode'] == 'reference': if tag == CFG_BIBUPLOAD_REFERENCE_TAG: for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # We add the datafield to the old record if options['verbose'] == 9: print " Adding tag: ", tag, " ind1=", ind1, " ind2=", ind2, " code=", subfield_list newfield_number = record_add_field(rec_old, tag, ind1, ind2, "", subfield_list) if newfield_number == None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS: if tag == '001': pass else: # if it is a controlfield,just access the value for single_tuple in record[tag]: controlfield_value = single_tuple[3] # add the field to the old record newfield_number = record_add_field(rec_old, tag, "", "", controlfield_value) if newfield_number == None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: # For each tag there is a list of tuples representing datafields for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # We add the datafield to the old record if options['verbose'] == 9: print " Adding tag: ", tag, " ind1=", ind1, " ind2=", ind2, " code=", subfield_list newfield_number = record_add_field(rec_old, tag, ind1, ind2, "", subfield_list) if newfield_number == None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) return rec_old def update_fft_tag(record, rec_id): """Process and Update FFT tags""" #TO IMPROVE: SELECT THE BIBDOC ID TO DELETE AND FIRST INSERT THE NEW FFT TAGS BEFORE DELETING THE OLD ONE # We delete the bibdoc corresponding to this record delete_bibdoc(rec_id) # We delete the links between bibrec and bibdoc delete_bibrec_bibdoc(rec_id) # We delete the tag 856 from the record record_delete_field(record, '856', '4') # We add the new fft tags record = insert_fft_tags(record, rec_id) return record ###Delete function def delete_tags_to_correct(record, rec_old): """Delete the tags which are existing in both records""" # Browse through all the tags from the marc file for tag in record.keys(): #Do we have to delete only a special tag or all? 
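# (Sketch of the behaviour: with --correct and no --tag, every tag of the
# incoming record that also exists in the stored record, except 001, is
# deleted from the stored record before the new fields are appended; with
# e.g. --correct=100 only the 100 fields are touched.)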
if options['tag'] == None: # See if these tags exist in the old record if rec_old.has_key(tag) and tag != '001': # Delete the tag found write_message(" Delete tag: "+tag+" ind1= "+rec_old[tag][0][1]+" ind2= "+rec_old[tag][0][2], verbose=9) record_delete_field(rec_old, tag, rec_old[tag][0][1], rec_old[tag][0][2]) else: if rec_old.has_key(tag) and options['tag'] == tag: # Delete the tag found write_message(" Delete tag: "+tag+" ind1= "+rec_old[tag][0][1]+" ind2= "+rec_old[tag][0][2], verbose=9) record_delete_field(rec_old, tag, rec_old[tag][0][1], rec_old[tag][0][2]) def delete_bibrec_bibxxx(record, id_bibrec): """Delete the database record from the table bibxxx given in parameters""" # we clear all the rows from bibrec_bibxxx from the old record for tag in record.keys(): if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each name construct the bibrec_bibxxx table name table_name = 'bibrec_bib'+tag[0:2]+'x' # delete all the records with proper id_bibrec query = """DELETE FROM `%s` where id_bibrec = %s""" params = (table_name, id_bibrec) try: run_sql(query % params) except Error, error: write_message(" Error during the delete_bibrec_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) def delete_bibdoc(id_bibrec): """Delete document from bibdoc which correspond to the bibrec id given in parameter""" query = """UPDATE bibdoc SET status='deleted' WHERE id IN (SELECT id_bibdoc FROM bibrec_bibdoc WHERE id_bibrec=%s)""" params = (id_bibrec,) try: run_sql(query, params) except Error, error: write_message(" Error during the delete_bibdoc function : %s " % error, verbose=1, stream=sys.stderr) def delete_bibrec_bibdoc(id_bibrec): """Delete the bibrec record from the table bibrec_bibdoc given in parameter""" # delete all the records with proper id_bibrec query = """DELETE FROM bibrec_bibdoc WHERE id_bibrec=%s""" params = (id_bibrec,) try: run_sql(query, params) except Error, error: write_message(" Error during the delete_bibrec_bibdoc function : %s " % error, verbose=1, stream=sys.stderr) def main(): """main entry point for bibupload""" global options ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) try: if not task_run(task_id): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, erro: write_message("Unexpected error occurred: %s." % erro, sys.stderr) write_message("Traceback is:", sys.stderr) traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.", sys.stderr) task_update_status("ERROR") else: ## B - submit the task # set default values: options["runtime"] = time.strftime("%Y-%m-%d %H:%M:%S") options["sleeptime"] = "" # set user-defined options: error = parse_command() if error == 0: task_submit() else: sys.exit(1) return if __name__ == "__main__": main() diff --git a/modules/websearch/lib/websearch_webcoll.py b/modules/websearch/lib/websearch_webcoll.py index 74789b498..e0dc5d388 100644 --- a/modules/websearch/lib/websearch_webcoll.py +++ b/modules/websearch/lib/websearch_webcoll.py @@ -1,1087 +1,1088 @@ ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Creates CDS Invenio collection specific pages, using WML and MySQL configuration tables.""" __revision__ = "$Id$" import calendar import copy import getopt import getpass import marshal import signal import sys import cgi import sre import os import string import zlib import Numeric import time import traceback from invenio.config import \ CFG_CERN_SITE, \ CFG_WEBSEARCH_INSTANT_BROWSE, \ CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, \ cachedir, \ cdslang, \ cdsname, \ weburl from invenio.messages import gettext_set_language, language_list_long from invenio.search_engine import HitSet, search_pattern, get_creation_date, get_field_i18nname from invenio.dbquery import run_sql, escape_string, Error, get_table_update_time from invenio.access_control_engine import acc_authorize_action from invenio.bibrank_record_sorter import get_bibrank_methods from invenio.dateutils import convert_datestruct_to_dategui from invenio.websearch_external_collections import \ external_collection_load_states, \ dico_collection_external_searches, \ external_collection_sort_engine_by_name import invenio.template websearch_templates = invenio.template.load('websearch') ## global vars collection_house = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... options = {} # will hold task options # cfg_cache_last_updated_timestamp_tolerance -- cache timestamp # tolerance (in seconds), to account for the fact that an admin might # accidentally happen to edit the collection definitions at exactly # the same second when some webcoll process was about to be started. # In order to be safe, let's put an exaggerated timestamp tolerance # value such as 20 seconds: cfg_cache_last_updated_timestamp_tolerance = 20 # cfg_cache_last_updated_timestamp_file -- location of the cache # timestamp file: cfg_cache_last_updated_timestamp_file = "%s/collections/last_updated" % cachedir def get_collection(colname): """Return collection object from the collection house for given colname. If does not exist, then create it.""" if not collection_house.has_key(colname): colobject = Collection(colname) collection_house[colname] = colobject return collection_house[colname] ## auxiliary functions: def mymkdir(newdir, mode=0777): """works the way a good mkdir should :) - already exists, silently complete - regular file in the way, raise an exception - parent directory(ies) does not exist, make them as well """ if os.path.isdir(newdir): pass elif os.path.isfile(newdir): raise OSError("a file with the same name as the desired " \ "dir, '%s', already exists." % newdir) else: head, tail = os.path.split(newdir) if head and not os.path.isdir(head): mymkdir(head, mode) if tail: os.umask(022) os.mkdir(newdir, mode) def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if var == fld: return " selected" else: return "" def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr). 
Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def get_field(recID, tag): "Gets list of field 'tag' for the record with 'recID' system number." out = [] digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def print_record(recID, format='hb', ln=cdslang): "Prints record 'recID' formatted accoding to 'format'." out = "" # HTML brief format by default query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % zlib.decompress(res[0][0]) else: # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_field(recID, "245__a") # secondly, authors: authors = get_field(recID, "100__a") + get_field(recID, "700__a") # thirdly, date of creation: dates = get_field(recID, "260__c") # thirdly bis, report numbers: rns = get_field(recID, "037__a") + get_field(recID, "088__a") # fourthly, beginning of abstract: abstracts = get_field(recID, "520__a") # fifthly, fulltext link: urls_z = get_field(recID, "8564_z") urls_u = get_field(recID, "8564_u") out += websearch_templates.tmpl_record_body( weburl = weburl, titles = titles, authors = authors, dates = dates, rns = rns, abstracts = abstracts, urls_u = urls_u, urls_z = urls_z, ln=ln) # at the end of HTML mode, print "Detailed record" and "Mark record" functions: out += websearch_templates.tmpl_record_links( weburl = weburl, recid = recID, ln = ln ) return out class Collection: "Holds the information on collections (id,name,dbquery)." def __init__(self, name=""): "Creates collection instance by querying the DB configuration database about 'name'." self.calculate_reclist_run_already = 0 # to speed things up wihtout much refactoring self.update_reclist_run_already = 0 # to speed things up wihtout much refactoring self.reclist_with_nonpublic_subcolls = HitSet() if not name: self.name = cdsname # by default we are working on the home page self.id = 1 self.dbquery = None self.nbrecs = None self.reclist = HitSet() else: self.name = name query = "SELECT id,name,dbquery,nbrecs,reclist FROM collection WHERE name='%s'" % escape_string(name) try: res = run_sql(query, None, 1) if res: self.id = res[0][0] self.name = res[0][1] self.dbquery = res[0][2] self.nbrecs = res[0][3] try: self.reclist = HitSet(Numeric.loads(zlib.decompress(res[0][5]))) except: self.reclist = HitSet() else: # collection does not exist! self.id = None self.dbquery = None self.nbrecs = None self.reclist = HitSet() except Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) def get_name(self, ln=cdslang, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): """Return nicely formatted collection name for language LN. 
The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" out = prolog i18name = "" res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) try: i18name += res[0][0] except IndexError: pass if i18name: out += i18name else: out += self.name out += epilog return out def get_ancestors(self): "Returns list of ancestors of the current collection." ancestors = [] id_son = self.id while 1: query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) res = run_sql(query, None, 1) if res: col_ancestor = get_collection(res[0][1]) ancestors.append(col_ancestor) id_son = res[0][0] else: break ancestors.reverse() return ancestors def restricted_p(self): """Predicate to test if the collection is restricted or not. Return the contect of the `restrited' column of the collection table (typically Apache group). Otherwise return None if the collection is public.""" out = None query = "SELECT restricted FROM collection WHERE id=%d" % self.id res = run_sql(query, None, 1) try: out = res[0][0] except: pass return out def get_sons(self, type='r'): "Returns list of direct sons of type 'type' for the current collection." sons = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type) res = run_sql(query) for row in res: sons.append(get_collection(row[1])) return sons def get_descendants(self, type='r'): "Returns list of all descendants of type 'type' for the current collection." descendants = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type) res = run_sql(query) for row in res: col_desc = get_collection(row[1]) descendants.append(col_desc) descendants += col_desc.get_descendants() return descendants def write_cache_file(self, filename='', filebody=''): "Write a file inside collection cache." # open file: dirname = "%s/collections/%d" % (cachedir, self.id) mymkdir(dirname) fullfilename = dirname + "/%s.html" % filename try: os.umask(022) f = open(fullfilename, "w") except IOError, v: try: (code, message) = v except: code = 0 message = v print "I/O Error: " + str(message) + " (" + str(code) + ")" sys.exit(1) # print user info: if options["verbose"] >= 6: write_message("... 
creating %s" % fullfilename) sys.stdout.flush() # print page body: f.write(filebody) # close file: f.close() def update_webpage_cache(self): """Create collection page header, navtrail, body (including left and right stripes) and footer, and call write_cache_file() afterwards to update the collection webpage cache.""" ## do this for each language: for lang, lang_fullname in language_list_long(): # load the right message language _ = gettext_set_language(lang) ## first, update navtrail: for as in range(0, 2): self.write_cache_file("navtrail-as=%s-ln=%s" % (as, lang), self.create_navtrail_links(as, lang)) ## second, update page body: for as in range(0, 2): # do both simple search and advanced search pages: body = websearch_templates.tmpl_webcoll_body( ln=lang, collection=self.name, te_portalbox = self.create_portalbox(lang, 'te'), searchfor = self.create_searchfor(as, lang), np_portalbox = self.create_portalbox(lang, 'np'), narrowsearch = self.create_narrowsearch(as, lang, 'r'), focuson = self.create_narrowsearch(as, lang, "v") + self.create_external_collections_box(), instantbrowse = self.create_instant_browse(as=as, ln=lang), ne_portalbox = self.create_portalbox(lang, 'ne') ) self.write_cache_file("body-as=%s-ln=%s" % (as, lang), body) ## third, write portalboxes: self.write_cache_file("portalbox-tp-ln=%s" % lang, self.create_portalbox(lang, "tp")) self.write_cache_file("portalbox-te-ln=%s" % lang, self.create_portalbox(lang, "te")) self.write_cache_file("portalbox-lt-ln=%s" % lang, self.create_portalbox(lang, "lt")) self.write_cache_file("portalbox-rt-ln=%s" % lang, self.create_portalbox(lang, "rt")) ## fourth, write 'last updated' information: self.write_cache_file("last-updated-ln=%s" % lang, convert_datestruct_to_dategui(time.localtime(), ln=lang)) return def create_navtrail_links(self, as=0, ln=cdslang): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If as==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in self.get_ancestors(): if dad.name != cdsname: # exclude Home collection dads.append((dad.name, dad.get_name(ln))) return websearch_templates.tmpl_navtrail_links( as=as, ln=ln, dads=dads) def create_portalbox(self, lang=cdslang, position="rt"): """Creates portalboxes of language CDSLANG of the position POSITION by consulting DB configuration database. The position may be: 'lt'='left top', 'rt'='right top', etc.""" out = "" query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ " ORDER BY cp.score DESC" % (self.id, lang, position) res = run_sql(query) for row in res: title, body = row[0], row[1] if title: out += websearch_templates.tmpl_portalbox(title = title, body = body) else: # no title specified, so print body ``as is'' only: out += body return out def create_narrowsearch(self, as=0, ln=cdslang, type="r"): """Creates list of collection descendants of type 'type' under title 'title'. If as==1, then links to Advanced Search interfaces; otherwise Simple Search. 
Suitable for 'Narrow search' and 'Focus on' boxes.""" # get list of sons and analyse it sons = self.get_sons(type) if not sons: return '' # get descendents descendants = self.get_descendants(type) grandsons = [] if CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS: # load grandsons for each son for son in sons: grandsons.append(son.get_sons()) # return "" return websearch_templates.tmpl_narrowsearch( as = as, ln = ln, type = type, father = self, has_grandchildren = len(descendants)>len(sons), sons = sons, display_grandsons = CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, grandsons = grandsons ) def create_external_collections_box(self, ln=cdslang): external_collection_load_states() if not dico_collection_external_searches.has_key(self.id): return "" engines_list = external_collection_sort_engine_by_name(dico_collection_external_searches[self.id]) return websearch_templates.tmpl_searchalso(ln, engines_list, self.id) def create_instant_browse(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, as=0, ln=cdslang): "Searches database and produces list of last 'rg' records." if self.restricted_p(): return websearch_templates.tmpl_box_restricted_content(ln = ln) else: if self.nbrecs and self.reclist: # firstly, get last 'rg' records: recIDs = Numeric.nonzero(self.reclist._set) passIDs = [] total = len(recIDs) to_display = min(rg, total) for idx in range(total-1, total-to_display-1, -1): recid = recIDs[idx] passIDs.append({'id': recid, 'body': print_record(recid, ln=ln), 'date': get_creation_date(recid, fmt="%Y-%m-%d
%H:%i")}) if self.nbrecs > rg: url = websearch_templates.build_search_url( cc=self.name, jrec=rg+1, ln=ln, as=as) else: url = "" return websearch_templates.tmpl_instant_browse( as=as, ln=ln, recids=passIDs, more_link=url) return websearch_templates.tmpl_box_no_records(ln=ln) def create_searchoptions(self): "Produces 'Search options' portal box." box = "" query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id ORDER BY cff.score DESC""" % self.id res = run_sql(query) if res: for row in res: field_id = row[0] field_code = row[1] field_name = row[2] query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) res_bis = run_sql(query_bis) if res_bis: values = [{'value' : '', 'text' : 'any' + field_name}] # FIXME: internationalisation of "any" for row_bis in res_bis: values.append({'value' : cgi.escape(row_bis[0], 1), 'text' : row_bis[1]}) box += websearch_templates.tmpl_select( fieldname = field_code, values = values ) return box def create_sortoptions(self, ln=cdslang): "Produces 'Sort options' portal box." # load the right message language _ = gettext_set_language(ln) box = "" query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id values = [{'value' : '', 'text': "- %s -" % _("latest first")}] res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: for tmp in ('title', 'author', 'report number', 'year'): values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) box += websearch_templates.tmpl_select( fieldname = 'so', css_class = 'address', values = [ {'value' : 'a' , 'text' : _("asc.")}, {'value' : 'd' , 'text' : _("desc.")} ] ) return box def create_rankoptions(self, ln=cdslang): "Produces 'Rank options' portal box." # load the right message language _ = gettext_set_language(ln) values = [{'value' : '', 'text': "- %s %s -" % (string.lower(_("OR")), _("rank by"))}] for (code, name) in get_bibrank_methods(self.id, ln): values.append({'value' : code, 'text': name}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) return box def create_displayoptions(self, ln=cdslang): "Produces 'Display options' portal box." # load the right message language _ = gettext_set_language(ln) values = [] for i in ['10', '25', '50', '100', '250', '500']: values.append({'value' : i, 'text' : i + ' ' + _("results")}) box = websearch_templates.tmpl_select( fieldname = 'rg', css_class = 'address', values = values ) if self.get_sons(): box += websearch_templates.tmpl_select( fieldname = 'sc', css_class = 'address', values = [ {'value' : '1' , 'text' : _("split by collection")}, {'value' : '0' , 'text' : _("single list")} ] ) return box def create_formatoptions(self, ln=cdslang): "Produces 'Output format options' portal box." 
# load the right message language _ = gettext_set_language(ln) box = "" values = [] query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf WHERE cf.id_collection=%d AND cf.id_format=f.id ORDER BY cf.score DESC, f.name ASC""" % self.id res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: values.append({'value' : 'hb', 'text' : "HTML %s" % _("brief")}) box = websearch_templates.tmpl_select( fieldname = 'of', css_class = 'address', values = values ) return box def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): "Produces 'search within' selection box for the current collection." # get values query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id res = run_sql(query) values = [{'value' : '', 'text' : get_field_i18nname("any field", ln)}] if res: for row in res: values.append({'value' : row[0], 'text' : row[1]}) else: if CFG_CERN_SITE: for tmp in ['title', 'author', 'abstract', 'report number', 'year']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) else: for tmp in ['title', 'author', 'abstract', 'keyword', 'report number', 'year', 'fulltext', 'reference']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) return websearch_templates.tmpl_searchwithin_select( fieldname = fieldname, ln = ln, selected = value, values = values ) def create_searchexample(self): "Produces search example(s) for the current collection." out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id return out def create_searchfor(self, as=0, ln=cdslang): "Produces either Simple or Advanced 'Search for' box for the current collection." if as == 1: return self.create_searchfor_advanced(ln) else: return self.create_searchfor_simple(ln) def create_searchfor_simple(self, ln=cdslang): "Produces simple 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_simple( ln=ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, middle_option = self.create_searchwithin_selection_box(ln=ln), ) def create_searchfor_advanced(self, ln=cdslang): "Produces advanced 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_advanced( ln = ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, middle_option_1 = self.create_searchwithin_selection_box('f1', ln=ln), middle_option_2 = self.create_searchwithin_selection_box('f2', ln=ln), middle_option_3 = self.create_searchwithin_selection_box('f3', ln=ln), searchoptions = self.create_searchoptions(), sortoptions = self.create_sortoptions(ln), rankoptions = self.create_rankoptions(ln), displayoptions = self.create_displayoptions(ln), formatoptions = self.create_formatoptions(ln) ) def calculate_reclist(self): """Calculate, set and return the (reclist, reclist_with_nonpublic_subcolls) tuple for given collection.""" if self.calculate_reclist_run_already: # do we have to recalculate? return (self.reclist, self.reclist_with_nonpublic_subcolls) if options["verbose"] >= 6: write_message("... 
calculating reclist of %s" % self.name) reclist = HitSet() # will hold results for public sons only; good for storing into DB reclist_with_nonpublic_subcolls = HitSet() # will hold results for both public and nonpublic sons; good for deducing total # number of documents if not self.dbquery: # A - collection does not have dbquery, so query recursively all its sons # that are either non-restricted or that have the same restriction rules for coll in self.get_sons(): coll_reclist, coll_reclist_with_nonpublic_subcolls = coll.calculate_reclist() if ((coll.restricted_p() is None) or (coll.restricted_p() == self.restricted_p())): # add this reclist ``for real'' only if it is public reclist.union(coll_reclist) reclist_with_nonpublic_subcolls.union(coll_reclist_with_nonpublic_subcolls) else: # B - collection does have dbquery, so compute it: reclist = search_pattern(None, self.dbquery) reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) # deduce the number of records: reclist.calculate_nbhits() reclist_with_nonpublic_subcolls.calculate_nbhits() # store the results: self.nbrecs = reclist_with_nonpublic_subcolls._nbhits self.reclist = reclist self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 # return the two sets: return (self.reclist, self.reclist_with_nonpublic_subcolls) def update_reclist(self): "Update the record universe for given collection; nbrecs, reclist of the collection table." if self.update_reclist_run_already: # do we have to reupdate? return 0 if options["verbose"] >= 6: write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs)) sys.stdout.flush() try: query = "UPDATE collection SET nbrecs=%d, reclist='%s' WHERE id=%d" % \ (self.nbrecs, escape_string(zlib.compress(Numeric.dumps(self.reclist._set))), self.id) run_sql(query) self.reclist_updated_since_start = 1 except Error, e: print "Database Query Error %d: %s." % (e.args[0], e.args[1]) sys.exit(1) # last but not least, update the speed-up flag: self.update_reclist_run_already = 1 return 0 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def get_current_time_timestamp(): """Return timestamp corresponding to the current time.""" return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) def compare_timestamps_with_tolerance(timestamp1, timestamp2, tolerance=0): """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2 minus TOLERANCE, 0 if they are equal within TOLERANCE limit, and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE. 
""" # remove any trailing .00 in timestamps: timestamp1 = sre.sub(r'\.[0-9]+$', '', timestamp1) timestamp2 = sre.sub(r'\.[0-9]+$', '', timestamp2) # first convert timestamps to Unix epoch seconds: timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S")) timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S")) # now compare them: if timestamp1_seconds < timestamp2_seconds - tolerance: return -1 elif timestamp1_seconds > timestamp2_seconds + tolerance: return 1 else: return 0 def get_database_last_updated_timestamp(): """Return last updated timestamp for collection-related and record-related database tables. """ database_tables_timestamps = [] database_tables_timestamps.append(get_table_update_time('bibrec')) database_tables_timestamps.append(get_table_update_time('bibfmt')) database_tables_timestamps.append(get_table_update_time('idxWORD%')) database_tables_timestamps.append(get_table_update_time('collection%')) database_tables_timestamps.append(get_table_update_time('portalbox')) database_tables_timestamps.append(get_table_update_time('field%')) database_tables_timestamps.append(get_table_update_time('format%')) database_tables_timestamps.append(get_table_update_time('rnkMETHODNAME')) return max(database_tables_timestamps) def get_cache_last_updated_timestamp(): """Return last updated cache timestamp.""" try: f = open(cfg_cache_last_updated_timestamp_file, "r") except: return "1970-01-01 00:00:00" timestamp = f.read() f.close() return timestamp def set_cache_last_updated_timestamp(timestamp): """Set last updated cache timestamp to TIMESTAMP.""" try: f = open(cfg_cache_last_updated_timestamp_file, "w") except: pass f.write(timestamp) f.close() return timestamp def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") pass # FIXME: is there anything to be done? task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def authenticate(user, header="WebColl Task Submission", action="runwebcoll"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. 
""" print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = string.strip(string.lower(sys.stdin.readline())) else: print >> sys.stdout, "\rUsername:", user ## first check user pw: - res = run_sql("select id,password from user where email=%s or nickname=%s", (user, user,), 1) + res = run_sql("select id,password from user where email=%s", (user,), 1) + \ + run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" global options ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: if options["verbose"] >= 9: print "" write_message("storing task options %s\n" % options) task_id = run_sql("""INSERT INTO schTASK (id,proc,user,runtime,sleeptime,status,arguments) VALUES (NULL,'webcoll',%s,%s,%s,'WAITING',%s)""", (user, options["runtime"], options["sleeptime"], marshal.dumps(options))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options), task_id)) write_message("Task #%d submitted." % task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates status information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='webcoll'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: WebColl task %d does not seem to exist." % id) sys.exit(1) return out def task_run(task_id): """Run the WebColl task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task will update collection reclist cache and collection web pages for given collection. (default is all). Arguments described in usage() function. Return 1 in case of success and 0 in case of failure.""" global options task_run_start_timestamp = get_current_time_timestamp() options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a WebColl task." % task_id) return 0 ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." 
% (task_id, task_status)) return 0 ## we can run the task now: if options["verbose"]: write_message("Task #%d started." % task_id) task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) colls = [] # decide whether we need to run or not, by comparing last updated timestamps: if options["verbose"] >= 3: write_message("Database timestamp is %s." % get_database_last_updated_timestamp()) write_message("Collection cache timestamp is %s." % get_cache_last_updated_timestamp()) if options.has_key("force") or \ compare_timestamps_with_tolerance(get_database_last_updated_timestamp(), get_cache_last_updated_timestamp(), cfg_cache_last_updated_timestamp_tolerance) >= 0: ## either forced update was requested or cache is not up to date, so recreate it: # firstly, decide which collections to do: if options.has_key("collection"): coll = get_collection(options["collection"]) if coll.id == None: usage(1, 'Collection %s does not exist' % coll.name) colls.append(coll) else: res = run_sql("SELECT name FROM collection ORDER BY id") for row in res: colls.append(get_collection(row[0])) # secondly, update collection reclist cache: i = 0 for coll in colls: i += 1 if options["verbose"]: write_message("%s / reclist cache update" % coll.name) coll.calculate_reclist() coll.update_reclist() task_update_progress("Part 1/2: done %d/%d" % (i, len(colls))) # thirdly, update collection webpage cache: i = 0 for coll in colls: i += 1 if options["verbose"]: write_message("%s / web cache update" % coll.name) coll.update_webpage_cache() task_update_progress("Part 2/2: done %d/%d" % (i, len(colls))) # finally update the cache last updated timestamp: # (but only when all collections were updated, not when only # some of them were forced-updated as per admin's demand) if not options.has_key("collection"): set_cache_last_updated_timestamp(task_run_start_timestamp) if options["verbose"] >= 3: write_message("Collection cache timestamp is set to %s." % get_cache_last_updated_timestamp()) else: ## cache up to date, we don't have to run if options["verbose"]: write_message("Collection cache is up to date, no need to run.") pass ## we are done: task_update_progress("Done.") task_update_status("DONE") if options["verbose"]: write_message("Task #%d finished." % task_id) return 1 def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write("Command options:\n") sys.stderr.write(" -c, --collection\t Update cache for the given collection only. [all]\n") sys.stderr.write(" -f, --force\t Force update even if cache is up to date. 
[no]\n") sys.stderr.write("Scheduling options:\n") sys.stderr.write(" -u, --user=USER \t User name to submit the task as, password needed.\n") sys.stderr.write(" -t, --runtime=TIME \t Time to execute the task (now), e.g.: +15s, 5m, 3h, 2002-10-27 13:57:26\n") sys.stderr.write(" -s, --sleeptime=SLEEP \t Sleeping frequency after which to repeat task (no), e.g.: 30m, 2h, 1d\n") sys.stderr.write("General options:\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -v, --verbose=LEVEL \t Verbose level (from 0 to 9, default 1).\n") sys.stderr.write("""Description: %s updates the collection cache (record universe for a given collection plus web page elements) based on WML and DB configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""" % sys.argv[0]) sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. Useful for learning on how to write BibSched tasks.""" global options ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) try: if not task_run(task_id): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:", sys.stderr) traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.", sys.stderr) task_update_status("ERROR") else: ## B - submit the task # set default values: options["runtime"] = time.strftime("%Y-%m-%d %H:%M:%S") options["verbose"] = 1 options["sleeptime"] = "" # set user-defined options: try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:s:t:c:f", ["help", "version", "verbose=","user=","sleep=","time=","collection=","force"]) except getopt.GetoptError, err: usage(1, err) try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift options["sleeptime"] = opt[1] elif opt[0] in [ "-t", "--runtime" ]: options["runtime"] = get_datetime(opt[1]) elif opt[0] in [ "-c", "--collection"]: options["collection"] = opt[1] elif opt[0] in [ "-f", "--force"]: options["force"] = 1 else: usage(1) except StandardError, e: usage(e) task_submit() return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/websession/lib/webuser.py b/modules/websession/lib/webuser.py index f5bf04183..ee36a0dc5 100644 --- a/modules/websession/lib/webuser.py +++ b/modules/websession/lib/webuser.py @@ -1,738 +1,739 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ This file implements all methods necessary for working with users and sessions in CDS Invenio. Contains methods for logging/registration when a user log/register into the system, checking if it is a guest user or not. At the same time this presents all the stuff it could need with sessions managements, working with websession. It also contains Apache-related user authentication stuff. """ __revision__ = "$Id$" from marshal import loads, dumps from zlib import compress, decompress import time import os import crypt import string import smtplib import sre from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_ACCESS_CONTROL_LEVEL_GUESTS, \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN, \ CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS, \ CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT, \ CFG_APACHE_GROUP_FILE, \ CFG_APACHE_PASSWORD_FILE, \ adminemail, \ cdslang, \ cdsname, \ supportemail, \ sweburl, \ tmpdir, \ version, \ weburl from invenio import session, websession from invenio.dbquery import run_sql, escape_string, OperationalError from invenio.websession import pSession, pSessionMapping from invenio.session import SessionError from invenio.access_control_config import * from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import acc_findUserRoleActions from invenio.messages import gettext_set_language import invenio.template tmpl = invenio.template.load('websession') sre_invalid_nickname = sre.compile(""".*[,'@]+.*""") # pylint: disable-msg=C0301 def createGuestUser(): """Create a guest user , insert into user null values in all fields createGuestUser() -> GuestUserID """ if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0: try: return run_sql("insert into user (email, note) values ('', '1')") except OperationalError: return None elif CFG_ACCESS_CONTROL_LEVEL_GUESTS >= 1: try: return run_sql("insert into user (email, note) values ('', '0')") except OperationalError: return None def page_not_authorized(req, referer='', uid='', text='', navtrail='', ln=cdslang): """Show error message when account is not activated""" from invenio.webpage import page _ = gettext_set_language(ln) if not CFG_ACCESS_CONTROL_LEVEL_SITE: title = CFG_WEBACCESS_MSGS[5] if not uid: uid = getUid(req) try: res = run_sql("SELECT email FROM user WHERE id=%s" % uid) if res and res[0][0]: if text: body = text else: body = "%s %s" % (CFG_WEBACCESS_WARNING_MSGS[9] % res[0][0], ("%s %s" % (CFG_WEBACCESS_MSGS[0] % referer, CFG_WEBACCESS_MSGS[1]))) else: if text: body = text else: if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 1: body = CFG_WEBACCESS_MSGS[3] else: body = CFG_WEBACCESS_WARNING_MSGS[4] + CFG_WEBACCESS_MSGS[2] except OperationalError, e: body = _("Database problem") + ': ' + str(e) elif CFG_ACCESS_CONTROL_LEVEL_SITE == 1: title = CFG_WEBACCESS_MSGS[8] body = "%s %s" % (CFG_WEBACCESS_MSGS[7], CFG_WEBACCESS_MSGS[2]) elif CFG_ACCESS_CONTROL_LEVEL_SITE == 2: title = CFG_WEBACCESS_MSGS[6] body = "%s %s" % (CFG_WEBACCESS_MSGS[4], CFG_WEBACCESS_MSGS[2]) return page(title=title, 
uid=getUid(req), body=body, navtrail=navtrail, req=req) def getUid (req): """Return user ID taking it from the cookie of the request. Includes control mechanism for the guest users, inserting in the database table when need be, raising the cookie back to the client. User ID is set to 0 when client refuses cookie or we are in the read-only site operation mode. User ID is set to -1 when we are in the permission denied site operation mode. getUid(req) -> userId """ if CFG_ACCESS_CONTROL_LEVEL_SITE == 1: return 0 if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: return -1 guest = 0 sm = session.MPSessionManager(pSession, pSessionMapping()) try: s = sm.get_session(req) except SessionError: sm.revoke_session_cookie (req) s = sm.get_session(req) userId = s.getUid() if userId == -1: # first time, so create a guest user s.setUid(createGuestUser()) userId = s.getUid() guest = 1 sm.maintain_session(req, s) if guest == 0: guest = isGuestUser(userId) if guest: if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0: return userId elif CFG_ACCESS_CONTROL_LEVEL_GUESTS >= 1: return -1 else: res = run_sql("SELECT note FROM user WHERE id=%s" % userId) if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0: return userId elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1 and res and res[0][0] in [1, "1"]: return userId else: return -1 def setUid(req, uid): """It sets the userId into the session, and raise the cookie to the client. """ sm = session.MPSessionManager(pSession, pSessionMapping()) try: s = sm.get_session(req) except SessionError: sm.revoke_session_cookie(req) s = sm.get_session(req) s.setUid(uid) sm.maintain_session(req, s) return uid def get_user_info(uid, ln=cdslang): """Get infos for a given user. @param uid: user id (int) @return tuple: (uid, nickname, display_name) """ _ = gettext_set_language(ln) query = """SELECT id, nickname FROM user WHERE id=%i""" res = run_sql(query%uid) if res: if res[0]: user = list(res[0]) if user[1]: user.append(user[1]) else: user[1] = str(user[0]) user.append(_("user") + ' #' + str(user[0])) return tuple(user) return (uid, '', _("N/A")) def isGuestUser(uid): """It Checks if the userId corresponds to a guestUser or not isGuestUser(uid) -> boolean """ out = 1 try: res = run_sql("select email from user where id=%s", (uid,)) if res: if res[0][0]: out = 0 except OperationalError: pass return out def isUserSubmitter(uid): u_email = get_email(uid) res = run_sql("select * from sbmSUBMISSIONS where email=%s", (u_email,)) if len(res) > 0: return 1 else: return 0 def isUserReferee(uid): res = run_sql("select sdocname from sbmDOCTYPE") for row in res: doctype = row[0] categ = "*" (auth_code, auth_message) = acc_authorize_action(uid, "referee", doctype=doctype, categ=categ) if auth_code == 0: return 1 res2 = run_sql("select sname from sbmCATEGORIES where doctype=%s", (doctype,)) for row2 in res2: categ = row2[0] (auth_code, auth_message) = acc_authorize_action(uid, "referee", doctype=doctype, categ=categ) if auth_code == 0: return 1 return 0 def isUserAdmin(uid): "Return 1 if the user UID has some admin rights; 0 otherwise." out = 0 if acc_findUserRoleActions(uid): out = 1 return out def nickname_valid_p(nickname): """Check whether wanted NICKNAME supplied by the user is valid. At the moment we just check whether it is not empty, does not contain blanks or @, is not equal to `guest', etc. This check relies on sre_invalid_nickname regexp (see above) Return 1 if nickname is okay, return 0 if it is not. 
""" if nickname and \ not(nickname.startswith(' ') or nickname.endswith(' ')) and \ nickname.lower() != 'guest': if not sre_invalid_nickname.match(nickname): return 1 return 0 def email_valid_p(email): """Check whether wanted EMAIL address supplied by the user is valid. At the moment we just check whether it contains '@' and whether it doesn't contain blanks. We also check the email domain if CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN is set. Return 1 if email is okay, return 0 if it is not. """ if (string.find(email, "@") <= 0) or (string.find(email, " ") > 0): return 0 elif CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN: if not email.endswith(CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN): return 0 return 1 def registerUser(req, email, passw, nickname, register_without_nickname=False): """Register user with the desired values of NICKNAME, EMAIL and PASSW. If REGISTER_WITHOUT_NICKNAME is set to True, then ignore desired NICKNAME and do not set any. This is suitable for external authentications so that people can login without having to register an internal account first. Return 0 if the registration is successful, 1 if email is not valid, 2 if nickname is not valid, 3 if email is already in the database, 4 if nickname is already in the database, 5 when users cannot register themselves because of the site policy. """ # is email valid? if not email_valid_p(email): return 1 # is email already taken? res = run_sql("SELECT * FROM user WHERE email=%s", (email,)) if len(res) > 0: return 3 if register_without_nickname: # ignore desired nick and use default empty string one: nickname = "" else: # is nickname valid? if not nickname_valid_p(nickname): return 2 # is nickname already taken? res = run_sql("SELECT * FROM user WHERE nickname=%s", (nickname,)) if len(res) > 0: return 4 # okay, go on and register the user: if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0: activated = 1 elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1: activated = 0 elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2: return 5 user_preference = get_default_user_preferences() setUid(req, run_sql("INSERT INTO user (nickname, email, password, note, settings) VALUES (%s,%s,%s,%s,%s)", (nickname, email, passw, activated, serialize_via_marshal(user_preference),))) if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT: sendNewUserAccountWarning(email, email, passw) if CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS: sendNewAdminAccountWarning(email, adminemail) return 0 def updateDataUser(uid, email, password, nickname): """Update user data. Used when a user changed his email or password or nickname. """ if email == 'guest': return 0 if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2: run_sql("update user set password=%s where id=%s", (password, uid)) else: run_sql("update user set email=%s,password=%s where id=%s", (email, password, uid)) if nickname and nickname != '': run_sql("update user set nickname=%s where id=%s", (nickname, uid)) return 1 def loginUser(req, p_un, p_pw, login_method): """It is a first simple version for the authentication of user. 
It returns the id of the user, for checking afterwards if the login is correct """ # p_un passed may be an email or a nickname: p_email = get_email_from_username(p_un) # go on with the old stuff based on p_email: user_prefs = get_user_preferences(emailUnique(p_email)) if user_prefs and login_method != user_prefs["login_method"]: if CFG_EXTERNAL_AUTHENTICATION.has_key(user_prefs["login_method"]): return ([], p_email, p_pw, 11) if not CFG_EXTERNAL_AUTHENTICATION.has_key(login_method): return ([], p_email, p_pw, 12) if CFG_EXTERNAL_AUTHENTICATION[login_method][0]: p_email = CFG_EXTERNAL_AUTHENTICATION[login_method][0].auth_user(p_email, p_pw) if p_email: p_pw = givePassword(p_email) if not p_pw or p_pw < 0: import random p_pw = int(random.random() * 1000000) if registerUser(req, p_email, p_pw, "", register_without_nickname=True) != 0: return ([], p_email, p_pw, 13) else: query_result = run_sql("SELECT id from user where email=%s and password=%s", (p_email, p_pw,)) user_prefs = get_user_preferences(query_result[0][0]) user_prefs["login_method"] = login_method set_user_preferences(query_result[0][0], user_prefs) else: return ([], p_email, p_pw, 10) query_result = run_sql("SELECT id from user where email=%s and password=%s", (p_email, p_pw,)) if query_result: prefered_login_method = get_user_preferences(query_result[0][0])['login_method'] else: return ([], p_email, p_pw, 14) if login_method != prefered_login_method: if CFG_EXTERNAL_AUTHENTICATION.has_key(prefered_login_method): return ([], p_email, p_pw, 11) return (query_result, p_email, p_pw, 0) def logoutUser(req): """It logout the user of the system, creating a guest user. """ getUid(req) sm = session.MPSessionManager(pSession, pSessionMapping()) try: s = sm.get_session(req) except SessionError: sm.revoke_session_cookie(req) s = sm.get_session(req) id1 = createGuestUser() s.setUid(id1) sm.maintain_session(req, s) return id1 def username_exists_p(username): """Check if USERNAME exists in the system. Username may be either nickname or email. Return 1 if it does exist, 0 if it does not. """ if username == "": # return not exists if asked for guest users return 0 - res = run_sql("SELECT email FROM user WHERE nickname=%s OR email=%s", - (username, username,)) + res = run_sql("SELECT email FROM user WHERE email=%s", (username,)) + \ + run_sql("SELECT email FROM user WHERE nickname=%s", (username,)) if len(res) > 0: return 1 return 0 def emailUnique(p_email): """Check if the email address only exists once. If yes, return userid, if not, -1 """ query_result = run_sql("select id, email from user where email=%s", (p_email,)) if len(query_result) == 1: return query_result[0][0] elif len(query_result) == 0: return 0 return -1 def nicknameUnique(p_nickname): """Check if the nickname only exists once. If yes, return userid, if not, -1 """ query_result = run_sql("select id, nickname from user where nickname=%s", (p_nickname,)) if len(query_result) == 1: return query_result[0][0] elif len(query_result) == 0: return 0 return -1 def update_Uid(req, p_email, p_pw): """It updates the userId of the session. It is used when a guest user is logged in succesfully in the system with a given email and password """ query_ID = int(run_sql("select id from user where email=%s and password=%s", (p_email, p_pw))[0][0]) setUid(req, query_ID) return query_ID def givePassword(email): """ It checks in the database the password for a given email. 
It is used to send the password to the email of the user.It returns the password if the user exists, otherwise it returns -999 """ query_pass = run_sql("select password from user where email =%s", (email,)) if len(query_pass)>0: return query_pass[0][0] return -999 def sendNewAdminAccountWarning(newAccountEmail, sendTo, ln=cdslang): """Send an email to the address given by sendTo about the new account newAccountEmail.""" _ = gettext_set_language(ln) fromaddr = "From: %s" % supportemail toaddrs = "To: %s" % sendTo to = toaddrs + "\n" sub = "Subject: New account on '%s'" % cdsname if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1: sub += " - PLEASE ACTIVATE" sub += "\n\n" body = "A new account has been created on '%s'" % cdsname if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1: body += " and is awaiting activation" body += ":\n\n" body += " Username/Email: %s\n\n" % newAccountEmail body += "You can approve or reject this account request at: %s/admin/webaccess/webaccessadmin.py/manageaccounts\n" % weburl body += "\n---------------------------------" body += "\n%s" % cdsname body += "\nContact: %s" % supportemail msg = to + sub + body server = smtplib.SMTP('localhost') server.set_debuglevel(1) try: server.sendmail(fromaddr, toaddrs, msg) except smtplib.SMTPRecipientsRefused: return 0 server.quit() return 1 def sendNewUserAccountWarning(newAccountEmail, sendTo, password, ln=cdslang): """Send an email to the address given by sendTo about the new account newAccountEmail.""" _ = gettext_set_language(ln) fromaddr = "From: %s" % supportemail toaddrs = "To: %s" % sendTo to = toaddrs + "\n" sub = "Subject: Your account created on '%s'\n\n" % cdsname body = "You have created a new account on '%s':\n\n" % cdsname body += " Username/Email: %s\n" % newAccountEmail body += " Password: %s\n\n" % ("*" * len(password)) if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1: body += "This account is awaiting approval by the site administrators and therefore cannot be used as of yet.\nYou will receive an email notification as soon as your account request has been processed.\n" body += "\n---------------------------------" body += "\n%s" % cdsname body += "\nContact: %s" % supportemail msg = to + sub + body server = smtplib.SMTP('localhost') server.set_debuglevel(1) try: server.sendmail(fromaddr, toaddrs, msg) except smtplib.SMTPRecipientsRefused: return 0 server.quit() return 1 def get_email(uid): """Return email address of the user uid. Return string 'guest' in case the user is not found.""" out = "guest" res = run_sql("SELECT email FROM user WHERE id=%s", (uid,), 1) if res and res[0][0]: out = res[0][0] return out def get_email_from_username(username): """Return email address of the user corresponding to USERNAME. The username may be either nickname or email. Return USERNAME untouched if not found in the database or if found several matching entries. """ out = username - res = run_sql("SELECT email FROM user WHERE email=%s OR nickname=%s", (username, username,), 1) + res = run_sql("SELECT email FROM user WHERE email=%s", (username,), 1) + \ + run_sql("SELECT email FROM user WHERE nickname=%s", (username,), 1) if res and len(res) == 1: out = res[0][0] return out def get_password(uid): """Return password of the user uid. Return None in case the user is not found.""" out = None res = run_sql("SELECT password FROM user WHERE id=%s", (uid,), 1) if res and res[0][0]: out = res[0][0] return out def get_nickname(uid): """Return nickname of the user uid. 
def get_nickname(uid):
    """Return the nickname of the user uid.  Return None in case the user is
    not found."""
    out = None
    res = run_sql("SELECT nickname FROM user WHERE id=%s", (uid,), 1)
    if res and res[0][0]:
        out = res[0][0]
    return out

def get_nickname_or_email(uid):
    """Return the nickname (preferred) or the email address of the user uid.
    Return string 'guest' in case the user is not found."""
    out = "guest"
    res = run_sql("SELECT nickname, email FROM user WHERE id=%s", (uid,), 1)
    if res and res[0]:
        if res[0][0]:
            out = res[0][0]
        elif res[0][1]:
            out = res[0][1]
    return out

def create_userinfobox_body(req, uid, language="en"):
    """Create user info box body for user UID in language LANGUAGE."""
    if req:
        if req.subprocess_env.has_key('HTTPS') \
           and req.subprocess_env['HTTPS'] == 'on':
            url_referer = sweburl + req.unparsed_uri
        else:
            url_referer = weburl + req.unparsed_uri
    else:
        url_referer = weburl

    try:
        return tmpl.tmpl_create_userinfobox(ln=language,
                                            url_referer=url_referer,
                                            guest=isGuestUser(uid),
                                            username=get_nickname_or_email(uid),
                                            submitter=isUserSubmitter(uid),
                                            referee=isUserReferee(uid),
                                            admin=isUserAdmin(uid),
                                            )
    except OperationalError:
        return ""

def list_registered_users():
    """List all registered users."""
    return run_sql("SELECT id,email FROM user where email!=''")

def list_users_in_role(role):
    """List all users of a given role (see table accROLE).
       @param role: role of user (string)
       @return list of uids
    """
    query = """SELECT uacc.id_user
               FROM user_accROLE uacc JOIN accROLE acc
                    ON uacc.id_accROLE=acc.id
               WHERE acc.name='%s'"""
    res = run_sql(query % escape_string(role))
    if res:
        return map(lambda x: int(x[0]), res)
    return []

def list_users_in_roles(role_list):
    """List all users of the given roles (see table accROLE).
       @param role_list: list of roles [string]
       @return list of uids
    """
    if not (type(role_list) is list or type(role_list) is tuple):
        role_list = [role_list]
    params = ''
    query = """SELECT distinct(uacc.id_user)
               FROM user_accROLE uacc JOIN accROLE acc
                    ON uacc.id_accROLE=acc.id
               %s"""
    if len(role_list) > 0:
        params = 'WHERE '
        for role in role_list[:-1]:
            params += "acc.name='%s' OR " % escape_string(role)
        params += "acc.name='%s'" % escape_string(role_list[-1])
    res = run_sql(query % params)
    if res:
        return map(lambda x: int(x[0]), res)
    return []
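## Illustrative sketch (not part of this patch): list_users_in_roles() above
## assembles its WHERE clause by escaping each role name and joining the
## "acc.name='...'" tests with OR.  The hypothetical helper below reproduces
## that string assembly with a trivial stand-in for escape_string(), so the
## resulting clause can be inspected without a database.
def _demo_roles_where_clause(role_list):
    """Build the WHERE fragment the way list_users_in_roles() does."""
    fake_escape = lambda s: s.replace("'", "\\'")   # stand-in for escape_string()
    if not role_list:
        return ''
    params = 'WHERE '
    for role in role_list[:-1]:
        params += "acc.name='%s' OR " % fake_escape(role)
    params += "acc.name='%s'" % fake_escape(role_list[-1])
    return params
## e.g. _demo_roles_where_clause(['referee', 'admin']) returns
## "WHERE acc.name='referee' OR acc.name='admin'".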
## --- follow some functions for Apache user/group authentication

def auth_apache_user_p(user, password, apache_password_file=CFG_APACHE_PASSWORD_FILE):
    """Check whether the user-supplied credentials correspond to a valid entry
    in the Apache password data file.
    Return 0 in case of failure, 1 in case of success."""
    try:
        if not apache_password_file.startswith("/"):
            apache_password_file = tmpdir + "/" + apache_password_file
        dummy, pipe_output = os.popen2(["grep", "^" + user + ":", apache_password_file], 'r')
        line = pipe_output.readlines()[0]
        password_apache = string.split(string.strip(line), ":")[1]
    except:
        # no pw found, so return not-allowed status
        return 0
    salt = password_apache[:2]
    if crypt.crypt(password, salt) == password_apache:
        return 1
    else:
        return 0

def auth_apache_user_in_groups(user, apache_group_file=CFG_APACHE_GROUP_FILE):
    """Return the list of Apache groups to which the Apache user belongs."""
    out = []
    try:
        if not apache_group_file.startswith("/"):
            apache_group_file = tmpdir + "/" + apache_group_file
        dummy, pipe_output = os.popen2(["grep", user, apache_group_file], 'r')
        for line in pipe_output.readlines():
            out.append(string.split(string.strip(line), ":")[0])
    except:
        # no groups found, so return empty list
        pass
    return out

def auth_apache_user_collection_p(user, password, coll):
    """Check whether the user-supplied credentials correspond to a valid entry
    in the Apache password data file, and whether this user is authorized to
    see the given collection.
    Return 0 in case of failure, 1 in case of success."""
    from invenio.search_engine import coll_restricted_p, coll_restricted_group
    if not auth_apache_user_p(user, password):
        return 0
    if not coll_restricted_p(coll):
        return 1
    if coll_restricted_group(coll) in auth_apache_user_in_groups(user):
        return 1
    else:
        return 0

def get_user_preferences(uid):
    pref = run_sql("SELECT id, settings FROM user WHERE id=%s", (uid,))
    if pref:
        try:
            return deserialize_via_marshal(pref[0][1])
        except:
            return get_default_user_preferences()
    return None

def set_user_preferences(uid, pref):
    run_sql("UPDATE user SET settings='%s' WHERE id=%s" % (serialize_via_marshal(pref), uid))

def get_default_user_preferences():
    user_preference = {
        'login_method': ''}

    for system in CFG_EXTERNAL_AUTHENTICATION.keys():
        if CFG_EXTERNAL_AUTHENTICATION[system][1]:
            user_preference['login_method'] = system
            break
    return user_preference

def serialize_via_marshal(obj):
    """Serialize Python object via marshal into a compressed string."""
    return escape_string(compress(dumps(obj)))

def deserialize_via_marshal(string):
    """Decompress and deserialize string into a Python object via marshal."""
    return loads(decompress(string))
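## Illustrative sketch (not part of this patch): get_user_preferences() and
## set_user_preferences() above persist the settings dictionary via
## serialize_via_marshal()/deserialize_via_marshal(), i.e. marshal.dumps()
## plus zlib compression; escape_string() appears to protect the blob only
## while it is embedded in the SQL statement.  The hypothetical round trip
## below checks the marshal/zlib part without touching the database.
def _demo_preferences_roundtrip():
    """Serialize and deserialize a sample preferences dictionary."""
    from marshal import dumps, loads
    from zlib import compress, decompress
    prefs = {'login_method': ''}              # shape of get_default_user_preferences()
    blob = compress(dumps(prefs))             # what gets stored (before SQL escaping)
    return loads(decompress(blob)) == prefs   # True if the round trip is lossless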