diff --git a/modules/bibrank/bin/bibrank.in b/modules/bibrank/bin/bibrank.in
index 74790cafb..a63ee9279 100644
--- a/modules/bibrank/bin/bibrank.in
+++ b/modules/bibrank/bin/bibrank.in
@@ -1,480 +1,484 @@
 ##Ranking of records using different parameters and methods.
 
 ## This file is part of the CERN Document Server Software (CDSware).
 ## Copyright (C) 2002 CERN.
 ##
 ## The CDSware is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## The CDSware is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDSware; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 ## read config variables:
 #include "config.wml"
 #include "configbis.wml"
 #include "cdswmllib.wml"
 
 ## start Python:
 <protect>#!</protect><PYTHON>
 <protect># -*- coding: utf-8 -*-</protect>
 <protect>## $Id$</protect>
 <protect>## DO NOT EDIT THIS FILE!  IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
 """
 BibRank ranking daemon.
 
 Usage: %s [options]
      Ranking examples:
        %s -wjif -a --id=0-30000,30001-860000 --verbose=9
        %s -wjif -d --modified='2002-10-27 13:57:26'
-       %s -wjif --rebalance --collection=Articles
-       %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware
+       %s -wwrd --rebalance --collection=Articles
+       %s -wwrd -a -i 234-250,293,300-500 -u admin@cdsware
  
  Ranking options:
  -w, --run=r1[,r2]         runs each rank method in the order given
 
  -c, --collection=c1[,c2]  select according to collection
  -i, --id=low[-high]       select according to doc recID
  -m, --modified=from[,to]  select according to modification date
  -l, --lastupdate          select according to last update
 
  -a, --add                 add or update words for selected records
  -d, --del                 delete words for selected records
  -S, --stat                show statistics for a method
  
- -R, --rebalance           rebalancing rank data: does complete update. if not used: quick update
+ -R, --recalculate         recalculate weight data, used by word frequency method
+                           should be used if ca 1% of the documents have been changed
+                           since last time -R was used
  Repairing options:
  -k,  --check              check consistency for all records in the table(s)
                            check if update of ranking data is necessary
  -r, --repair              try to repair all records in the table(s)
  Scheduling options:
  -u, --user=USER           user name to store task, password needed
  -s, --sleeptime=SLEEP     time after which to repeat tasks (no)
                             e.g.: 1s, 30m, 24h, 7d
  -t, --time=TIME           moment for the task to be active (now)
                             e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26
  General options:
  -h, --help                print this help and exit
  -V, --version             print version and exit
  -v, --verbose=LEVEL       verbose level (from 0 to 9, default 1)
 """
 
 __version__ = "<: print generate_pretty_version_string('$Id$'); :>"
 
 ## fill config variables:
 pylibdir = "<LIBDIR>/python"
 
 try:
     from marshal import loads,dumps
     from zlib import compress,decompress
     from string import split,translate,lower,upper
     import getopt
     import getpass
     import string
     import os
     import sre
     import sys
     import time
     import MySQLdb
     import urllib
     import signal
     import tempfile
     import traceback
     import cStringIO
     import re
     import copy
     import types
     import ConfigParser
     import cdsware.search_engine
     
 except ImportError, e:
     import sys
 
 try:
     sys.path.append('%s' % pylibdir)
     from cdsware.dbquery import run_sql
     from cdsware.bibrank_tag_based_indexer import *
     from cdsware.bibrank_word_indexer import *
     from cdsware.access_control_engine import acc_authorize_action 
     from cdsware.search_engine import perform_request_search
 except ImportError, e:
     import sys
 
 # Module-level task state and tuning constants.
 task_id = -1 # the task id; overwritten by task_run() with the id of the schTASK row
 nb_char_in_line = 50  # for verbose pretty printing
 chunksize = 1000 # default size of chunks that the records will be treated by
 base_process_size = 4500 # process base size; the --maxmem check reports it in kB
 bibrank_options = {} # will hold task options; filled by command_line() or task_run()
 
 def serialize_via_numeric_array_dumps(arr):
     """Return whatever Numeric.dumps() produces for arr."""
     # NOTE(review): Numeric is not imported explicitly in this file;
     # presumably it arrives through one of the star imports -- confirm
     dumped = Numeric.dumps(arr)
     return dumped
 def serialize_via_numeric_array_compr(str):
     """Return the zlib-compressed form of the given string."""
     packed = compress(str)
     return packed
 def serialize_via_numeric_array_escape(str):
     """Return the given string passed through MySQLdb.escape_string()."""
     escaped = MySQLdb.escape_string(str)
     return escaped
 def serialize_via_numeric_array(arr):
     """Serialize Numeric array into a compressed, SQL-escaped string."""
     # three stages, each delegated to its own helper: dump, compress, escape
     dumped = serialize_via_numeric_array_dumps(arr)
     packed = serialize_via_numeric_array_compr(dumped)
     return serialize_via_numeric_array_escape(packed)
 def deserialize_via_numeric_array(string):
     """Decompress string with zlib and hand the result to Numeric.loads()."""
     unpacked = decompress(string)
     return Numeric.loads(unpacked)
 def serialize_via_marshal(obj):
     """Marshal obj, compress the bytes and escape them with MySQLdb."""
     marshalled = dumps(obj)
     packed = compress(marshalled)
     return MySQLdb.escape_string(packed)
 def deserialize_via_marshal(string):
     """Decompress string with zlib and unmarshal it into a Python object."""
     unpacked = decompress(string)
     return loads(unpacked)
 
 def authenticate(user, header="BibRank Task Submission", action="runbibrank"):
     print header
     print "=" * len(header)
     if user == "":
         print>> sys.stdout, "\rUsername: ",
         user = string.strip(string.lower(sys.stdin.readline()))
     else:
         print>> sys.stdout, "\rUsername: ", user
     res = run_sql("select id,password from user where email=%s", (user,), 1)
     if not res:
         print "Sorry, %s does not exist." % user
         sys.exit(1)
     else:
         (uid_db, password_db) = res[0]
         if password_db:
             password_entered = getpass.getpass()
             if password_db == password_entered:
                 pass
             else:
                 print "Sorry, wrong credentials for %s." % user
                 sys.exit(1)
         (auth_code, auth_message) = acc_authorize_action(uid_db, action)
         if auth_code != 0:
             print auth_message
             sys.exit(1)
     return user
 
 def usage(code, msg=''):
     "Prints usage for this module."
     if msg:
         sys.stderr.write("Error: %s.\n" % msg)
 <protect>
     print >> sys.stderr, \
     """ Usage: %s [options]
      Ranking examples:
        %s -wjif -a --id=0-30000,30001-860000 --verbose=9
        %s -wjif -d --modified='2002-10-27 13:57:26'
        %s -wjif --rebalance --collection=Articles
        %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware
  
  Ranking options:
  -w, --run=r1[,r2]         runs each rank method in the order given
 
  -c, --collection=c1[,c2]  select according to collection
  -i, --id=low[-high]       select according to doc recID
  -m, --modified=from[,to]  select according to modification date
  -l, --lastupdate          select according to last update
 
  -a, --add                 add or update words for selected records
  -d, --del                 delete words for selected records
  -S, --stat                show statistics for a method
  
- -R, --rebalance           rebalancing rank data: does complete update. if not used: quick update
+ -R, --recalculate         recalculate weigth data, used by word frequency method
+                           should be used if ca 1%% of the document has been changed
+                           since last time -R was used
  Repairing options:
  -k,  --check              check consistency for all records in the table(s)
                            check if update of ranking data is necessary
  -r, --repair              try to repair all records in the table(s)
  Scheduling options:
  -u, --user=USER           user name to store task, password needed
  -s, --sleeptime=SLEEP     time after which to repeat tasks (no)
                             e.g.: 1s, 30m, 24h, 7d
  -t, --time=TIME           moment for the task to be active (now)
                             e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26
  General options:
  -h, --help                print this help and exit
  -V, --version             print version and exit
  -v, --verbose=LEVEL       verbose level (from 0 to 9, default 1)
     """ % ((sys.argv[0],) * 5)
 </protect>
     sys.exit(code)
 
 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"):
     """Return a date string written according to format_string.
     var is either a shift with respect to now -- an optional sign, digits
     and one unit letter out of d, h, m, s, e.g. "+15s", "-2d" or "3h" --
     or an absolute date already written in format_string.  Only the start
     of var is matched against the shift pattern.  A value that is neither
     a shift nor a valid date makes time.strptime() raise ValueError.
     """
     # 're' is used instead of its deprecated alias 'sre'; same pattern
     shift_re = re.compile(r"([-\+]{0,1})([\d]+)([dhms])")
     factors = {"d": 24*3600, "h": 3600, "m": 60, "s": 1}
     m = shift_re.match(var)
     if m:
         (sign_text, digits, unit) = m.groups()
         offset = factors[unit] * float(digits)
         if sign_text == "-":
             offset = -offset
         moment = time.localtime(time.time() + offset)
     else:
         # not a shift: parsing the absolute date also validates it
         moment = time.strptime(var, format_string)
     return time.strftime(format_string, moment)
 
 def task_sig_sleep(sig, frame):
     """Handler for BibSched's 'sleep' signal: mark the task SLEEPING and
     block in signal.pause() until another signal is received."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     write_message("sleeping...")
     task_update_status("SLEEPING")
     signal.pause() # blocks here until the next signal arrives
 
 def task_sig_wakeup(sig, frame):
     """Handler for BibSched's 'wakeup' signal: log it and mark the task
     CONTINUING."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     write_message("continuing...")
     task_update_status("CONTINUING")
 
 def task_sig_stop_commands():
     """Run the clean-up commands needed before the task quits.
     Meant for the task_sig_stop() handler; at present it only logs the
     start and the end of the (empty) clean-up sequence.
     """
     for note in ("stopping commands started", "stopping commands ended"):
         write_message(note)
 
 def task_sig_suicide(sig, frame):
     """Handler for BibSched's 'suicide' signal: record SUICIDING, then
     SUICIDED, and leave the process with exit status 0."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     # each log line is followed by the matching status update
     for (note, state) in (("suiciding myself now...", "SUICIDING"),
                           ("suicided", "SUICIDED")):
         write_message(note)
         task_update_status(state)
     sys.exit(0)
 
 def task_sig_unknown(sig, frame):
     """Handler for any other signal sent by shell or user: log that the
     signal is ignored and do nothing else."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     ignored_note = "unknown signal %d ignored" % sig
     write_message(ignored_note)
 
 def task_update_progress(msg):
     """Write msg into the progress column of the current task's schTASK row."""
     escaped = MySQLdb.escape_string(msg)
     sql = "UPDATE schTASK SET progress='%s' where id=%d" % (escaped, task_id)
     if bibrank_options["verbose"] >= 9:
         # at debug level the statement itself is logged before it is run
         write_message(sql)
     run_sql(sql)
 
 def task_update_status(val):
     """Write val into the status column of the current task's schTASK row."""
     escaped = MySQLdb.escape_string(val)
     sql = "UPDATE schTASK SET status='%s' where id=%d" % (escaped, task_id)
     if bibrank_options["verbose"] >= 9:
         # at debug level the statement itself is logged before it is run
         write_message(sql)
     run_sql(sql)
 
 def split_ranges(parse_string):
     """Turn a record id selection such as "234-250,293,500-300" into a
     list of [low, high] integer pairs.
     Items are separated by commas; an item is a single id (low == high)
     or two ids joined by "-".  Reversed bounds are swapped so that low is
     never greater than high.  A non-numeric id makes int() raise ValueError.
     """
     recIDs = []
     # 'chunk' rather than 'range', so the builtin is no longer shadowed
     for chunk in parse_string.split(","):
         bounds = chunk.split("-")
         low = int(bounds[0])
         if len(bounds) == 1:
             high = low
         else:
             # only the first two parts of the item are looked at
             high = int(bounds[1])
         if low > high: # sanity check
             low, high = high, low
         recIDs.append([low, high])
     return recIDs
 
 def get_date_range(var):
     """Return the dates contained in var as a (low, high) tuple.
     var is "from" or "from,to"; each part is converted by get_datetime().
     With a single part, high is None.  More than two comma-separated parts
     raise ValueError instead of silently yielding None as before.
     """
     limits = var.split(",")
     if len(limits) > 2:
         raise ValueError("expected 'from' or 'from,to' but got %r" % (var,))
     low = get_datetime(limits[0])
     if len(limits) == 1:
         return low, None
     return low, get_datetime(limits[1])
 
 def command_line():
     """Storing the task together with the parameters given."""
     global bibrank_options
     long_flags = ["lastupdate","add","del","repair","maxmem", "flush","stat", "rebalance", "id=", "collection=", "check", "modified=", "update", "run=", "user=", "sleeptime=", "time=", "help", "version", "verbose="]
     short_flags = "ladSi:m:c:kUrRM:f:w:u:s:t:hVv:"
     format_string = "%Y-%m-%d %H:%M:%S"
     sleeptime = ""
     try:
         opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags)
     except getopt.GetoptError, err:
         write_message(err, sys.stderr)
         usage(1)
     if args:
         usage(1)
     bibrank_options = {"quick":"yes","cmd":"add","flush":100000,"validset":"", "collection":[], "id":[], "check": "", "stat":"", "modified":"", "last_updated":"last_updated","run":"", "verbose":1}
 
     res = run_sql("SELECT name from rnkMETHOD")
     bibrank_options["run"] = []
     for (name,) in res:
         bibrank_options["run"].append(name)
 
     sched_time = time.strftime(format_string)
     user = ""
     try:
         for opt in opts:
             if opt == ("-h","") or opt == ("--help",""):
                 usage(1)
             elif opt == ("-V","") or opt == ("--version",""):
                 print __version__
                 sys.exit(1)
             elif opt[0] in ["--verbose", "-v"]:
                 bibrank_options["verbose"] = int(opt[1])
             elif opt == ("-a","") or opt == ("--add",""):
                 bibrank_options["cmd"] = "add"
                 if ("-x","") in opts or ("--del","") in opts:
                     usage(1)
             elif opt[0] in ["--run", "-w"]:
                 bibrank_options["run"] = []
                 run = split(opt[1],",")
                 for key in range(0,len(run)):
                     bibrank_options["run"].append(run[key])
             elif opt == ("-r","") or opt == ("--repair",""):
                 bibrank_options["cmd"] = "repair"
             elif opt == ("-d","") or opt == ("--del",""):            
                 bibrank_options["cmd"]="del"
             elif opt[0] in [ "-u", "--user"]:
                 user = opt[1]
             elif opt[0] in [ "-k", "--check"]:
                 bibrank_options["cmd"]= "check" 
             elif opt[0] in [ "-S", "--stat"]:
                 bibrank_options["cmd"] = "stat"
             elif opt[0] in [ "-i", "--id" ]:
                 bibrank_options["id"] = bibrank_options["id"] + split_ranges(opt[1])
                 bibrank_options["last_updated"] = ""
             elif opt[0] in [ "-c", "--collection" ]:
                 bibrank_options["collection"] = opt[1]
             elif opt[0] in [ "-R", "--rebalance"]: 
                  bibrank_options["quick"] = "no"   
             elif opt[0] in [ "-f", "--flush"]:     
                 bibrank_options["flush"]=int(opt[1])
             elif opt[0] in [ "-M", "--maxmem"]:
                 bibrank_options["maxmem"]=int(opt[1])
                 if bibrank_options["maxmem"] < base_process_size + 1000:
                     raise StandardError, "Memory usage should be higher than %d kB" % (base_process_size + 1000)
             elif opt[0] in [ "-m", "--modified" ]:
                 bibrank_options["modified"] = get_date_range(opt[1]) #2002-10-27 13:57:26
                 bibrank_options["last_updated"] = ""
             elif opt[0] in [ "-l", "--lastupdate" ]:
                 bibrank_options["last_updated"] = "last_updated"
             elif opt[0] in [ "-s", "--sleeptime" ]:
                 get_datetime(opt[1]) # see if it is a valid shift
                 sleeptime=opt[1]
             elif opt[0] in [ "-t", "--time" ]:
                 sched_time = get_datetime(opt[1])
             else:
                 usage(1)
     except StandardError, e:
         write_message(e, sys.stderr)
         sys.exit(1)
              
     user = authenticate(user)
     if bibrank_options["verbose"]>=9:
         write_message("Storing task options %s" % bibrank_options)
 
     new_task_id = run_sql("""INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('bibrank',%s,%s,%s,%s,'WAITING')""", (user, sched_time, sleeptime, dumps(bibrank_options)))
 
     print "Task #%d was successfully scheduled for execution." % new_task_id
     return
 
 def task_run(row):
     """Run the indexing task. The row argument is the BibSched task
     queue row, containing if, arguments, etc.
     Return 1 in case of success and 0 in case of failure.
     """
     global task_id, bibrank_options
     task_id = row[0]
     task_proc = row[1]
     bibrank_options = loads(row[6])
     task_status = row[7]
 
 
     # install signal handlers
     signal.signal(signal.SIGUSR1, task_sig_sleep)
     signal.signal(signal.SIGTERM, task_sig_stop)
     signal.signal(signal.SIGABRT, task_sig_suicide)
     signal.signal(signal.SIGCONT, task_sig_wakeup)
     signal.signal(signal.SIGINT, task_sig_unknown)
 
     if task_proc != "bibrank":
         write_message("-The task #%d does not seem to be a BibRank task." % task_id, sys.stderr)
         return 0
     if task_status != "WAITING":
         write_message("The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr)
         return 0
     if bibrank_options["verbose"]:
         write_message("Task #%d started." % task_id)
     task_update_status("RUNNING")
     try:  
         bibrank_options = marshal.loads(row[6])
         for key in bibrank_options["run"]:
             write_message("")
             file = etcdir + "/bibrank/" + key + ".cfg"
             if bibrank_options["verbose"] >= 9:
                 write_message("Getting configuration from file: %s" % file)
             config = ConfigParser.ConfigParser()
             try:
                 config.readfp(open(file))
             except StandardError, e:
                 write_message("Cannot find configurationfile: %s. The rankmethod may also not be registered using the BibRank Admin Interface." % file, sys.stderr)
                 raise StandardError
 
             #Using the function variable to call the function related to the rank method
             cfg_function = config.get("rank_method", "function")
             func_object = globals().get(cfg_function)
             if func_object:
                 func_object(row, key)
             else:
                 write_message("Cannot run method '%s', no function to call" % key)  
     except StandardError, e:
         write_message("\nException caught: %s" % e, sys.stderr)
         traceback.print_tb(sys.exc_info()[2])
         task_update_status("ERROR")
         sys.exit(1)
 
     task_update_status("DONE")
     if bibrank_options["verbose"]:
         write_message("Task #%d finished." % task_id)
     return 1
 
 def main():
     if len(sys.argv) == 2:
         try:
             id = int(sys.argv[1])
         except StandardError, err:
             command_line()
             sys.exit()
         res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (id), None, 1)
         if not res:
             write_message("Selected task not found.", sys.stderr)
             sys.exit(1)
         try:
             if not task_run(res[0]):
                 write_message("Error occurred. Exiting.", sys.stderr)
         except StandardError, e:
             write_message("Unexpected error occurred: %s." % e, sys.stderr)
             write_message("Traceback is:")
             traceback.print_tb(sys.exc_info()[2])
             write_message("Exiting.")
             task_update_status("ERROR")
     else:
         command_line()
         
 # call main() only when this file is executed as a script
 if __name__ == "__main__":
     main()
   
diff --git a/modules/bibrank/bin/bibrank.wml b/modules/bibrank/bin/bibrank.wml
index 74790cafb..a63ee9279 100644
--- a/modules/bibrank/bin/bibrank.wml
+++ b/modules/bibrank/bin/bibrank.wml
@@ -1,480 +1,484 @@
 ##Ranking of records using different parameters and methods.
 
 ## This file is part of the CERN Document Server Software (CDSware).
 ## Copyright (C) 2002 CERN.
 ##
 ## The CDSware is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## The CDSware is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDSware; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 ## read config variables:
 #include "config.wml"
 #include "configbis.wml"
 #include "cdswmllib.wml"
 
 ## start Python:
 <protect>#!</protect><PYTHON>
 <protect># -*- coding: utf-8 -*-</protect>
 <protect>## $Id$</protect>
 <protect>## DO NOT EDIT THIS FILE!  IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
 """
 BibRank ranking daemon.
 
 Usage: %s [options]
      Ranking examples:
        %s -wjif -a --id=0-30000,30001-860000 --verbose=9
        %s -wjif -d --modified='2002-10-27 13:57:26'
-       %s -wjif --rebalance --collection=Articles
-       %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware
+       %s -wwrd --rebalance --collection=Articles
+       %s -wwrd -a -i 234-250,293,300-500 -u admin@cdsware
  
  Ranking options:
  -w, --run=r1[,r2]         runs each rank method in the order given
 
  -c, --collection=c1[,c2]  select according to collection
  -i, --id=low[-high]       select according to doc recID
  -m, --modified=from[,to]  select according to modification date
  -l, --lastupdate          select according to last update
 
  -a, --add                 add or update words for selected records
  -d, --del                 delete words for selected records
  -S, --stat                show statistics for a method
  
- -R, --rebalance           rebalancing rank data: does complete update. if not used: quick update
+ -R, --recalculate         recalculate weight data, used by word frequency method
+                           should be used if ca 1% of the documents have been changed
+                           since last time -R was used
  Repairing options:
  -k,  --check              check consistency for all records in the table(s)
                            check if update of ranking data is necessary
  -r, --repair              try to repair all records in the table(s)
  Scheduling options:
  -u, --user=USER           user name to store task, password needed
  -s, --sleeptime=SLEEP     time after which to repeat tasks (no)
                             e.g.: 1s, 30m, 24h, 7d
  -t, --time=TIME           moment for the task to be active (now)
                             e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26
  General options:
  -h, --help                print this help and exit
  -V, --version             print version and exit
  -v, --verbose=LEVEL       verbose level (from 0 to 9, default 1)
 """
 
 __version__ = "<: print generate_pretty_version_string('$Id$'); :>"
 
 ## fill config variables:
 pylibdir = "<LIBDIR>/python"
 
 try:
     from marshal import loads,dumps
     from zlib import compress,decompress
     from string import split,translate,lower,upper
     import getopt
     import getpass
     import string
     import os
     import sre
     import sys
     import time
     import MySQLdb
     import urllib
     import signal
     import tempfile
     import traceback
     import cStringIO
     import re
     import copy
     import types
     import ConfigParser
     import cdsware.search_engine
     
 except ImportError, e:
     import sys
 
 try:
     sys.path.append('%s' % pylibdir)
     from cdsware.dbquery import run_sql
     from cdsware.bibrank_tag_based_indexer import *
     from cdsware.bibrank_word_indexer import *
     from cdsware.access_control_engine import acc_authorize_action 
     from cdsware.search_engine import perform_request_search
 except ImportError, e:
     import sys
 
 # Module-level task state and tuning constants.
 task_id = -1 # the task id; used in the schTASK update queries
 nb_char_in_line = 50  # for verbose pretty printing
 chunksize = 1000 # default size of chunks that the records will be treated by
 base_process_size = 4500 # process base size
 bibrank_options = {} # will hold task options; the handlers read its "verbose" entry
 
 def serialize_via_numeric_array_dumps(arr):
     """Return whatever Numeric.dumps() produces for arr."""
     # NOTE(review): Numeric is not imported explicitly in this file;
     # presumably it arrives through one of the star imports -- confirm
     dumped = Numeric.dumps(arr)
     return dumped
 def serialize_via_numeric_array_compr(str):
     """Return the zlib-compressed form of the given string."""
     packed = compress(str)
     return packed
 def serialize_via_numeric_array_escape(str):
     """Return the given string passed through MySQLdb.escape_string()."""
     escaped = MySQLdb.escape_string(str)
     return escaped
 def serialize_via_numeric_array(arr):
     """Serialize Numeric array into a compressed, SQL-escaped string."""
     # three stages, each delegated to its own helper: dump, compress, escape
     dumped = serialize_via_numeric_array_dumps(arr)
     packed = serialize_via_numeric_array_compr(dumped)
     return serialize_via_numeric_array_escape(packed)
 def deserialize_via_numeric_array(string):
     """Decompress string with zlib and hand the result to Numeric.loads()."""
     unpacked = decompress(string)
     return Numeric.loads(unpacked)
 def serialize_via_marshal(obj):
     """Marshal obj, compress the bytes and escape them with MySQLdb."""
     marshalled = dumps(obj)
     packed = compress(marshalled)
     return MySQLdb.escape_string(packed)
 def deserialize_via_marshal(string):
     """Decompress string with zlib and unmarshal it into a Python object."""
     unpacked = decompress(string)
     return loads(unpacked)
 
 def authenticate(user, header="BibRank Task Submission", action="runbibrank"):
     print header
     print "=" * len(header)
     if user == "":
         print>> sys.stdout, "\rUsername: ",
         user = string.strip(string.lower(sys.stdin.readline()))
     else:
         print>> sys.stdout, "\rUsername: ", user
     res = run_sql("select id,password from user where email=%s", (user,), 1)
     if not res:
         print "Sorry, %s does not exist." % user
         sys.exit(1)
     else:
         (uid_db, password_db) = res[0]
         if password_db:
             password_entered = getpass.getpass()
             if password_db == password_entered:
                 pass
             else:
                 print "Sorry, wrong credentials for %s." % user
                 sys.exit(1)
         (auth_code, auth_message) = acc_authorize_action(uid_db, action)
         if auth_code != 0:
             print auth_message
             sys.exit(1)
     return user
 
 def usage(code, msg=''):
     "Prints usage for this module."
     if msg:
         sys.stderr.write("Error: %s.\n" % msg)
 <protect>
     print >> sys.stderr, \
     """ Usage: %s [options]
      Ranking examples:
        %s -wjif -a --id=0-30000,30001-860000 --verbose=9
        %s -wjif -d --modified='2002-10-27 13:57:26'
        %s -wjif --rebalance --collection=Articles
        %s -wsbr -a -i 234-250,293,300-500 -u admin@cdsware
  
  Ranking options:
  -w, --run=r1[,r2]         runs each rank method in the order given
 
  -c, --collection=c1[,c2]  select according to collection
  -i, --id=low[-high]       select according to doc recID
  -m, --modified=from[,to]  select according to modification date
  -l, --lastupdate          select according to last update
 
  -a, --add                 add or update words for selected records
  -d, --del                 delete words for selected records
  -S, --stat                show statistics for a method
  
- -R, --rebalance           rebalancing rank data: does complete update. if not used: quick update
+ -R, --recalculate         recalculate weigth data, used by word frequency method
+                           should be used if ca 1%% of the document has been changed
+                           since last time -R was used
  Repairing options:
  -k,  --check              check consistency for all records in the table(s)
                            check if update of ranking data is necessary
  -r, --repair              try to repair all records in the table(s)
  Scheduling options:
  -u, --user=USER           user name to store task, password needed
  -s, --sleeptime=SLEEP     time after which to repeat tasks (no)
                             e.g.: 1s, 30m, 24h, 7d
  -t, --time=TIME           moment for the task to be active (now)
                             e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26
  General options:
  -h, --help                print this help and exit
  -V, --version             print version and exit
  -v, --verbose=LEVEL       verbose level (from 0 to 9, default 1)
     """ % ((sys.argv[0],) * 5)
 </protect>
     sys.exit(code)
 
 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"):
     """Return a date string written according to format_string.
     var is either a shift with respect to now -- an optional sign, digits
     and one unit letter out of d, h, m, s, e.g. "+15s", "-2d" or "3h" --
     or an absolute date already written in format_string.  Only the start
     of var is matched against the shift pattern.  A value that is neither
     a shift nor a valid date makes time.strptime() raise ValueError.
     """
     # 're' is used instead of its deprecated alias 'sre'; same pattern
     shift_re = re.compile(r"([-\+]{0,1})([\d]+)([dhms])")
     factors = {"d": 24*3600, "h": 3600, "m": 60, "s": 1}
     m = shift_re.match(var)
     if m:
         (sign_text, digits, unit) = m.groups()
         offset = factors[unit] * float(digits)
         if sign_text == "-":
             offset = -offset
         moment = time.localtime(time.time() + offset)
     else:
         # not a shift: parsing the absolute date also validates it
         moment = time.strptime(var, format_string)
     return time.strftime(format_string, moment)
 
 def task_sig_sleep(sig, frame):
     """Handler for BibSched's 'sleep' signal: mark the task SLEEPING and
     block in signal.pause() until another signal is received."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     write_message("sleeping...")
     task_update_status("SLEEPING")
     signal.pause() # blocks here until the next signal arrives
 
 def task_sig_wakeup(sig, frame):
     """Handler for BibSched's 'wakeup' signal: log it and mark the task
     CONTINUING."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     write_message("continuing...")
     task_update_status("CONTINUING")
 
 def task_sig_stop_commands():
     """Run the clean-up commands needed before the task quits.
     Meant for the task_sig_stop() handler; at present it only logs the
     start and the end of the (empty) clean-up sequence.
     """
     for note in ("stopping commands started", "stopping commands ended"):
         write_message(note)
 
 def task_sig_suicide(sig, frame):
     """Handler for BibSched's 'suicide' signal: record SUICIDING, then
     SUICIDED, and leave the process with exit status 0."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     # each log line is followed by the matching status update
     for (note, state) in (("suiciding myself now...", "SUICIDING"),
                           ("suicided", "SUICIDED")):
         write_message(note)
         task_update_status(state)
     sys.exit(0)
 
 def task_sig_unknown(sig, frame):
     """Handler for any other signal sent by shell or user: log that the
     signal is ignored and do nothing else."""
     debugging = bibrank_options["verbose"] >= 9
     if debugging:
         write_message("got signal %d" % sig)
     ignored_note = "unknown signal %d ignored" % sig
     write_message(ignored_note)
 
 def task_update_progress(msg):
     """Write msg into the progress column of the current task's schTASK row."""
     escaped = MySQLdb.escape_string(msg)
     sql = "UPDATE schTASK SET progress='%s' where id=%d" % (escaped, task_id)
     if bibrank_options["verbose"] >= 9:
         # at debug level the statement itself is logged before it is run
         write_message(sql)
     run_sql(sql)
 
 def task_update_status(val):
     """Write val into the status column of the current task's schTASK row."""
     escaped = MySQLdb.escape_string(val)
     sql = "UPDATE schTASK SET status='%s' where id=%d" % (escaped, task_id)
     if bibrank_options["verbose"] >= 9:
         # at debug level the statement itself is logged before it is run
         write_message(sql)
     run_sql(sql)
 
 def split_ranges(parse_string):
     """Turn a record id selection such as "234-250,293,500-300" into a
     list of [low, high] integer pairs.
     Items are separated by commas; an item is a single id (low == high)
     or two ids joined by "-".  Reversed bounds are swapped so that low is
     never greater than high.  A non-numeric id makes int() raise ValueError.
     """
     recIDs = []
     # 'chunk' rather than 'range', so the builtin is no longer shadowed
     for chunk in parse_string.split(","):
         bounds = chunk.split("-")
         low = int(bounds[0])
         if len(bounds) == 1:
             high = low
         else:
             # only the first two parts of the item are looked at
             high = int(bounds[1])
         if low > high: # sanity check
             low, high = high, low
         recIDs.append([low, high])
     return recIDs
 
 def get_date_range(var):
     """Return the date(s) contained in var as a (low, high) tuple.
     var holds one date, or two dates separated by a comma; each one is
     converted with get_datetime().  With a single date, high is None.
     Raises ValueError when var holds more than two dates; this case used
     to fall through and return None silently.
     """
     limits = var.split(",")
     if len(limits) > 2:
         raise ValueError("Expected 'from[,to]' but found %d dates in '%s'" % (len(limits), var))
     low = get_datetime(limits[0])
     if len(limits) == 1:
         return low, None
     return low, get_datetime(limits[1])
 
 def command_line():
     """Storing the task together with the parameters given."""
     global bibrank_options
     long_flags = ["lastupdate","add","del","repair","maxmem", "flush","stat", "rebalance", "id=", "collection=", "check", "modified=", "update", "run=", "user=", "sleeptime=", "time=", "help", "version", "verbose="]
     short_flags = "ladSi:m:c:kUrRM:f:w:u:s:t:hVv:"
     format_string = "%Y-%m-%d %H:%M:%S"
     sleeptime = ""
     try:
         opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags)
     except getopt.GetoptError, err:
         write_message(err, sys.stderr)
         usage(1)
     if args:
         usage(1)
     bibrank_options = {"quick":"yes","cmd":"add","flush":100000,"validset":"", "collection":[], "id":[], "check": "", "stat":"", "modified":"", "last_updated":"last_updated","run":"", "verbose":1}
 
     res = run_sql("SELECT name from rnkMETHOD")
     bibrank_options["run"] = []
     for (name,) in res:
         bibrank_options["run"].append(name)
 
     sched_time = time.strftime(format_string)
     user = ""
     try:
         for opt in opts:
             if opt == ("-h","") or opt == ("--help",""):
                 usage(1)
             elif opt == ("-V","") or opt == ("--version",""):
                 print __version__
                 sys.exit(1)
             elif opt[0] in ["--verbose", "-v"]:
                 bibrank_options["verbose"] = int(opt[1])
             elif opt == ("-a","") or opt == ("--add",""):
                 bibrank_options["cmd"] = "add"
                 if ("-x","") in opts or ("--del","") in opts:
                     usage(1)
             elif opt[0] in ["--run", "-w"]:
                 bibrank_options["run"] = []
                 run = split(opt[1],",")
                 for key in range(0,len(run)):
                     bibrank_options["run"].append(run[key])
             elif opt == ("-r","") or opt == ("--repair",""):
                 bibrank_options["cmd"] = "repair"
             elif opt == ("-d","") or opt == ("--del",""):            
                 bibrank_options["cmd"]="del"
             elif opt[0] in [ "-u", "--user"]:
                 user = opt[1]
             elif opt[0] in [ "-k", "--check"]:
                 bibrank_options["cmd"]= "check" 
             elif opt[0] in [ "-S", "--stat"]:
                 bibrank_options["cmd"] = "stat"
             elif opt[0] in [ "-i", "--id" ]:
                 bibrank_options["id"] = bibrank_options["id"] + split_ranges(opt[1])
                 bibrank_options["last_updated"] = ""
             elif opt[0] in [ "-c", "--collection" ]:
                 bibrank_options["collection"] = opt[1]
             elif opt[0] in [ "-R", "--rebalance"]: 
                  bibrank_options["quick"] = "no"   
             elif opt[0] in [ "-f", "--flush"]:     
                 bibrank_options["flush"]=int(opt[1])
             elif opt[0] in [ "-M", "--maxmem"]:
                 bibrank_options["maxmem"]=int(opt[1])
                 if bibrank_options["maxmem"] < base_process_size + 1000:
                     raise StandardError, "Memory usage should be higher than %d kB" % (base_process_size + 1000)
             elif opt[0] in [ "-m", "--modified" ]:
                 bibrank_options["modified"] = get_date_range(opt[1]) #2002-10-27 13:57:26
                 bibrank_options["last_updated"] = ""
             elif opt[0] in [ "-l", "--lastupdate" ]:
                 bibrank_options["last_updated"] = "last_updated"
             elif opt[0] in [ "-s", "--sleeptime" ]:
                 get_datetime(opt[1]) # see if it is a valid shift
                 sleeptime=opt[1]
             elif opt[0] in [ "-t", "--time" ]:
                 sched_time = get_datetime(opt[1])
             else:
                 usage(1)
     except StandardError, e:
         write_message(e, sys.stderr)
         sys.exit(1)
              
     user = authenticate(user)
     if bibrank_options["verbose"]>=9:
         write_message("Storing task options %s" % bibrank_options)
 
     new_task_id = run_sql("""INSERT INTO schTASK (proc,user,runtime,sleeptime,arguments,status) VALUES ('bibrank',%s,%s,%s,%s,'WAITING')""", (user, sched_time, sleeptime, dumps(bibrank_options)))
 
     print "Task #%d was successfully scheduled for execution." % new_task_id
     return
 
 def task_run(row):
     """Run the indexing task. The row argument is the BibSched task
     queue row, containing if, arguments, etc.
     Return 1 in case of success and 0 in case of failure.
     """
     global task_id, bibrank_options
     task_id = row[0]
     task_proc = row[1]
     bibrank_options = loads(row[6])
     task_status = row[7]
 
 
     # install signal handlers
     signal.signal(signal.SIGUSR1, task_sig_sleep)
     signal.signal(signal.SIGTERM, task_sig_stop)
     signal.signal(signal.SIGABRT, task_sig_suicide)
     signal.signal(signal.SIGCONT, task_sig_wakeup)
     signal.signal(signal.SIGINT, task_sig_unknown)
 
     if task_proc != "bibrank":
         write_message("-The task #%d does not seem to be a BibRank task." % task_id, sys.stderr)
         return 0
     if task_status != "WAITING":
         write_message("The task #%d is %s. I expected WAITING." % (task_id, task_status), sys.stderr)
         return 0
     if bibrank_options["verbose"]:
         write_message("Task #%d started." % task_id)
     task_update_status("RUNNING")
     try:  
         bibrank_options = marshal.loads(row[6])
         for key in bibrank_options["run"]:
             write_message("")
             file = etcdir + "/bibrank/" + key + ".cfg"
             if bibrank_options["verbose"] >= 9:
                 write_message("Getting configuration from file: %s" % file)
             config = ConfigParser.ConfigParser()
             try:
                 config.readfp(open(file))
             except StandardError, e:
                 write_message("Cannot find configurationfile: %s. The rankmethod may also not be registered using the BibRank Admin Interface." % file, sys.stderr)
                 raise StandardError
 
             #Using the function variable to call the function related to the rank method
             cfg_function = config.get("rank_method", "function")
             func_object = globals().get(cfg_function)
             if func_object:
                 func_object(row, key)
             else:
                 write_message("Cannot run method '%s', no function to call" % key)  
     except StandardError, e:
         write_message("\nException caught: %s" % e, sys.stderr)
         traceback.print_tb(sys.exc_info()[2])
         task_update_status("ERROR")
         sys.exit(1)
 
     task_update_status("DONE")
     if bibrank_options["verbose"]:
         write_message("Task #%d finished." % task_id)
     return 1
 
 def main():
     if len(sys.argv) == 2:
         try:
             id = int(sys.argv[1])
         except StandardError, err:
             command_line()
             sys.exit()
         res = run_sql("SELECT * FROM schTASK WHERE id='%d'" % (id), None, 1)
         if not res:
             write_message("Selected task not found.", sys.stderr)
             sys.exit(1)
         try:
             if not task_run(res[0]):
                 write_message("Error occurred. Exiting.", sys.stderr)
         except StandardError, e:
             write_message("Unexpected error occurred: %s." % e, sys.stderr)
             write_message("Traceback is:")
             traceback.print_tb(sys.exc_info()[2])
             write_message("Exiting.")
             task_update_status("ERROR")
     else:
         command_line()
         
 # call main() only when this file is executed as a script, not when it is imported
 if __name__ == "__main__":
     main()
   
diff --git a/modules/bibrank/doc/admin/guide.html.wml b/modules/bibrank/doc/admin/guide.html.wml
index f973032de..1dab3b4fa 100644
--- a/modules/bibrank/doc/admin/guide.html.wml
+++ b/modules/bibrank/doc/admin/guide.html.wml
@@ -1,564 +1,566 @@
 ## $Id$
 
 ## This file is part of the CERN Document Server Software (CDSware).
 ## Copyright (C) 2002 CERN.
 ##
 ## The CDSware is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## The CDSware is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDSware; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 #include "cdspage.wml" \
     title="BibRank Admin Guide" \
     navtrail_previous_links="<a class=navtrail href=<WEBURL>/admin/<lang:star: index.*.html>><MSG_ADMIN_AREA></a> &gt; <a class=navtrail href=<WEBURL>/admin/bibrank/>BibRank Admin</a>" \ 
     navbar_name="admin" \
     navbar_select="bibrank-admin-guide"
 
 <p>Version <: print generate_pretty_revision_date_string('$Id$'); :>
 
 <h2>Contents</h2>
 <strong>1.<a href="#o">Overview</a></strong><br>
 <strong>2.<a href="#c">Configuration Conventions</a></strong><br>
 <strong>3.<a href="#bai">BibRank Admin Interface</a></strong><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.1.<a href="#mi">Main interface</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.2.<a href="#ar">Add rank method</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.3.<a href="#sd">Show details of rank method</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.4.<a href="#mr">Modify rank method</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.5.<a href="#dr">Delete rank method</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.6.<a href="#mt">Modify translations</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 3.7.<a href="#mc">Modify visibility toward collections</a><br>
 <strong>4.<a href="#bd">BibRank Daemon</a></strong><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 4.1.<a href="#cli1">Command Line Interface</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 4.2.<a href="#ubd">Using BibRank</a><br>
 <strong>5.<a href="#brm">BibRank Methods</a></strong><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;5.1.<a href="#str">Single tag rank method</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;5.2.<a href="#wrd">Word Similarity/Similar Records</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;5.3.<a href="#cmb">Combined method</a><br>
 <strong>6.<a href="#bt">bibrankgkb Tool</a></strong><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;6.1.<a href="#cli2">Command Line Interface</a><br>
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;6.2.<a href="#ubt">Using bibrankgkb</a><br>
 <strong>7.<a href="#ainf">Additional Information</a></strong><br>
 
 <a name="o"></a><h2>1. Overview</h2>
 <p>The bibrank module currently consists of two tools:
 <br><br>bibrank - Generates ranking data for ranking search results based on methods like:
 <blockquote>
 <pre>
 Journal Impact Factor
 Word Similarity/Similar Records
 Combined Method
 ##Number of downloads
 ##Author Impact
 ##Citation Impact
 </pre>
 </blockquote>
 bibrankgkb - For generating knowledge base files for use with bibrank
 <br><br>
 Whether you need to use bibrankgkb depends on which ranking methods you are planning
 to use, and on what data you already have. This guide will take you through the necessary steps in detail in 
 order to create different kinds of ranking methods for the search engine to use.
 
 <a name="c"></a><h2>2. Configuration Conventions</h2>
 <blockquote>
 <pre>
 - comment line starts with '#' sign in the first column
 - each section in a configuration file is declared inside '[' ']' signs 
 - values in knowledgebasefiles are separated by '---' 
 </pre>
 </blockquote>
 
 <a name="bai"></a><h2>3. BibRank Admin Interface</h2>
 The bibrank web interface enables you to modify the configuration of most aspects of BibRank. For full functionality, it is advised to
 let the http-daemon have write/read access to your cdsware/etc/bibrank directory. If this is not wanted, you have to edit the configuration files from the console using your favourite text editor.
 <a name="mi"></a><h3>3.1 Main interface</h3>
 In the main interface screen, you see a list of all rank methods currently added. Each rank method is identified by the rank method code. To find out about the functionality available, check out the topics below.
 
 <br><br><b>Explanation of concepts</b>
 <blockquote>
 <pre>
 Rank method:
 A method responsible for creating the necessary data to rank a result.
 Translations:
 Each rank method may have many names in many languages.
 Collections:
 Which collections the rank method should be visible in.
 </pre>
 </blockquote>
 
 <a name="ar"></a><h3>3.2 Add rank method</h3>
 When pressing the link in the upper right corner of the main interface, you will see the interface for adding a new rank method. The two options that need to be decided upon are the bibrank code and the template to use; both values can be changed later. The bibrank code is used by the bibrank daemon to run the method, and should be fairly short and without spaces. The template you choose decides how the ranking will be done, and must be changed to suit your cdsware configuration before it is used. When you confirm adding a new rank method, it will be added to the list of possible rank methods, and a configuration file will be created if the httpd user has proper rights to the 'cdsware/etc/bibrank' directory. If not, the file has to be created manually with the name 'bibrankcode.cfg', where bibrankcode is the same as given in the interface.
 
 <a name="sd"></a><h3>3.3 Show details of rank method</h3>
 This interface gives you an overview of the current status of the rank method, and gives direct access to the various interfaces for changing the configuration.
 In the overview section, you see the bibrank code, for use with the bibrank daemon, and the date for the last run of the rank method.
 In the statistics section you see how many records have been added to the rank method and other statistic data. In the collection part, the collections which the rank method is visible to is shown. The translations part shows the various translations in the languages available in cdsware. On the bottom the configuration file is shown, if accessible.
 
 <a name="mr"></a><h3>3.4 Modify rank method</h3>
 This interface gives access to modify the bibrank code given when creating the rank method and the configuration file of the rank method, if the file can be accessed. If not, it may not exist, or the httpd user doesn't have enough rights to read the file. On the bottom of the interface, it is possible to choose a template, see it, and copy it over the old rank method configuration if wanted. Remember that the values present in the template is an example, and must be changed where necessary. See this documentation for information about this, and the 'BibRank Internals' link below for additional information.
 
 <a name="dr"></a><h3>3.5 Delete rank method</h3>
 If it is necessary to delete a rank method, some precautions must be taken since the configuration of the method will be lost. When deleting a rank method, the configuration file will also be deleted ('cdsware/etc/bibrank/bibrankcode.cfg' where bibrankcode is the code of the rank method) if accessible to the httpd user. If not, the file can be deleted manually from console. Any bibrank tasks scheduled to run the deleted rank method must be modified or deleted manually.
 
 <a name="mt"></a><h3>3.6 Modify translations</h3>
 If you want to use internationalisation of the rank method names, you have to add them using the 'Modify translations' interface. Below, a list of all the languages used in the cdsware installation will be shown, with the possibility to add the translation for each language.
 
 <a name="mc"></a><h3>3.7 Modify visibility toward collections</h3>
 If a rank method should be visible to the users of the cdsware search interface, it must be enabled for one or several collections. A rank method can be visible in the search interface of the whole site, or just one collection. The collections in the upper list box does not show the rank method in the search interface to the user. To change this select the wanted collection and press 'Enable' to enable the rank method for this collection. The collections that the method has been activated for, is  shown in the lower list box. To remove a collection, select it and press the 'Disable' button to remove it from the list of collections which the rank method is enabled for.
 
 <a name="bd"></a><h2>4. BibRank Daemon</h2>
 The bibrank daemon reads the necessary metadata from the cdsware database and combines it
 in different ways to create the ranking data needed at search time to rank the results quickly.
 <a name="cli1"></a><h3>4.1 Command Line Interface</h3>
 <blockquote>
 <pre>
 Usage bibrank:
        bibrank -wjif -a --id=0-30000,30001-860000 --verbose=9
        bibrank -wjif -d --modified='2002-10-27 13:57:26'
-       bibrank -wjif --rebalance --collection=Articles
+       bibrank -wwrd --recalculate --collection=Articles
        bibrank -wwrd -a -i 234-250,293,300-500 -u admin@cdsware
 
  Ranking options:
  -w, --run=r1[,r2]         runs each rank method in the order given
 
  -c, --collection=c1[,c2]  select according to collection
  -i, --id=low[-high]       select according to doc recID
  -m, --modified=from[,to]  select according to modification date
  -l, --lastupdate          select according to last update
 
  -a, --add                 add or update words for selected records
  -d, --del                 delete words for selected records
  -S, --stat                show statistics for a method
 
- -R, --rebalance           rebalancing rank data: does complete update. if not used: quick update
+ -R, --recalculate         recalculate weight data, used by word frequency method
+                           should be used if ca 1% of the documents have been changed
+                           since last time -R was used
  Repairing options:
  -k,  --check              check consistency for all records in the table(s)
                            check if update of ranking data is necessary
  -r, --repair              try to repair all records in the table(s)
  Scheduling options:
  -u, --user=USER           user name to store task, password needed
  -s, --sleeptime=SLEEP     time after which to repeat tasks (no)
                             e.g.: 1s, 30m, 24h, 7d
  -t, --time=TIME           moment for the task to be active (now)
                             e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26
  General options:
  -h, --help                print this help and exit
  -V, --version             print version and exit
  -v, --verbose=LEVEL       verbose level (from 0 to 9, default 1)
 </pre>
 </blockquote>
 
 <a name="ubd"></a><h3>4.2 Using BibRank</h3>
 
 <h4>Step 1 - Adding the rank option to the search interface</h4>
 To be able to add the needed ranking data to the database, you first have to add the rank method to the database, and
 add the wished code you want to use together with it. The name of the configuration file in the next section, needs to
 have the same name as the code stored in the database.
 
 <h4>Step 2 - Get necessary external data (ex. jif values)</h4>
 Find out what is necessary of data for each method. The bibrankgkb documentation below may be of assistance.
 
 <br><br><b>Example of necessary data</b> (<code>jif.kb</code> - journal impact factor knowledge base)
 <blockquote>
 <pre>
 Phys. Rev., D---3.838
 Phys. Rev. Lett.---6.462
 Phys. Lett., B---4.213
 Nucl. Instrum. Methods Phys. Res., A---0.964
 J. High Energy Phys.---8.664
 </pre>
 </blockquote>
 
 <h4>Step 3 - Modify the configuration file</h4>
 The configuration files for the different rank methods have different options, so verify that you are using the correct
 configuration file and rank method. A template for each rank method exists as an example, but may not work on all configurations of CDSware. 
 For a description of each rank method and the configuration necessary, check section 5 below.
 
 <h4>Step 4 - Add the ranking method as a scheduled task</h4>
 When the configuration is okay, you can add the bibrank daemon to the task scheduler using the scheduling options. The daemon can then update the rank method automatically, for example once each day.
 <br><br><b>Example</b>
 <blockquote>
 <pre>
 $ bibrank -wjif -r
 Task #53 was successfully scheduled for execution.
 </pre>
 </blockquote>
 It is advised to run the BibRank daemon using no parameters, since the default settings will then be used.
 <br><br><b>Example</b>
 <blockquote>
 <pre>
 $ bibrank
 Task #2505 was successfully scheduled for execution.
 </pre>
 </blockquote>
 
 <h4>Step 5 - Running bibrank manually</h4>
 If BibRank is scheduled without any parameters, and no records have been modified, you may get output like that shown below.
 
 <br><br><b>Example</b>
 <blockquote>
 <pre>
 $ bibrank 2505
 2004-09-07 17:51:46 --> Task #2505 started.
 2004-09-07 17:51:46 -->
 2004-09-07 17:51:46 --> Running rank method: Number of downloads.
 2004-09-07 17:51:47 --> No new records added since last time method was run
 2004-09-07 17:52:10 -->
 2004-09-07 17:52:10 --> Running rank method: Journal Impact Factor.
 2004-09-07 17:52:10 --> No new records added since last time method was run
 2004-09-07 17:52:11 --> Reading knowledgebase file: /soft/cdsware-CDSCERNWIENERDEV/etc/bibrank/cern_jif.kb
 2004-09-07 17:52:11 --> Number of lines read from knowledgebase file: 420
 2004-09-07 17:52:11 --> Number of records available in rank method: 0
 2004-09-07 17:52:12 -->
 2004-09-07 17:52:12 --> Running rank method: Word frequency
 2004-09-07 17:52:13 --> rnkWORD01F contains 256842 words from 677912 records
 2004-09-07 17:52:14 --> rnkWORD01F is in consistent state
 2004-09-07 17:52:14 --> Using the last update time for the rank method
 2004-09-07 17:52:14 --> No new records added. rnkWORD01F is up to date
 2004-09-07 17:52:14 --> rnkWORD01F contains 256842 words from 677912 records
 2004-09-07 17:52:14 --> rnkWORD01F is in consistent state
 2004-09-07 17:52:14 --> Task #2505 finished.
 </pre>
 </blockquote>
 
 <h4>Step 6 - Fast update of modified records</h4>
-If you just want to update the latest additions or modified records, you may want to do a faster update by running the daemon without the rebalance option. (the rebalance option is default). This may cause lower accurancy when ranking.
+If you just want to update the latest additions or modified records, you may want to do a faster update by running the daemon without the recalculate option (the recalculate option is off by default). This may cause lower accuracy when ranking.
 
 <a name="brm"></a><h2>5. BibRank Methods</h2>
 Each BibRank method has a configuration file which contains different parameters and sections necessary to do the ranking.
 <a name="str"></a><h3>5.1 Single tag rank method</h3>
 This method uses one MARC tag together with a file containing possible values for this MARC tag together with a ranking value. This data is used to create a structure containing the record id associated with the ranking value based on the content of the tag. The method can be used for various ways of ranking like ranking by Journal Impact Factor, or use it to let certain authors always appear top of a search.
 The parameters that need to be configured for this method are 'tag', 'kb_src' and 'check_mandatory_tags'.
 <br><br><b>Example</b>
 <blockquote>
 <pre>
 <protect>
 [rank_method]
 function = single_tag_rank_method
 
 [single_tag_rank]
 tag = 909C4p
 kb_src = /usr/local/cdsware-DEMO/etc/bibrank/jif.kb
 check_mandatory_tags = 909C4c,909C4v,909C4y
 </protect>
 </pre>
 </blockquote>
 
 <b>Explanation:</b>
 <pre>
 <blockquote>
 <protect>
 [rank_method]
 ##The function which is responsible for doing the work. Should not be changed
 function = single_tag_rank_method
 
 ##This section must be available if the single_tag_rank_method is going to be used
 [single_tag_kb]
 
 ##The tag which got the value to be searched for on the left side in the kb file (like the journal name)
 tag = 909C4p
 
 ##The path to the kb file which has the content of the tag above on the left side, and the value on the right side
 kb_src = /log/cdsware-DEMODEV/etc/bibrank/jif.kb
 
 ##Tags that must be included for a record to be added to the ranking data, to disable remove tags
 check_mandatory_tags = 909C4c,909C4v,909C4y
 </protect>
 </blockquote>
 The kb_src file must contain data on the form:
 <blockquote>
 Phys. Rev., D---3.838
 Phys. Rev. Lett.---6.462
 Phys. Lett., B---4.213
 Nucl. Instrum. Methods Phys. Res., A---0.964
 J. High Energy Phys.---8.664
 </blockquote>
 The left side must match the content of the tag mentioned in the tag variable.
 </pre>
 <a name="wrd"></a><h3>5.2 Word Similarity/Similar Records</h3>
 The Word Similarity/Similar Records method uses the content of the selected tags to determine which records are most relevant to a query, or most similar to a selected record. This method has a lot of parameters to configure, and it may need some tweaking to get the best result. The BibRank code for this method has to be 'wrd' for it to work. For best results, it is advised to install the stemming module mentioned in INSTALL, and to use a stopword list containing stopwords in the languages the records exist in. The stemmer and the stopword list are used to get better results and to limit the size of the index, thus making ranking faster and more accurate. For best results with the stemmer, it is important to mark each tag to be used with the most common language the value of the tag may be in. It is advised not to change the 'function' and 'table' parameters or the parameters under [find_similar]. If the stemmer is not installed, the 'stem_if_avail' parameter should be set to 'no' to make sure that no problems arise. Each tag to be used by the method has to be given a number of points, which describes how important a word found in this tag is.
 
-When running BibRank to update the index for this rank method, it is not necessary to rebalance each time, but when large number of records has been updated/added, it can be wise to recalculate using the rebalance parameter of BibRank.
+When running BibRank to update the index for this rank method, it is not necessary to recalculate each time, but when a large number of records have been updated/added, it can be wise to recalculate using the recalculate parameter of BibRank.
 <br><br><b>Example</b>
 <pre>
 <blockquote>
 <protect>
 [rank_method]
 function = word_similarity
 
 [word_similarity]
 stemming = en
 table = rnkWORD01F
 stopword = True
 relevance_number_output_prologue = (
 relevance_number_output_epilogue = )
 #relevance_number_output_prologue = <!--
 #relevance_number_output_epilogue = -->
 #MARC tag,tag points, tag language
 tag1 = 6531_a, 2, en
 tag2 = 695__a, 1, en
 tag3 = 6532_a, 1, en
 tag4 = 245__%, 10, en
 tag5 = 246_%, 1, fr
 tag6 = 250__a, 1, en
 tag7 = 711__a, 1, en
 tag8 = 210__a, 1, en
 tag9 = 222__a, 1, en
 tag10 = 520__%, 1, en
 tag11 = 590__%, 1, fr
 tag12 = 111__a, 1, en
 tag13 = 100__%, 2, none
 tag14 = 700__%, 1, none
 tag15 = 721__a, 1, none
 
 
 [find_similar]
 max_word_occurence = 0.05
 min_word_occurence = 0.00
 min_word_length = 3
 min_nr_words_docs = 3
 max_nr_words_upper = 20
 max_nr_words_lower = 10
 default_min_relevance = 75
 </protect>
 </blockquote>
 </pre>
 
 <b>Explanation:</b>
 <pre>
 <blockquote>
 <protect>
 [rank_method]
 #internal name for the bibrank program, do not modify
 function = word_similarity
 
 [word_similarity]
 #if stemmer is available, default stemming language should be given here. Advised to turn off if not installed
 stemming = en
 #the internal table to load the index tables from.
 table = rnkWORD01F
 #remove stopwords? 
 stopword = True
 #text to show before the rank value when the search result is presented. <!-- to hide result
 relevance_number_output_prologue = (
 #text to show after the rank value when the search result is presented. --> to hide result
 relevance_number_output_epilogue = )
 
 #MARC tag,tag points, tag language
 #a list of the tags to be used, together with a number describing the importance of the tag, and the 
 #most common language for the content. Not all languages are supported. Among the supported ones are: 
 #fr/french, en/english, no/norwegian, se/swedish, de/german, it/italian, pt/portuguese
 
 #keyword
 tag1 = 6531_a, 1, en #keyword
 tag2 = 695__a, 1, en #keyword
 tag3 = 6532_a, 1, en #keyword
 tag4 = 245__%, 10, en #title, the words in the title usually describe a record very well.
 tag5 = 246_% , 1, fr #french title
 tag6 = 250__a, 1, en #title
 tag7 = 711__a, 1, en #title
 tag8 = 210__a, 1, en #abbreviated
 tag9 = 222__a, 1, en #key title
 
 [find_similar]
 #term should exist in maximum X/100% of documents
 max_word_occurence = 0.05
 #term should exist in minimum X/100% of documents
 min_word_occurence = 0.00
 #term should be at least 3 characters long
 min_word_length = 3
 #term should be in at least 3 documents
 min_nr_words_docs = 3
 #do not use more than 20 terms for "find similar"
 max_nr_words_upper = 20
 #if a document contains less than 10 terms, use much used terms too, if not ignore them
 max_nr_words_lower = 10
 #default minimum relevance value to use for find similar
 default_min_relevance = 75
 </protect>
 </blockquote>
 Tip: When executing a search using a ranking method, you can add "verbose=1" to the list of parameters
 in the URL to see which terms have been used in the ranking.
 </pre>
 
 <a name="cmb"></a><h3>5.3 Combined method</h3>
 The 'Combined method' runs each method mentioned in the config file and adds the scores together
 based on the importance of each method, given as a percentage.
 <br><br><b>Example</b>
 <pre>
 <blockquote>
 <protect>
 [rank_method]
 function = combine_method
 [combine_method]
 method1 = cern_jif,33
 method2 = cern_acc,33
 method3 = wrd,33
 relevance_number_output_prologue = (
 relevance_number_output_epilogue = )
 </protect>
 </blockquote>
 </pre>
 
 <b>Explanation:</b>
 <pre>
 <blockquote>
 <protect>
 [rank_method]
 #tells which method to use, do not change
 function = combine_method
 [combine_method]
 #each line tells which method to use, the code is the same as in the BibRank interface, the number describes how 
 #much of the total score the method should count.
 method1 = jif,50
 method2 = wrd,50
 #text to be shown before the rank value on the search result screen.
 relevance_number_output_prologue = (
 #text to be shown after the rank value on the search result screen.
 relevance_number_output_epilogue = )
 </protect>
 </blockquote>
 </pre>
 
 <a name="bt"></a><h2>6. bibrankgkb Tool</h2>
 For some ranking methods, like the single_tag_rank method, a knowledge base file (kb) with the needed data in the correct format is necessary. This file can be created using the bibrankgkb tool which can read the data either from
 the cdsware database, from several web pages using regular expressions, or from another file. In case one source
 has another naming convention, bibrank can convert between them using a convert file.
 <a name="cli2"></a><h3>6.1 Command Line Interface</h3>
 
 <blockquote>
 <pre>
 Usage: bibrankgkb [options]
      Examples:
        bibrankgkb --input=bibrankgkb.cfg --output=test.kb
        bibrankgkb -otest.cfg -v9
        bibrankgkb 
 
  Generate options:
  -i,  --input=file          input file, default from /etc/bibrank/bibrankgkb.cfg
  -o,  --output=file         output file, will be placed in current folder
  General options:
  -h,  --help                print this help and exit
  -V,  --version             print version and exit
  -v,  --verbose=LEVEL       verbose level (from 0 to 9, default 1)
 </pre>
 </blockquote>
 
 <a name="ubt"></a><h3>6.2 Using bibrankgkb</h3>
 
 <h4>Step 1 - Find sources</h4>
 Since some of the data used for ranking purposes is not freely available, it cannot be bundled with CDSware. To get hold of the necessary data,
 you may find it useful to ask your library if they have a copy of the data that can be used (like the Journal Impact Factors from the Science Citation Index), or use google to search the web for any public source.
 <h4>Step 2 - Create configuration file</h4>
 The default configuration file is shown below.
 <protect>
 <pre>
 <blockquote>
 
 ##The main section
 [bibrankgkb]
 ##The url to a web page with the data to be read, does not need to have the same name as this one, but if there
 ##are several links, the url parameter should end with _0->
 url_0 = http://www.taelinke.land.ru/impact_A.html
 url_1 = http://www.taelinke.land.ru/impact_B.html
 url_2 = http://www.taelinke.land.ru/impact_C.html
 url_3 = http://www.taelinke.land.ru/impact_DE.html
 url_4 = http://www.taelinke.land.ru/impact_FH.html
 url_5 = http://www.taelinke.land.ru/impact_I.html
 url_6 = http://www.taelinke.land.ru/impact_J.html
 url_7 = http://www.taelinke.land.ru/impact_KN.html
 url_8 = http://www.taelinke.land.ru/impact_QQ.html
 url_9 = http://www.taelinke.land.ru/impact_RZ.html
 ##The regular expression for the url mentioned should be given here
 url_regexp = 
 
 ##The various sources that can be read in, can either be a file, web page or from the database
 kb_1 = /home/trondaks/w/cdsware/modules/bibrank/etc/cern_jif.kb
 kb_2 = /home/trondaks/w/cdsware/modules/bibrank/etc/cdsware_jif.kb
 kb_2_filter = /home/trondaks/w/cdsware/modules/bibrank/etc/convert.kb
 kb_3 = SELECT id_bibrec,value FROM bib93x,bibrec_bib93x WHERE tag='938__f' AND id_bibxxx=id
 kb_4 = SELECT id_bibrec,value FROM bib21x,bibrec_bib21x WHERE tag='210__a' AND id_bibxxx=id
 ##This points to the url above (the common part of the url is 'url_' followed by a number
 kb_5 = url_%s
 
 ##This is the part that will be read by the bibrankgkb tool to determine what to read.
 ##The first two parts (separated by ,,) give where to look for the conversion file (which converts
 ##the names between two formats), and the second part is the data source. A conversion file is not
 ##needed, as shown in create_0. If the source is from a file, url or the database, it must be
 ##given with file,www or db. If several create lines exists, each will be read in turn, and added
 ##to a common kb file.
 ##So this means that:
 ##create_0: Load from file in variable kb_1 without converting
 ##create_1: Load from file in variable kb_2 using conversion from file kb_2_filter
 ##create_2: Load from www using url in variable kb_5 and regular expression in url_regexp
 ##create_3: Load from database using the sql statement in kb_4
 create_0 = ,, ,,file,,%(kb_1)s
 create_1 = file,,%(kb_2_filter)s,,file,,%(kb_2)s
 #create_2 = ,, ,,www,,%(kb_5)s,,%(url_regexp)s
 #create_3 = ,, ,,db,,%(kb_4)s,,%(kb_4)s
 </blockquote>
 </pre>
 </protect>
 When you have found a source for the data and created the configuration file, it may be necessary to
 create a conversion file, but this depends on the naming conventions used in the available data versus
 the conventions used in your cdsware installation.
 <br>
 The available data may look like this:
 <pre>
 <blockquote>
 COLLOID SURFACE A---1.98
 </blockquote>
 </pre>
 But in cdsware you are using:
 <pre>
 <blockquote>
 Colloids Surf., A---1.98
 </blockquote>
 </pre>
 By using a conversion file like:
 <pre>
 <blockquote>
 COLLOID SURFACE A---Colloids Surf., A
 </blockquote>
 </pre>
 You can convert the source to the correct naming convention.
 <pre>
 <blockquote>
 Colloids Surf., A---1.98
 </blockquote>
 </pre>
 <h4>Step 3 - Run tool</h4>
 When ready to run the tool, you may either use the default file (/etc/bibrank/bibrankgkb.cfg), or use another one by giving it using the input variable '--input'.
 If you want to test the configuration, you can use '--verbose=9' to output on screen, or if you want to save it to a file, use
 '--output=filename', but remember that the file will be saved in the program directory.
 The output may look like this:
 <pre>
 <blockquote>
 $ ./bibrankgkb -v9
 2004-03-11 17:30:17 --> Running: Generate Knowledge base.
 2004-03-11 17:30:17 --> Reading data from file: /log/cdsware-DEMODEV/etc/bibrank/jif.kb
 2004-03-11 17:30:17 --> Reading data from file: /log/cdsware-DEMODEV/etc/bibrank/conv.kb
 2004-03-11 17:30:17 --> Using last resource for converting values.
 2004-03-11 17:30:17 --> Reading data from file: /log/cdsware-DEMODEV/etc/bibrank/jif2.kb
 2004-03-11 17:30:17 --> Converting between naming conventions given.
 2004-03-11 17:30:17 --> Colloids Surf., A---1.98
 2004-03-11 17:30:17 --> Phys. Rev. Lett.---6.462
 2004-03-11 17:30:17 --> J. High Energy Phys.---8.664
 2004-03-11 17:30:17 --> Nucl. Instrum. Methods Phys. Res., A---0.964
 2004-03-11 17:30:17 --> Phys. Lett., B---4.213
 2004-03-11 17:30:17 --> Phys. Rev., D---3.838
 2004-03-11 17:30:17 --> Total nr of lines: 6
 2004-03-11 17:30:17 --> Time used: 0 second(s).
 </blockquote>
 </pre>
 
 <a name="ainf"></a><h2>7. Additional Information</h2>
 <a href="<WEBURL>/hacking/bibrank/">BibRank Internals</a>
diff --git a/modules/bibrank/lib/bibrank_tag_based_indexer.py b/modules/bibrank/lib/bibrank_tag_based_indexer.py
index 258365289..5f912345e 100644
--- a/modules/bibrank/lib/bibrank_tag_based_indexer.py
+++ b/modules/bibrank/lib/bibrank_tag_based_indexer.py
@@ -1,539 +1,535 @@
 ## $Id$
 ## Ranking of records using different parameters and methods.
 
 ## This file is part of the CERN Document Server Software (CDSware).
 ## Copyright (C) 2002 CERN.
 ##
 ## The CDSware is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## The CDSware is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDSware; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 ## read config variables:
 #include "cdswmllib.wml"
 
 <protect># -*- coding: utf-8 -*-</protect>
 <protect>## $Id$</protect>
 <protect>## DO NOT EDIT THIS FILE!  IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
 
 __version__ = "<: print generate_pretty_version_string('$Id$'); :>"
 
 from marshal import loads,dumps
 from zlib import compress,decompress
 from string import split,translate,lower,upper
 import getopt
 import getpass
 import string
 import os
 import sre
 import sys
 import time
 import MySQLdb
 import Numeric
 import urllib
 import signal
 import tempfile
 import unicodedata
 import traceback
 import cStringIO
 import re
 import copy
 import types
 import ConfigParser
 
 from config import *
 from search_engine_config import cfg_max_recID
 from search_engine import perform_request_search, strip_accents
 from search_engine import HitSet, get_index_id, create_basic_search_units
 from dbquery import run_sql
 
 options = {}
 
 def single_tag_rank_method_exec(rank_method_code, name, config):
     """Create the rank method data and store it in the database.

     The data already stored for rank_method_code is loaded with
     fromDB(), merged with the freshly computed result of
     single_tag_rank(config) (new values win, see union_dicts()) and
     written back with intoDB().  The 'name' argument is not used here.
     """
     startCreate = time.time()
     rnkset = {}
     rnkset_old = fromDB(rank_method_code)
-    #if options["quick"] == "no":
-    #    [[0, 1000]]
-    #    print options["recid_range"]
-        
     # timestamp taken before the ranking run; stored as last_updated by intoDB()
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     rnkset_new = single_tag_rank(config)
     rnkset = union_dicts(rnkset_old, rnkset_new)
     intoDB(rnkset, date, rank_method_code)
 
 def single_tag_rank(config):
     """Connect the given tag with the data from the kb file given.

     The config section named by [rank_method]/function supplies:
       kb_src               - path of the knowledge base file; every
                              non-comment line has the form 'name---value'
       tag                  - tag whose value is looked up in the kb
       check_mandatory_tags - comma separated tags a record must also
                              have in order to be ranked (may be empty)

     Only records inside options["recid_range"] that are contained in
     options["validset"] are considered.  Returns a dictionary mapping
     recID to the highest kb value found among the values of the tag,
     or to 0 when none of the record's values is present in the kb.
     """
     if options["verbose"] >= 9:
         write_message("Loading knowledgebase file")
     section = config.get("rank_method", "function")
     kb_src = config.get(section, "kb_src")
     kb_data = {}
 
     write_message("Reading knowledgebase file: %s" % kb_src)
     kb_file = open(kb_src, 'r')
     try:
         for line in kb_file.readlines():
             if line[0:1] == "#":
                 continue
             fields = string.split(string.strip(line), "---")
             # blank lines and lines without the '---' separator carry no
             # data; skip them instead of failing with an IndexError
             if len(fields) < 2:
                 continue
             kb_data[string.strip(fields[0])] = fields[1]
     finally:
         kb_file.close()
     write_message("Number of lines read from knowledgebase file: %s" % len(kb_data))
 
     tag = config.get(section, "tag")
     tags = split(config.get(section, "check_mandatory_tags"), ",")
     if tags == ['']:
         tags = []
 
     records = []
     for (recids, recide) in options["recid_range"]:
         write_message("......Processing records #%s-%s" % (recids, recide))
         recs = run_sql("SELECT id_bibrec,value FROM bib%sx,bibrec_bib%sx WHERE tag='%s' AND id_bibxxx=id and id_bibrec >=%s and id_bibrec<=%s" % (tag[0:2], tag[0:2], tag, recids, recide))
         if tags:
             valid = HitSet(Numeric.ones(cfg_max_recID + 1))
             for key in tags:
                 newset = HitSet()
                 # a mandatory tag is stored in the bibXXx table named after
                 # its own first two digits, not in the table of 'tag'
                 newset.addlist(run_sql("SELECT id_bibrec FROM bib%sx,bibrec_bib%sx WHERE id_bibxxx=id AND tag='%s' and id_bibrec >=%s and id_bibrec<=%s" % (key[0:2], key[0:2], key, recids, recide)))
                 valid.intersect(newset)
             recs = filter(lambda x: valid.contains(x[0]), recs)
         records = records + list(recs)
         write_message("Number of records found with the necessary tags: %s" % len(records))
 
     records = filter(lambda x: options["validset"].contains(x[0]), records)
     rnkset = {}
     for key, value in records:
         if kb_data.has_key(value):
             score = float(kb_data[value])
             # a record may carry the tag several times: keep the best score
             if not rnkset.has_key(key) or score > rnkset[key]:
                 rnkset[key] = score
         elif not rnkset.has_key(key):
             # value unknown to the kb: rank with 0, but never overwrite a
             # score that another value of the same record already gave
             rnkset[key] = 0
 
     write_message("Number of records available in rank method: %s" % len(rnkset))
     return rnkset
 
 def get_lastupdated(rank_method_code):
     """Return the last_updated value stored in rnkMETHOD for the method.

     Raises Exception when no rnkMETHOD row with that name exists."""
     rows = run_sql("SELECT rnkMETHOD.last_updated FROM rnkMETHOD WHERE name='%s'" % rank_method_code)
     if not rows:
         raise Exception("Is this the first run? Please do a complete update.")
     return rows[0][0]
 
 def intoDB(dict, date, rank_method_code):
     """Replace the stored data of a rank method with 'dict'.

     The old rnkMETHODDATA row is deleted, the marshalled and compressed
     dictionary is inserted, and last_updated of the method is set to 'date'."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     del_rank_method_codeDATA(rank_method_code)
     insert_query = "INSERT INTO rnkMETHODDATA(id_rnkMETHOD, relevance_data) VALUES ('%s','%s')" % (method_rows[0][0], serialize_via_marshal(dict))
     run_sql(insert_query)
     update_query = "UPDATE rnkMETHOD SET last_updated='%s' WHERE name='%s'" % (date, rank_method_code)
     run_sql(update_query)
 
 def fromDB(rank_method_code):
     """Return the stored data of a rank method, or {} if nothing is stored."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     data_rows = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s" % method_rows[0][0])
     if not data_rows:
         return {}
     return deserialize_via_marshal(data_rows[0][0])
 
 def del_rank_method_codeDATA(rank_method_code):
     """Remove the rnkMETHODDATA row belonging to the given rank method."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     method_id = method_rows[0][0]
     run_sql("DELETE FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s" % method_id)
 
 def del_recids(rank_method_code, range):
     """Delete some records from the rank method"""
     id = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s" % id[0][0])
     if res:
         rec_dict = deserialize_via_marshal(res[0][0])
         write_message("Old size: %s" % len(rec_dict))
         for (recids,recide) in range:
             for i in range(int(recids), int(recide)):
                 if rec_dict.has_key(i):
                     del rec_dict[i]  
         write_messag("New size: %s" % len(rec_dict))
         date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         intoDB(rec_dict, date, rank_method_code)
     else:
         print "Create before deleting!"
     
 def union_dicts(dict1, dict2):
     """Return a new dictionary holding the union of the two dicts.

     When a key is present in both, the value from dict2 wins.  Neither
     argument is modified."""
     union_dict = dict(dict1)
     union_dict.update(dict2)
     return union_dict
 
 def rank_method_code_statistics(rank_method_code):
     """Print statistics about the stored data of a rank method.

     Reports the lowest value above 0 and the highest value with the
     number of records having them, then splits the interval from -1 to
     the highest value into 10 equally wide sets and reports the number
     of records and of distinct values in each set.
     """
     method = fromDB(rank_method_code)
     values = method.values()
     # start from None rather than a placeholder tuple: comparing numbers
     # with a tuple never updated the maximum and broke 'max + 1' below
     lowest = None
     highest = None
     for value in values:
         if value > 0 and (lowest is None or value < lowest):
             lowest = value
         if highest is None or value > highest:
             highest = value
     mincount = values.count(lowest)
     maxcount = values.count(highest)
 
     write_message("Showing statistic for selected method")
     write_message("Method name: %s" % getName(rank_method_code))
     write_message("Short name: %s" % rank_method_code)
     write_message("Last run: %s" % get_lastupdated(rank_method_code))
     write_message("Number of records: %s" % len(method))
     if highest is None:
         # no data stored, nothing more to report
         return
     write_message("Lowest value: %s - Number of records: %s" % (lowest, mincount))
     write_message("Highest value: %s - Number of records: %s" % (highest, maxcount))
     write_message("Divided into 10 sets:")
     step = float(highest + 1) / 10
     for i in range(1, 11):
         set_lower = -1.0 + step * (i - 1)
         set_upper = -1.0 + step * i
         setcount = 0
         distinct_values = {}
         for value in values:
             if value >= set_lower and value <= set_upper:
                 setcount += 1
                 distinct_values[value] = 1
         # number of records first, then the number of distinct values
         write_message("Set %s (%s-%s) %s Distinct values: %s" % (i, set_lower, set_upper, setcount, len(distinct_values)))
 
 def check_method(rank_method_code):
     """Report whether the rank method has data and whether records changed since its last run."""
     write_message("Checking rank method...")
     if not fromDB(rank_method_code):
         write_message("Rank method not yet executed, please run it to create the necessary data.")
         return
     if add_date(rank_method_code):
         write_message("Records modified, update recommended")
     else:
         write_message("No records modified, update not necessary")
  
 def write_message(msg, stream = sys.stdout):
     """Write a timestamped message to the stream and flush it.

     Only sys.stdout and sys.stderr are accepted; for any other stream
     a complaint is written to sys.stderr instead of the message."""
     if stream != sys.stdout and stream != sys.stderr:
         sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream)
         return
     timestamp = time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())
     stream.write(timestamp)
     stream.write("%s\n" % msg)
     stream.flush()
     return
 
 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"):
     """Returns a date string according to the format string.

     'var' is either a shift with respect to now, written as an optional
     sign, a number and one of the units d, h, m, s (e.g. "-2d", "+30m"),
     or a date string that is already in format_string layout.  A date
     string that does not fit format_string makes time.strptime raise
     ValueError.
     """
     # 're' replaces the obsolete 'sre' alias; the pattern is unchanged
     shift_re = re.compile(r"([-\+]{0,1})([\d]+)([dhms])")
     factors = {"d": 24*3600, "h": 3600, "m": 60, "s": 1}
     m = shift_re.match(var)
     if m:
         (sign_str, amount, unit) = m.groups()
         if sign_str == "-":
             sign = -1
         else:
             sign = 1
         date = time.localtime(time.time() + sign * factors[unit] * float(amount))
     else:
         date = time.strptime(var, format_string)
     return time.strftime(format_string, date)
 
 def task_sig_sleep(sig, frame):
     """Handler for the 'sleep' signal sent by BibSched.

     Marks the task as SLEEPING and blocks until another signal arrives."""
     verbose = options["verbose"]
     if verbose >= 9:
         write_message("got signal %d" % sig)
     write_message("sleeping...")
     task_update_status("SLEEPING")
     # wait for wake-up signal
     signal.pause()
 
 def task_sig_wakeup(sig, frame):
     """Handler for the 'wakeup' signal sent by BibSched; marks the task as CONTINUING."""
     verbose = options["verbose"]
     if verbose >= 9:
         write_message("got signal %d" % sig)
     write_message("continuing...")
     task_update_status("CONTINUING")
 
 def task_sig_stop(sig, frame):
     """Signal handler for the 'stop' signal sent by BibSched."""
     if options["verbose"]>= 9:
         write_message("got signal %d" % sig)
     write_message("stopping...")
     task_update_status("STOPPING")
     errcode = 0
     try:
         task_sig_stop_commands()
         write_message("stopped")
         task_update_status("STOPPED")
     except StandardError, err:
         write_message("Error during stopping! %e" % err)
         task_update_status("STOPPINGFAILED")
         errcode = 1
     sys.exit(errcode)
 
 def task_sig_stop_commands():
     """Do all the commands necessary to stop the task before quitting.
     Useful for task_sig_stop() handler.
     Currently there is nothing to clean up: only the start and the end
     of the (empty) stopping sequence are logged.
     """
     write_message("stopping commands started")
     write_message("stopping commands ended")
 
 def task_sig_suicide(sig, frame):
     """Handler for the 'suicide' signal sent by BibSched.

     Records SUICIDING and then SUICIDED as task status and exits with code 0."""
     verbose = options["verbose"]
     if verbose >= 9:
         write_message("got signal %d" % sig)
     write_message("suiciding myself now...")
     task_update_status("SUICIDING")
     write_message("suicided")
     task_update_status("SUICIDED")
     sys.exit(0)
 
 def task_sig_unknown(sig, frame):
     """Handler for any other signal sent by shell or user; the signal is only logged."""
     verbose = options["verbose"]
     if verbose >= 9:
         write_message("got signal %d" % sig)
     # nothing is done for other signals apart from reporting them
     write_message("unknown signal %d ignored" % sig)
 
 def task_update_progress(msg):
     """Store 'msg' as progress of the current task (global task_id) in the schTASK table."""
     escaped_msg = MySQLdb.escape_string(msg)
     query = "UPDATE schTASK SET progress='%s' where id=%d" % (escaped_msg, task_id)
     if options["verbose"] >= 9:
         write_message(query)
     run_sql(query)
 
 def task_update_status(val):
     """Store 'val' as status of the current task (global task_id) in the schTASK table."""
     escaped_val = MySQLdb.escape_string(val)
     query = "UPDATE schTASK SET status='%s' where id=%d" % (escaped_val, task_id)
     if options["verbose"] >= 9:
         write_message(query)
     run_sql(query)
 
 def split_ranges(parse_string):
     """Parse a comma separated list of recIDs and recID ranges.

     Each item is either a single number ("293") or "low-high"
     ("234-250").  Returns a list of [low, high] integer pairs; a single
     number n gives [n, n] and a reversed range is put in ascending
     order.  Non-numeric items make int() raise ValueError.
     """
     recIDs = []
     for part in parse_string.split(","):
         bounds = part.split("-")
         low = int(bounds[0])
         if len(bounds) == 1:
             high = low
         else:
             high = int(bounds[1])
             if low > high: # sanity check
                 low, high = high, low
         recIDs.append([low, high])
     return recIDs
 
 def bibrank_engine(row, run):
     """Run the indexing task. The row argument is the BibSched task
     queue row, containing id, arguments, etc.; 'run' is the code of
     the rank method to execute.
     Return 1 in case of success; failures are reported by raising
     StandardError.
     """
    
     # psyco is optional: without it the task runs uncompiled
     try:
         import psyco
         psyco.bind(single_tag_rank) 
         psyco.bind(single_tag_rank_method_exec)
         psyco.bind(serialize_via_numeric_array)
         psyco.bind(deserialize_via_numeric_array)
     except StandardError, e: 
         print "Psyco ERROR",e 
 
     startCreate = time.time()
     global options, task_id
     task_id = row[0]
     task_proc = row[1]
     # row[6] holds the marshalled options dictionary of the task
     options = loads(row[6])
 
     task_starting_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     signal.signal(signal.SIGUSR1, task_sig_sleep)
     signal.signal(signal.SIGTERM, task_sig_stop)
     signal.signal(signal.SIGABRT, task_sig_suicide)
     signal.signal(signal.SIGCONT, task_sig_wakeup)
     signal.signal(signal.SIGINT, task_sig_unknown)
 
     sets = {}
     try:
         # whatever options["run"] contained is replaced by the single method given in 'run'
         options["run"] = []
         options["run"].append(run)
         for rank_method_code in options["run"]:
             cfg_name = getName(rank_method_code)
             if options["verbose"] >= 0:
                 write_message("Running rank method: %s." % cfg_name)
 
             file = etcdir + "/bibrank/" + rank_method_code + ".cfg"
             config = ConfigParser.ConfigParser()
             try:
                 config.readfp(open(file))
             except StandardError, e:
                 write_message("Cannot find configurationfile: %s" % file, sys.stderr)
                 raise StandardError
 
             cfg_short = rank_method_code
             # the function implementing the method is looked up by name: '<function>_exec'
             cfg_function = config.get("rank_method", "function") + "_exec"
             cfg_name = getName(cfg_short)
             options["validset"] = get_valid_range(rank_method_code)
 
             # select the records to work on; the first option that is set wins
             if options["collection"]:
                 l_of_colls = string.split(options["collection"], ",")
                 recIDs = perform_request_search(c=l_of_colls)
                 recIDs_range = []
                 for recID in recIDs:
                     recIDs_range.append([recID,recID])
                 options["recid_range"] = recIDs_range
             elif options["id"]:
                 options["recid_range"] = options["id"]
             elif options["modified"]:
                 options["recid_range"] = add_date(rank_method_code, options["modified"])
             elif options["last_updated"]:
                 options["recid_range"] = add_date(rank_method_code)
             else:
                 if options["verbose"] > 1:
                     write_message("No records specified, updating all")
                 min_id = run_sql("SELECT min(id) from bibrec")[0][0]
                 max_id = run_sql("SELECT max(id) from bibrec")[0][0]
                 options["recid_range"] = [[min_id, max_id]] 
 
             if options["quick"] == "no" and options["verbose"] >= 9:
-                write_message("Rebalance not yet enabled, parameter ignored.")
+                write_message("Recalculate parameter not used, parameter ignored.")
 
             # dispatch on the requested command
             if options["cmd"] == "del":
                 del_recids(cfg_short, options["recid_range"])
             elif options["cmd"] == "add":
                 func_object = globals().get(cfg_function)
                 func_object(rank_method_code, cfg_name, config)
             elif options["cmd"] == "stat":
                 rank_method_code_statistics(rank_method_code)
             elif options["cmd"] == "check":
                 check_method(rank_method_code)
             else:
                 write_message("Invalid command found processing %s" % rank_method_code, sys.stderr)
                 raise StandardError
     except StandardError, e:
         write_message("\nException caught: %s" % e, sys.stderr)
         if options["verbose"] >= 9:      
             traceback.print_tb(sys.exc_info()[2])
         # NOTE(review): a fresh StandardError without message is raised, so callers do not see the original error text
         raise StandardError
 
     if options["verbose"]:
         showtime((time.time() - startCreate))
     return 1
 
 def get_valid_range(rank_method_code):
     """Return a HitSet of the records in the collections enabled for the rank method.

     The set is empty when no collection is attached to the method."""
     if options["verbose"] >=9:
         write_message("Getting records from collections enabled for rank method.")
 
     res = run_sql("SELECT collection.name FROM collection,collection_rnkMETHOD,rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name='%s'" %  rank_method_code)
     l_of_colls = [coll[0] for coll in res]
     recIDs = []
     if l_of_colls:
         recIDs = perform_request_search(c=l_of_colls)
     valid = HitSet()
     valid.addlist(recIDs)
     return valid
    
 def add_date(rank_method_code, date=""):
     """If date is not set, then retrieve it from the database.
        Reindex all formats newer than the modification date"""
     if not date:
         try:
             date = (get_lastupdated(rank_method_code),'')
         except Exception, e:
             date = "0000-00-00 00:00:00"
     query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >=
     '%s'""" % date[0]
     if date[1]:
         query += "and b.modification_date <= '%s'" % date[1]
     query += "ORDER BY b.id ASC"""
     res = run_sql(query)        
     list = create_range_list(res)
     if not list:
         if options["verbose"]:
             write_message("No new records added since last time method was run")
     return list
 
 def getName(rank_method_code, ln=cdslang, type='ln'):
     """Returns the name of the method if it exists"""
 
     try:
         rnkid = run_sql("SELECT id FROM rnkMETHOD where name='%s'" % rank_method_code)
         if rnkid:
             rnkid = str(rnkid[0][0])
             res = run_sql("SELECT value FROM rnkMETHODNAME where type='%s' and ln='%s' and id_rnkMETHOD=%s" % (type, ln, rnkid))
             if not res:
                 res = run_sql("SELECT value FROM rnkMETHODNAME WHERE ln='%s' and id_rnkMETHOD=%s and type='%s'"  % (cdslang, rnkid, type))
             if not res: 
                 return rank_method_code
             return res[0][0]
         else:
             raise Exception
     except Exception, e:
         write_message("Cannot run rank method, either given code for method is wrong, or it has not been added using the webinterface.")
         raise Exception
 
 def create_range_list(res):
     """Collapse the rows of a recID select query into [first, last] ranges.

     'res' is a sequence of rows whose first column is the recID, in
     ascending order.  Consecutive recIDs are merged into one range."""
     if not res or not res[0]:
         return []
     first = res[0][0]
     range_list = [[first, first]]
     for row in res[1:]:
         recid = row[0]
         current = range_list[-1]
         if recid == current[1] + 1:
             current[1] = recid
         else:
             range_list.append([recid, recid])
     return range_list
 
 def single_tag_rank_method(row, run):
     """Entry point of the single tag rank method; delegates to bibrank_engine()."""
     return bibrank_engine(row, run)
 
 def serialize_via_numeric_array_dumps(arr):
     """Dump a Numeric array into a string with Numeric.dumps."""
     return Numeric.dumps(arr)
 def serialize_via_numeric_array_compr(str):
     """Compress a string with zlib."""
     return compress(str)
 def serialize_via_numeric_array_escape(str):
     """Escape a string with MySQLdb.escape_string for use inside an SQL statement."""
     return MySQLdb.escape_string(str)
 def serialize_via_numeric_array(arr):
     """Serialize Numeric array into a compressed string."""
     # dump, then compress, then escape for SQL
     return serialize_via_numeric_array_escape(serialize_via_numeric_array_compr(serialize_via_numeric_array_dumps(arr)))
 def deserialize_via_numeric_array(string):
     """Decompress and deserialize string into a Numeric array."""
     return Numeric.loads(decompress(string))
 def serialize_via_marshal(obj):
     """Serialize Python object via marshal into a compressed string."""
     # the result is also SQL-escaped, so it can be placed in a quoted SQL value
     return MySQLdb.escape_string(compress(dumps(obj)))
 def deserialize_via_marshal(string):
     """Decompress and deserialize string into a Python object via marshal."""
     return loads(decompress(string))
 
 def showtime(timeused):
     """Log the time used, in whole seconds, when the verbose level is 9 or higher."""
     if options["verbose"] < 9:
         return
     write_message("Time used: %d second(s)." % timeused)
diff --git a/modules/bibrank/lib/bibrank_tag_based_indexer.py.wml b/modules/bibrank/lib/bibrank_tag_based_indexer.py.wml
index 258365289..5f912345e 100644
--- a/modules/bibrank/lib/bibrank_tag_based_indexer.py.wml
+++ b/modules/bibrank/lib/bibrank_tag_based_indexer.py.wml
@@ -1,539 +1,535 @@
 ## $Id$
 ## Ranking of records using different parameters and methods.
 
 ## This file is part of the CERN Document Server Software (CDSware).
 ## Copyright (C) 2002 CERN.
 ##
 ## The CDSware is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## The CDSware is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDSware; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 ## read config variables:
 #include "cdswmllib.wml"
 
 <protect># -*- coding: utf-8 -*-</protect>
 <protect>## $Id$</protect>
 <protect>## DO NOT EDIT THIS FILE!  IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
 
 __version__ = "<: print generate_pretty_version_string('$Id$'); :>"
 
 from marshal import loads,dumps
 from zlib import compress,decompress
 from string import split,translate,lower,upper
 import getopt
 import getpass
 import string
 import os
 import sre
 import sys
 import time
 import MySQLdb
 import Numeric
 import urllib
 import signal
 import tempfile
 import unicodedata
 import traceback
 import cStringIO
 import re
 import copy
 import types
 import ConfigParser
 
 from config import *
 from search_engine_config import cfg_max_recID
 from search_engine import perform_request_search, strip_accents
 from search_engine import HitSet, get_index_id, create_basic_search_units
 from dbquery import run_sql
 
 options = {}
 
 def single_tag_rank_method_exec(rank_method_code, name, config):
     """Build the ranking data of a single tag rank method and store it.
     The data already stored for rank_method_code is loaded with fromDB,
     merged with the ranks freshly computed by single_tag_rank(config)
     (on conflict the new value wins, see union_dicts) and written back
     with intoDB.  The name argument is accepted but not used here."""
     # NOTE(review): startCreate and the initial empty rnkset are never read.
     startCreate = time.time()
     rnkset = {}
     rnkset_old = fromDB(rank_method_code)
-    #if options["quick"] == "no":
-    #    [[0, 1000]]
-    #    print options["recid_range"]
-        
     # timestamp is taken before the computation starts; intoDB stores it
     # as the method's last_updated value
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     rnkset_new = single_tag_rank(config)
     rnkset = union_dicts(rnkset_old, rnkset_new)
     intoDB(rnkset, date, rank_method_code)
 
 def single_tag_rank(config):
     """Rank records by looking up the value of one tag in a knowledge base file.
     The config section named by the "function" option of [rank_method]
     supplies: kb_src (path of the knowledge base file holding one
     "value---rank" pair per line, lines starting with "#" are skipped),
     tag (the tag whose values are looked up) and check_mandatory_tags
     (comma separated tags a record must also have; may be empty).
     Only records within options["recid_range"] that are contained in
     options["validset"] are ranked.
     Returns a dict mapping recID to its rank: the highest knowledge base
     rank among the record's values of the tag, or 0 when none of its
     values is listed in the knowledge base."""
     if options["verbose"] >= 9:
         write_message("Loading knowledgebase file")
     section = config.get("rank_method", "function")
     kb_src = config.get(section, "kb_src")
     write_message("Reading knowledgebase file: %s" % kb_src)
     kb_data = {}
     kb_file = open(kb_src, 'r')
     try:
         for line in kb_file.readlines():
             if line[0:1] == "#":
                 continue
             fields = line.strip().split("---")
             if len(fields) < 2:
                 # blank or malformed line: there is no rank to record
                 continue
             kb_data[fields[0].strip()] = fields[1]
     finally:
         # close the file even when reading fails
         kb_file.close()
     write_message("Number of lines read from knowledgebase file: %s" % len(kb_data))
     tag = config.get(section, "tag")
     tags = config.get(section, "check_mandatory_tags").split(",")
     if tags == ['']:
         tags = []
     records = []
     for (recids, recide) in options["recid_range"]:
         write_message("......Processing records #%s-%s" % (recids, recide))
         recs = run_sql("SELECT id_bibrec,value FROM bib%sx,bibrec_bib%sx WHERE tag='%s' AND id_bibxxx=id and id_bibrec >=%s and id_bibrec<=%s" % (tag[0:2], tag[0:2], tag, recids, recide))
         if tags:
             valid = HitSet(Numeric.ones(cfg_max_recID + 1))
             for key in tags:
                 newset = HitSet()
                 # each mandatory tag lives in the table named after its
                 # own two leading digits, not in the ranked tag's table
                 newset.addlist(run_sql("SELECT id_bibrec FROM bib%sx,bibrec_bib%sx WHERE id_bibxxx=id AND tag='%s' and id_bibrec >=%s and id_bibrec<=%s" % (key[0:2], key[0:2], key, recids, recide)))
                 valid.intersect(newset)
             recs = [rec for rec in recs if valid.contains(rec[0])]
         records.extend(list(recs))
         write_message("Number of records found with the necessary tags: %s" % len(records))
     validset = options["validset"]
     records = [rec for rec in records if validset.contains(rec[0])]
     rnkset = {}
     for (key, value) in records:
         if value in kb_data:
             rank = float(kb_data[value])
             # a record may carry the tag several times: keep the best rank
             if key not in rnkset or rank > rnkset[key]:
                 rnkset[key] = rank
         elif key not in rnkset:
             # unknown value: rank 0, but never overwrite a rank found earlier
             rnkset[key] = 0
     write_message("Number of records available in rank method: %s" % len(rnkset))
     return rnkset
 
 def get_lastupdated(rank_method_code):
     """Return the last_updated value stored in rnkMETHOD for the method
     named rank_method_code.  Raises Exception when no such row exists."""
     rows = run_sql("SELECT rnkMETHOD.last_updated FROM rnkMETHOD WHERE name='%s'" % rank_method_code)
     if not rows:
         raise Exception("Is this the first run? Please do a complete update.")
     return rows[0][0]
 
 def intoDB(dict, date, rank_method_code):
     """Store dict as the relevance data of a rank method.
     Any data previously stored for rank_method_code is deleted first, the
     marshalled dict is inserted into rnkMETHODDATA and the method's
     last_updated column is set to date."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     del_rank_method_codeDATA(rank_method_code)
     method_id = method_rows[0][0]
     serialized = serialize_via_marshal(dict)
     run_sql("INSERT INTO rnkMETHODDATA(id_rnkMETHOD, relevance_data) VALUES ('%s','%s')" % (method_id, serialized))
     run_sql("UPDATE rnkMETHOD SET last_updated='%s' WHERE name='%s'" % (date, rank_method_code))
 
 def fromDB(rank_method_code):
     """Load the relevance data stored for a rank method.
     Returns the unmarshalled object from rnkMETHODDATA, or an empty dict
     when nothing has been stored for the method yet."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     data_rows = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s" % method_rows[0][0])
     if not data_rows:
         return {}
     return deserialize_via_marshal(data_rows[0][0])
 
 def del_rank_method_codeDATA(rank_method_code):
     """Remove every rnkMETHODDATA row belonging to the rank method named
     rank_method_code."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     method_id = method_rows[0][0]
     run_sql("DELETE FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s" % method_id)
 
 def del_recids(rank_method_code, range):
     """Delete some records from the stored data of a rank method.
     range is a list of (first recID, last recID) pairs; both ends are
     inclusive, so a pair like [5, 5] removes record 5.  The reduced data
     is written back with intoDB, stamped with the current time.
     When no data is stored for the method only a message is written."""
     method_rows = run_sql("SELECT id from rnkMETHOD where name='%s'" % rank_method_code)
     res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s" % method_rows[0][0])
     if not res:
         write_message("Create before deleting!")
         return
     rec_dict = deserialize_via_marshal(res[0][0])
     write_message("Old size: %s" % len(rec_dict))
     for (recids, recide) in range:
         # the parameter name hides the builtin range(), hence the manual
         # counter; the upper bound is part of the range
         recid = int(recids)
         last = int(recide)
         while recid <= last:
             if recid in rec_dict:
                 del rec_dict[recid]
             recid += 1
     write_message("New size: %s" % len(rec_dict))
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     intoDB(rec_dict, date, rank_method_code)
     
 def union_dicts(dict1, dict2):
     """Return a new dict holding the items of dict1 and dict2; for keys
     present in both, the value of dict2 is kept."""
     merged = {}
     # dict2 is copied last so that its values take precedence
     for source in (dict1, dict2):
         for (key, value) in source.iteritems():
             merged[key] = value
     return merged
 
 def rank_method_code_statistics(rank_method_code):
     """Print statistics about the stored data of a rank method.
     Reports the method's names, its last run, the number of ranked
     records, the lowest positive and the highest rank value (each with the
     number of records having it) and the distribution of the values over
     10 equally wide sets between -1.0 and the highest value."""
     method = fromDB(rank_method_code)
     # Plain numbers as start values: comparing a rank with a tuple never
     # updates the maximum in Python 2 and breaks the arithmetic below.
     lowest = 999999
     highest = -999999
     for value in method.values():
         if value > 0 and value < lowest:
             lowest = value
         if value > highest:
             highest = value
     lowest_count = 0
     highest_count = 0
     for value in method.values():
         if value == lowest:
             lowest_count += 1
         if value == highest:
             highest_count += 1
     write_message("Showing statistic for selected method")
     write_message("Method name: %s" % getName(rank_method_code))
     write_message("Short name: %s" % rank_method_code)
     write_message("Last run: %s" % get_lastupdated(rank_method_code))
     write_message("Number of records: %s" % len(method))
     write_message("Lowest value: %s - Number of records: %s" % (lowest, lowest_count))
     write_message("Highest value: %s - Number of records: %s" % (highest, highest_count))
     write_message("Divided into 10 sets:")
     step = float(highest + 1) / 10
     for i in range(1, 11):
         set_low = -1.0 + step * (i - 1)
         set_high = -1.0 + step * i
         setcount = 0
         distinct_values = {}
         for value in method.values():
             if value >= set_low and value <= set_high:
                 setcount += 1
                 distinct_values[value] = 1
         # number of records in the set first, then the distinct value count
         write_message("Set %s (%s-%s) %s Distinct values: %s" % (i, set_low, set_high, setcount, len(distinct_values)))
 
 def check_method(rank_method_code):
     """Tell the user whether the rank method has stored data at all and,
     if so, whether add_date() reports records that call for an update."""
     write_message("Checking rank method...")
     stored = fromDB(rank_method_code)
     if len(stored) == 0:
         write_message("Rank method not yet executed, please run it to create the necessary data.")
         return
     pending = add_date(rank_method_code)
     if len(pending) > 0:
         verdict = "Records modified, update recommended"
     else:
         verdict = "No records modified, update not necessary"
     write_message(verdict)
  
 def write_message(msg, stream = sys.stdout):
     """Print msg, prefixed with the current local time, on stream and flush
     it.  stream has to be sys.stdout or sys.stderr; for anything else a
     complaint is written to sys.stderr and msg is dropped."""
     if stream not in (sys.stdout, sys.stderr):
         sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream)
         return
     timestamp = time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())
     stream.write(timestamp)
     stream.write("%s\n" % msg)
     stream.flush()
     return
 
 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"):
     """Return a date string formatted according to format_string.
     var is either a shift relative to now, written as an optional sign, a
     number and one of the units d, h, m, s (e.g. "-2d" or "+30m"), or a
     date string in format_string, which is parsed and formatted again.
     A date string that does not fit format_string makes time.strptime
     raise ValueError."""
     # the re module replaces its deprecated alias sre
     shift_re = re.compile(r"([-\+]{0,1})([\d]+)([dhms])")
     factors = {"d": 24 * 3600, "h": 3600, "m": 60, "s": 1}
     m = shift_re.match(var)
     if m:
         (sign_text, amount, unit) = m.groups()
         sign = 1
         if sign_text == "-":
             sign = -1
         shifted = time.time() + sign * factors[unit] * float(amount)
         date = time.localtime(shifted)
     else:
         date = time.strptime(var, format_string)
     return time.strftime(format_string, date)
 
 def task_sig_sleep(sig, frame):
     """Handle the 'sleep' signal sent by BibSched: mark the task as
     SLEEPING and block until another signal arrives."""
     debug = options["verbose"] >= 9
     if debug:
         write_message("got signal %d" % sig)
     write_message("sleeping...")
     task_update_status("SLEEPING")
     # signal.pause() returns only after a signal (the wake-up) was handled
     signal.pause()
 
 def task_sig_wakeup(sig, frame):
     """Handle the 'wakeup' signal sent by BibSched: mark the task as
     CONTINUING."""
     debug = options["verbose"] >= 9
     if debug:
         write_message("got signal %d" % sig)
     write_message("continuing...")
     task_update_status("CONTINUING")
 
 def task_sig_stop(sig, frame):
     """Signal handler for the 'stop' signal sent by BibSched."""
     if options["verbose"]>= 9:
         write_message("got signal %d" % sig)
     write_message("stopping...")
     task_update_status("STOPPING")
     errcode = 0
     try:
         task_sig_stop_commands()
         write_message("stopped")
         task_update_status("STOPPED")
     except StandardError, err:
         write_message("Error during stopping! %e" % err)
         task_update_status("STOPPINGFAILED")
         errcode = 1
     sys.exit(errcode)
 
 def task_sig_stop_commands():
     """Hook called by task_sig_stop() for whatever has to be done before
     the task quits; at present it only writes a start and an end message."""
     for message in ("stopping commands started", "stopping commands ended"):
         write_message(message)
 
 def task_sig_suicide(sig, frame):
     """Handle the 'suicide' signal sent by BibSched: record the statuses
     SUICIDING and SUICIDED, then exit with code 0."""
     debug = options["verbose"] >= 9
     if debug:
         write_message("got signal %d" % sig)
     steps = (("suiciding myself now...", "SUICIDING"), ("suicided", "SUICIDED"))
     for (announcement, status) in steps:
         write_message(announcement)
         task_update_status(status)
     sys.exit(0)
 
 def task_sig_unknown(sig, frame):
     """Handle any other signal sent by the shell or the user: it is
     reported and otherwise ignored."""
     debug = options["verbose"] >= 9
     if debug:
         write_message("got signal %d" % sig)
     # nothing else is done for such signals
     write_message("unknown signal %d ignored" % sig)
 
 def task_update_progress(msg):
     """Store msg as the progress text of the current task (global task_id)
     in the schTASK table; the query is echoed at verbosity 9 or higher."""
     escaped = MySQLdb.escape_string(msg)
     query = "UPDATE schTASK SET progress='%s' where id=%d" % (escaped, task_id)
     debug = options["verbose"] >= 9
     if debug:
         write_message(query)
     run_sql(query)
     return
 
 def task_update_status(val):
     """Store val as the status of the current task (global task_id) in the
     schTASK table; the query is echoed at verbosity 9 or higher."""
     escaped = MySQLdb.escape_string(val)
     query = "UPDATE schTASK SET status='%s' where id=%d" % (escaped, task_id)
     debug = options["verbose"] >= 9
     if debug:
         write_message(query)
     run_sql(query)
     return
 
 def split_ranges(parse_string):
     """Parse a recID selection such as "234-250,293,300-500".
     Returns a list of [low, high] integer pairs, one per comma separated
     item; a single number n becomes [n, n] and reversed bounds are put in
     order.  Empty items (as in "" or "1-3,") are skipped and blanks
     around numbers are ignored.  Non-numeric bounds raise ValueError."""
     recIDs = []
     for item in parse_string.split(","):
         item = item.strip()
         if not item:
             continue
         bounds = item.split("-")
         low = int(bounds[0])
         if len(bounds) == 1:
             high = low
         else:
             high = int(bounds[1])
         if low > high: # sanity check
             low, high = high, low
         recIDs.append([low, high])
     return recIDs
 
 def bibrank_engine(row, run):
     """Run the indexing task. The row argument is the BibSched task
     queue row, containing id, arguments, etc.; run is the code of the
     rank method to execute.
     Return 1 in case of success; any failure is reported and re-raised
     as StandardError (0 is never returned).
     """
    
     # psyco is optional: when importing or binding fails the problem is
     # printed and the task carries on without it
     try:
         import psyco
         psyco.bind(single_tag_rank) 
         psyco.bind(single_tag_rank_method_exec)
         psyco.bind(serialize_via_numeric_array)
         psyco.bind(deserialize_via_numeric_array)
     except StandardError, e: 
         print "Psyco ERROR",e 
 
     startCreate = time.time()
     # options and task_id are module globals read by the signal handlers
     # and the task_update_* helpers
     global options, task_id
     task_id = row[0]
     task_proc = row[1]
     # column 6 of the row holds the marshalled options dictionary
     options = loads(row[6])
 
     task_starting_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     signal.signal(signal.SIGUSR1, task_sig_sleep)
     signal.signal(signal.SIGTERM, task_sig_stop)
     signal.signal(signal.SIGABRT, task_sig_suicide)
     signal.signal(signal.SIGCONT, task_sig_wakeup)
     signal.signal(signal.SIGINT, task_sig_unknown)
 
     sets = {}
     try:
         # whatever "run" held in the stored options is replaced by the
         # single method code passed in
         options["run"] = []
         options["run"].append(run)
         for rank_method_code in options["run"]:
             cfg_name = getName(rank_method_code)
             if options["verbose"] >= 0:
                 write_message("Running rank method: %s." % cfg_name)
 
             # each method is configured by <etcdir>/bibrank/<code>.cfg
             file = etcdir + "/bibrank/" + rank_method_code + ".cfg"
             config = ConfigParser.ConfigParser()
             try:
                 config.readfp(open(file))
             except StandardError, e:
                 write_message("Cannot find configurationfile: %s" % file, sys.stderr)
                 raise StandardError
 
             cfg_short = rank_method_code
             # name of the module level function executed for the "add" command
             cfg_function = config.get("rank_method", "function") + "_exec"
             cfg_name = getName(cfg_short)
             options["validset"] = get_valid_range(rank_method_code)
 
             # choose the records to work on; the first option that is set
             # wins: collection, id, modified, last_updated, else all records
             if options["collection"]:
                 l_of_colls = string.split(options["collection"], ",")
                 recIDs = perform_request_search(c=l_of_colls)
                 recIDs_range = []
                 for recID in recIDs:
                     recIDs_range.append([recID,recID])
                 options["recid_range"] = recIDs_range
             elif options["id"]:
                 options["recid_range"] = options["id"]
             elif options["modified"]:
                 options["recid_range"] = add_date(rank_method_code, options["modified"])
             elif options["last_updated"]:
                 options["recid_range"] = add_date(rank_method_code)
             else:
                 if options["verbose"] > 1:
                     write_message("No records specified, updating all")
                 min_id = run_sql("SELECT min(id) from bibrec")[0][0]
                 max_id = run_sql("SELECT max(id) from bibrec")[0][0]
                 options["recid_range"] = [[min_id, max_id]] 
 
             if options["quick"] == "no" and options["verbose"] >= 9:
-                write_message("Rebalance not yet enabled, parameter ignored.")
+                write_message("Recalculate parameter not used, parameter ignored.")
 
             # dispatch on the requested command
             if options["cmd"] == "del":
                 del_recids(cfg_short, options["recid_range"])
             elif options["cmd"] == "add":
                 # looked up by name among the module globals
                 func_object = globals().get(cfg_function)
                 func_object(rank_method_code, cfg_name, config)
             elif options["cmd"] == "stat":
                 rank_method_code_statistics(rank_method_code)
             elif options["cmd"] == "check":
                 check_method(rank_method_code)
             else:
                 write_message("Invalid command found processing %s" % rank_method_code, sys.stderr)
                 raise StandardError
     except StandardError, e:
         # report the failure (with traceback at verbosity 9 or higher) and
         # hand a fresh StandardError to the caller
         write_message("\nException caught: %s" % e, sys.stderr)
         if options["verbose"] >= 9:      
             traceback.print_tb(sys.exc_info()[2])
         raise StandardError
 
     if options["verbose"]:
         showtime((time.time() - startCreate))
     return 1
 
 def get_valid_range(rank_method_code):
     """Return a HitSet with the records of all collections the rank method
     named rank_method_code is attached to; the set stays empty when the
     method is attached to no collection."""
     debug = options["verbose"] >= 9
     if debug:
         write_message("Getting records from collections enabled for rank method.")
     query = "SELECT collection.name FROM collection,collection_rnkMETHOD,rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name='%s'" %  rank_method_code
     collection_names = [entry[0] for entry in run_sql(query)]
     recIDs = []
     if len(collection_names) > 0:
         recIDs = perform_request_search(c=collection_names)
     valid = HitSet()
     valid.addlist(recIDs)
     return valid
    
 def add_date(rank_method_code, date=""):
     """Return the ranges of records modified within a period of time.
     date is indexed as a (from, to) pair of date strings; an empty "to"
     leaves the period open ended.  A plain string is taken as the "from"
     date.  When date is not given, the period starts at the last update of
     the rank method, or covers everything when the method was never run.
     NOTE(review): the shape of options["modified"] passed in by
     bibrank_engine is not visible here - confirm it is such a pair.
     Returns a list of [low, high] recID ranges (see create_range_list)."""
     if not date:
         try:
             date = (get_lastupdated(rank_method_code), '')
         except Exception:
             # keep the (from, to) shape: indexing a bare string below would
             # only pick up its first two characters
             date = ("0000-00-00 00:00:00", '')
     elif isinstance(date, str):
         date = (date, '')
     query = "SELECT b.id FROM bibrec AS b WHERE b.modification_date >= '%s'" % date[0]
     if date[1]:
         query += " AND b.modification_date <= '%s'" % date[1]
     query += " ORDER BY b.id ASC"
     res = run_sql(query)
     range_list = create_range_list(res)
     if not range_list:
         if options["verbose"]:
             write_message("No new records added since last time method was run")
     return range_list
 
 def getName(rank_method_code, ln=cdslang, type='ln'):
     """Returns the name of the method if it exists"""
 
     try:
         rnkid = run_sql("SELECT id FROM rnkMETHOD where name='%s'" % rank_method_code)
         if rnkid:
             rnkid = str(rnkid[0][0])
             res = run_sql("SELECT value FROM rnkMETHODNAME where type='%s' and ln='%s' and id_rnkMETHOD=%s" % (type, ln, rnkid))
             if not res:
                 res = run_sql("SELECT value FROM rnkMETHODNAME WHERE ln='%s' and id_rnkMETHOD=%s and type='%s'"  % (cdslang, rnkid, type))
             if not res: 
                 return rank_method_code
             return res[0][0]
         else:
             raise Exception
     except Exception, e:
         write_message("Cannot run rank method, either given code for method is wrong, or it has not been added using the webinterface.")
         raise Exception
 
 def create_range_list(res):
     """Collapse the rows of a recID select into a list of [low, high] ranges.
     res is a sequence of rows whose first column is a recID, sorted
     ascending; consecutive recIDs are merged into one inclusive range.
     An empty result (or an empty first row) yields []."""
     if not res or not res[0]:
         return []
     first_id = res[0][0]
     ranges = [[first_id, first_id]]
     for record in res[1:]:
         rec_id = record[0]
         current = ranges[-1]
         if rec_id == current[1] + 1:
             # directly follows the open range: extend it
             current[1] = rec_id
         else:
             ranges.append([rec_id, rec_id])
     return ranges
 
 def single_tag_rank_method(row, run):
     """Run the single tag rank method: forward the task row and the rank
     method code to bibrank_engine() and hand back whatever it returns."""
     outcome = bibrank_engine(row, run)
     return outcome
 
 def serialize_via_numeric_array_dumps(arr):
     """Return the Numeric.dumps() representation of arr."""
     dumped = Numeric.dumps(arr)
     return dumped
 def serialize_via_numeric_array_compr(str):
     """Return the zlib-compressed form of str."""
     packed = compress(str)
     return packed
 def serialize_via_numeric_array_escape(str):
     """Return str passed through MySQLdb.escape_string()."""
     escaped = MySQLdb.escape_string(str)
     return escaped
 def serialize_via_numeric_array(arr):
     """Turn a Numeric array into a string: dump it, compress the dump and
     finally escape the compressed string with MySQLdb.escape_string."""
     dumped = serialize_via_numeric_array_dumps(arr)
     packed = serialize_via_numeric_array_compr(dumped)
     return serialize_via_numeric_array_escape(packed)
 def deserialize_via_numeric_array(string):
     """Rebuild a Numeric array from a compressed dump: decompress string,
     then load the result with Numeric.loads."""
     raw = decompress(string)
     return Numeric.loads(raw)
 def serialize_via_marshal(obj):
     """Turn a Python object into a string: marshal it, compress the result
     and escape the compressed string with MySQLdb.escape_string."""
     marshalled = dumps(obj)
     packed = compress(marshalled)
     return MySQLdb.escape_string(packed)
 def deserialize_via_marshal(string):
     """Rebuild a Python object from a compressed marshal dump: decompress
     string, then load the result with marshal.loads."""
     raw = decompress(string)
     return loads(raw)
 
 def showtime(timeused):
     """Report the elapsed time (in whole seconds) of a method run; only
     printed when options["verbose"] is 9 or higher."""
     if options["verbose"] < 9:
         return
     write_message("Time used: %d second(s)." % timeused)