diff --git a/modules/webalert/lib/alert_engine.py b/modules/webalert/lib/alert_engine.py index 27b00ca8e..2b3e759ea 100644 --- a/modules/webalert/lib/alert_engine.py +++ b/modules/webalert/lib/alert_engine.py @@ -1,348 +1,487 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Alert engine implementation.""" ## rest of the Python code goes below __revision__ = "$Id$" from cgi import parse_qs from re import search, sub from time import strftime import datetime from invenio.config import \ CFG_LOGDIR, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_URL, \ CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES, \ - CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES + CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES, \ + CFG_SITE_NAME from invenio.webbasket_dblayer import get_basket_owner_id, add_to_basket -from invenio.search_engine import perform_request_search +from invenio.search_engine import perform_request_search, wash_colls, get_coll_sons, is_hosted_collection from invenio.webinterface_handler import wash_urlargd from invenio.dbquery import run_sql from invenio.webuser import get_email from invenio.mailutils import send_email from invenio.errorlib import register_exception from invenio.alert_engine_config import CFG_WEBALERT_DEBUG_LEVEL +from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_TIMEOUT +from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS +from invenio.websearch_external_collections_getter import HTTPAsyncPageGetter, async_download +from invenio.websearch_external_collections_utils import get_collection_id + import invenio.template websearch_templates = invenio.template.load('websearch') webalert_templates = invenio.template.load('webalert') def update_date_lastrun(alert): - return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;', (strftime("%Y-%m-%d"), alert[0], alert[1], alert[2],)) + """Update the last time this alert was run in the database.""" + return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;', (strftime("%Y-%m-%d"), alert[0], alert[1], alert[2],)) def get_alert_queries(frequency): + """Return all the queries for the given frequency.""" + return run_sql('select distinct id, urlargs from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.frequency=%s and uqb.date_lastrun <= now();', (frequency,)) def get_alert_queries_for_user(uid): + """Returns all the queries for the given user id.""" + return run_sql('select distinct id, urlargs, uqb.frequency from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.id_user=%s and uqb.date_lastrun <= now();', (uid,)) def get_alerts(query, frequency): + """Returns a
dictionary of all the records found for a specific query and frequency along with other information.""" + r = run_sql('select id_user, id_query, id_basket, frequency, date_lastrun, alert_name, notification from user_query_basket where id_query=%s and frequency=%s;', (query['id_query'], frequency,)) return {'alerts': r, 'records': query['records'], 'argstr': query['argstr'], 'date_from': query['date_from'], 'date_until': query['date_until']} -# Optimized version: -def add_records_to_basket(record_ids, basket_id): - nrec = len(record_ids) +def add_records_to_basket(records, basket_id): + """Add the given records to the given basket.""" + + nrec = len(records[0]) if nrec > 0: if CFG_WEBALERT_DEBUG_LEVEL > 0: - print "-> adding %s records into basket %s: %s" % (nrec, basket_id, record_ids) + print "-> adding %s records into basket %s: %s" % (nrec, basket_id, records[0]) try: if CFG_WEBALERT_DEBUG_LEVEL < 4: owner_uid = get_basket_owner_id(basket_id) - add_to_basket(owner_uid, record_ids, [basket_id]) + add_to_basket(owner_uid, records[0], 0, [basket_id]) else: print ' NOT ADDED, DEBUG LEVEL == 4' except Exception: register_exception() - + for external_collection_results in records[1][0]: + nrec = len(external_collection_results[1][0]) + if nrec > 0: + if CFG_WEBALERT_DEBUG_LEVEL > 0: + print "-> adding %s external records (collection \"%s\") into basket %s: %s" % (nrec, external_collection_results[0], basket_id, external_collection_results[1][0]) + try: + if CFG_WEBALERT_DEBUG_LEVEL < 4: + owner_uid = get_basket_owner_id(basket_id) + collection_id = get_collection_id(external_collection_results[0]) + add_to_basket(owner_uid, external_collection_results[1][0], collection_id, [basket_id]) + # TBD: maybe cache here the html brief format of the external records for the baskets later? + else: + print ' NOT ADDED, DEBUG LEVEL == 4' + except Exception: + register_exception() def get_query(alert_id): + """Returns the query that corresponds to this alert id.""" + r = run_sql('select urlargs from query where id=%s', (alert_id,)) return r[0][0] def email_notify(alert, records, argstr): + """Send the notification e-mail for a specific alert.""" - if len(records) == 0: - return + if len(records[0]) == 0: + total_n_external_records = 0 + for external_collection_results in records[1][0]: + total_n_external_records += len(external_collection_results[1][0]) + if total_n_external_records == 0: + return msg = "" if CFG_WEBALERT_DEBUG_LEVEL > 0: msg = "*** THIS MESSAGE WAS SENT IN DEBUG MODE ***\n\n" url = CFG_SITE_URL + "/search?"
+ argstr - # Extract the pattern and catalogue list from the formatted query + # Extract the pattern, the collection list, the current collection + # and the sc (split collection) from the formatted query query = parse_qs(argstr) pattern = query.get('p', [''])[0] - catalogues = query.get('c', []) + collection_list = query.get('c', []) + current_collection = query.get('cc', ['']) + sc = query.get('sc', ['1']) + collections = calculate_desired_collection_list(collection_list, current_collection, int(sc[0])) frequency = alert[3] msg += webalert_templates.tmpl_alert_email_body( - alert[5], url, records, pattern, catalogues, frequency) + alert[5], url, records, pattern, collections, frequency) email = get_email(alert[0]) if email == 'guest': print "********************************************************************************" print "The following alert was not sent, because the user's email address could not be detected:" print " " + repr(argstr) print "********************************************************************************" return if CFG_WEBALERT_DEBUG_LEVEL > 0: print "********************************************************************************" print msg print "********************************************************************************" if CFG_WEBALERT_DEBUG_LEVEL < 2: send_email(fromaddr=webalert_templates.tmpl_alert_email_from(), toaddr=email, subject=webalert_templates.tmpl_alert_email_title(alert[5]), content=msg, header='', footer='', attempt_times=CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES, attempt_sleeptime=CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES) if CFG_WEBALERT_DEBUG_LEVEL == 4: send_email(fromaddr=webalert_templates.tmpl_alert_email_from(), toaddr=CFG_SITE_SUPPORT_EMAIL, subject=webalert_templates.tmpl_alert_email_title(alert[5]), content=msg, header='', footer='', attempt_times=CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES, attempt_sleeptime=CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES) -def get_argument(args, argname): - if args.has_key(argname): - return args[argname] - else: - return [] - def _date_to_tuple(date): + """Private function.
Converts a date object into a list of integers (year, month, day).""" + return [int(part) for part in (date.year, date.month, date.day)] def get_record_ids(argstr, date_from, date_until): + """Returns the local and external records found for a specific query and timeframe.""" + argd = wash_urlargd(parse_qs(argstr), websearch_templates.search_results_default_urlargd) - p = get_argument(argd, 'p') - c = get_argument(argd, 'c') - cc = get_argument(argd, 'cc') - aas = get_argument(argd, 'aas') - f = get_argument(argd, 'f') - so = get_argument(argd, 'so') - sp = get_argument(argd, 'sp') - ot = get_argument(argd, 'ot') - p1 = get_argument(argd, 'p1') - f1 = get_argument(argd, 'f1') - m1 = get_argument(argd, 'm1') - op1 = get_argument(argd, 'op1') - p2 = get_argument(argd, 'p2') - f2 = get_argument(argd, 'f2') - m2 = get_argument(argd, 'm2') - op2 = get_argument(argd, 'op2') - p3 = get_argument(argd, 'p3') - f3 = get_argument(argd, 'f3') - m3 = get_argument(argd, 'm3') - sc = get_argument(argd, 'sc') + p = argd.get('p', []) + c = argd.get('c', []) + cc = argd.get('cc', []) + aas = argd.get('aas', []) + f = argd.get('f', []) + so = argd.get('so', []) + sp = argd.get('sp', []) + ot = argd.get('ot', []) + p1 = argd.get('p1', []) + f1 = argd.get('f1', []) + m1 = argd.get('m1', []) + op1 = argd.get('op1', []) + p2 = argd.get('p2', []) + f2 = argd.get('f2', []) + m2 = argd.get('m2', []) + op2 = argd.get('op2', []) + p3 = argd.get('p3', []) + f3 = argd.get('f3', []) + m3 = argd.get('m3', []) + sc = argd.get('sc', []) d1y, d1m, d1d = _date_to_tuple(date_from) d2y, d2m, d2d = _date_to_tuple(date_until) - return perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot, + washed_colls = wash_colls(cc, c, sc, 0) + hosted_colls = washed_colls[3] + if hosted_colls: + req_args = "p=%s&d1d=%s&d1m=%s&d1y=%s&d2d=%s&d2m=%s&d2y=%s&ap=%i" % (p, d1d, d1m, d1y, d2d, d2m, d2y, 0) + external_records = calculate_external_records(req_args, [p, p1, p2, p3], f, hosted_colls, CFG_EXTERNAL_COLLECTION_TIMEOUT, CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS) + else: + external_records = ([], []) + + recids = perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot, aas=aas, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y, d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d) - -def get_argument_as_string(argstr, argname): - args = parse_qs(argstr) - a = get_argument(args, argname) - r = '' - if len(a): - r = a[0] - for i in a[1:len(a)]: - r += ", %s" % i - return r - -def get_pattern(argstr): - return get_argument_as_string(argstr, 'p') - -def get_catalogue(argstr): - return get_argument_as_string(argstr, 'c') - -def get_catalogue_num(argstr): - args = parse_qs(argstr) - a = get_argument(args, 'c') - return len(a) - + return (recids, external_records) def run_query(query, frequency, date_until): """Return a dictionary containing the information of the performed query. The information contains the id of the query, the arguments as a string, and the list of found records.""" if frequency == 'day': date_from = date_until - datetime.timedelta(days=1) elif frequency == 'week': date_from = date_until - datetime.timedelta(weeks=1) else: # Months are not an explicit notion of timedelta (it's the # most ambiguous too). So we explicitly take the same day of # the previous month.
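# Note (editor's comment, not part of the original patch): the month
# arithmetic below rolls January back to December of the previous year while
# keeping the same day-of-month. A date_until near the end of a month (for
# example 31 May) therefore asks datetime.date() for 31 April and raises
# ValueError. run_alerts() only fires monthly alerts when date.day == 1, where
# this cannot happen, but process_alert_queries_for_user() can run a monthly
# query on any day and can hit this edge case.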
d, m, y = (date_until.day, date_until.month, date_until.year) m = m - 1 if m == 0: m = 12 y = y - 1 date_from = datetime.date(year=y, month=m, day=d) recs = get_record_ids(query[1], date_from, date_until) - n = len(recs) + n = len(recs[0]) if n: - log('query %08s produced %08s records' % (query[0], len(recs))) + log('query %08s produced %08s records for all the local collections' % (query[0], n)) + + for external_collection_results in recs[1][0]: + n = len(external_collection_results[1][0]) + if n: + log('query %08s produced %08s records for external collection \"%s\"' % (query[0], n, external_collection_results[0])) if CFG_WEBALERT_DEBUG_LEVEL > 2: print "[%s] run query: %s with dates: from=%s, until=%s\n found rec ids: %s" % ( strftime("%c"), query, date_from, date_until, recs) return {'id_query': query[0], 'argstr': query[1], 'records': recs, 'date_from': date_from, 'date_until': date_until} - def process_alert_queries(frequency, date): """Run the alerts according to the frequency. Retrieves the queries for which an alert exists, performs them, and processes the corresponding alerts.""" alert_queries = get_alert_queries(frequency) for aq in alert_queries: q = run_query(aq, frequency, date) alerts = get_alerts(q, frequency) process_alerts(alerts) +def process_alert_queries_for_user(uid, date): + """Process the alerts for the given user id. + + All alerts are run with the reference date set to the current local time.""" + + alert_queries = get_alert_queries_for_user(uid) + + for aq in alert_queries: + frequency = aq[2] + q = run_query(aq, frequency, date) + alerts = get_alerts(q, frequency) + process_alerts(alerts) def replace_argument(argstr, argname, argval): """Replace the given date argument value with the new one. If the argument is missing, it is added.""" if search('%s=\d+' % argname, argstr): r = sub('%s=\d+' % argname, '%s=%s' % (argname, argval), argstr) else: r = argstr + '&%s=%s' % (argname, argval) return r def update_arguments(argstr, date_from, date_until): """Replace date arguments in argstr with the ones specified by date_from and date_until. Absent arguments are added.""" d1y, d1m, d1d = _date_to_tuple(date_from) d2y, d2m, d2d = _date_to_tuple(date_until) r = replace_argument(argstr, 'd1y', d1y) r = replace_argument(r, 'd1m', d1m) r = replace_argument(r, 'd1d', d1d) r = replace_argument(r, 'd2y', d2y) r = replace_argument(r, 'd2m', d2m) r = replace_argument(r, 'd2d', d2d) return r def log(msg): + """Logs the given message in the alert engine log.""" + try: logfile = open(CFG_LOGDIR + '/alertengine.log', 'a') logfile.write(strftime('%Y%m%d%H%M%S#')) logfile.write(msg + '\n') logfile.close() except Exception: register_exception() def process_alerts(alerts): + """Process the given alerts and store the records found in the user-defined baskets + and/or notify the user by e-mail.""" + # TBD: do not generate the email each time, forge it once and then # send it to all appropriate people for a in alerts['alerts']: if alert_use_basket_p(a): add_records_to_basket(alerts['records'], a[2]) if alert_use_notification_p(a): argstr = update_arguments(alerts['argstr'], alerts['date_from'], alerts['date_until']) try: email_notify(a, alerts['records'], argstr) except Exception: # There was trouble sending this alert, so register # this exception and continue with other alerts: register_exception(alert_admin=True, prefix="Error when sending alert %s, %s\n."
% \ (repr(a), repr(argstr))) + # Inform the admin when external collections time out + if len(alerts['records'][1][1]) > 0: + register_exception(alert_admin=True, + prefix="External collections %s timed out when sending alert %s, %s\n." % \ + (", ".join(alerts['records'][1][1]), repr(a), repr(argstr))) update_date_lastrun(a) - def alert_use_basket_p(alert): - return alert[2] != 0 + """Boolean. Should this alert store the records found in a basket?""" + return alert[2] != 0 def alert_use_notification_p(alert): - return alert[6] == 'y' + """Boolean. Should this alert send a notification e-mail about the records found?""" + return alert[6] == 'y' def run_alerts(date): """Run the alerts. First decide which alerts to run according to the current local time, and run them.""" if date.day == 1: process_alert_queries('month', date) if date.isoweekday() == 1: # first day of the week process_alert_queries('week', date) process_alert_queries('day', date) -def process_alert_queries_for_user(uid, date): - """Process the alerts for the given user id. - - All alerts are with reference date set as the current local time.""" - - alert_queries = get_alert_queries_for_user(uid) - print alert_queries - - for aq in alert_queries: - frequency = aq[2] - q = run_query(aq, frequency, date) - alerts = get_alerts(q, frequency) - process_alerts(alerts) +# External records related functions +def calculate_external_records(req_args, pattern_list, field, hosted_colls, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS): + """Function that returns the external records found and the potential timeouts + given a search pattern and a list of hosted collections.""" + + (external_search_engines, basic_search_units) = calculate_external_search_params(req_args, pattern_list, field, hosted_colls) + + return do_calculate_external_records(req_args, basic_search_units, external_search_engines, timeout, limit) + +def calculate_external_search_params(req_args, pattern_list, field, hosted_colls): + """Function that calculates the basic search units given the search pattern.
+ Also returns a set of hosted collections engines.""" + + from invenio.search_engine import create_basic_search_units + from invenio.websearch_external_collections import bind_patterns + from invenio.websearch_external_collections import select_hosted_search_engines as select_external_search_engines + + pattern = bind_patterns(pattern_list) + basic_search_units = create_basic_search_units(None, pattern, field) + + external_search_engines = select_external_search_engines(hosted_colls) + + return (external_search_engines, basic_search_units) + +def do_calculate_external_records(req_args, basic_search_units, external_search_engines, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS): + """Function that returns the external records found and the potential timeouts + given the basic search units or the req arguments and a set of hosted collections engines.""" + + # list to hold the hosted search engines and their respective search urls + engines_list = [] + # list to hold the non timed out results + results_list = [] + # list to hold all the results + full_results_list = [] + # list to hold all the timeouts + timeout_list = [] + + for engine in external_search_engines: + url = engine.build_search_url(basic_search_units, req_args, limit=limit) + if url: + engines_list.append([url, engine]) + # we end up with a list of [search_url, engine] pairs + + # create the list of search urls to be handed to the asynchronous getter + pagegetters_list = [HTTPAsyncPageGetter(engine[0]) for engine in engines_list] + + # function to be run on every result + def finished(pagegetter, data, current_time): + """Function called each time the download of a web page finishes. + Stores the page so that its results can be parsed later.""" + # each pagegetter that didn't time out is added to this list + results_list.append((pagegetter, data)) + + # run the asynchronous getter + finished_list = async_download(pagegetters_list, finished, engines_list, timeout) + + # create the complete list of tuples, one for each hosted collection, with the results and other information, + # including those that timed out + for (finished, engine) in zip(finished_list, engines_list): + if finished: + for result in results_list: + if result[1] == engine: + engine[1].parser.parse_and_get_results(result[0].data, feedonly=True) + full_results_list.append((engine[1].name, engine[1].parser.parse_and_extract_records(of="xm"))) + break + elif not finished: + timeout_list.append(engine[1].name) + + return (full_results_list, timeout_list) + +def calculate_desired_collection_list(c, cc, sc): + """Function that calculates the user's desired collection list when sending a webalert e-mail.""" + + if not cc[0]: + cc = [CFG_SITE_NAME] + + # quickly create the negation of is_hosted_collection + is_not_hosted_collection = lambda coll: not is_hosted_collection(coll) + + # calculate the list of non hosted, non restricted, regular sons of cc + washed_cc_sons = filter(is_not_hosted_collection, get_coll_sons(cc[0])) + # clean up c removing hosted collections + washed_c = filter(is_not_hosted_collection, c) + + # try to simulate the wash_colls function behavior when calculating the collections to return + if washed_cc_sons == washed_c: + if sc == 0: + return cc + elif sc == 1: + return washed_c + else: + if sc == 0: + return washed_c + elif sc == 1: + washed_c_sons = [] + for coll in washed_c: + if coll in washed_cc_sons: + washed_c_sons.extend(get_coll_sons(coll)) + else: +
washed_c_sons.append(coll) + return washed_c_sons diff --git a/modules/webalert/lib/htmlparser.py b/modules/webalert/lib/htmlparser.py index 98a4363ea..5f95fe834 100644 --- a/modules/webalert/lib/htmlparser.py +++ b/modules/webalert/lib/htmlparser.py @@ -1,187 +1,189 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """HTML parser for records.""" __revision__ = "$Id$" import re from HTMLParser import HTMLParser import textwrap import htmlentitydefs from invenio.config import \ CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL, \ CFG_SITE_LANG from invenio.bibformat import format_record from invenio.htmlutils import remove_html_markup from invenio.messages import gettext_set_language whitespaces_pattern = re.compile(r'[ \t]+') def wrap(text): """Limits the number of characters per line in given text. The function does not preserve new lines. """ lines = textwrap.wrap(text, CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL) r = '' for l in lines: r += l + '\n' return r def wrap_records(text): """Limits the number of characters per line in given text. The function preserves new lines. """ lines = text.split('\n') result_lines = [] for l in lines: newlines = textwrap.wrap(l, CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL) for ll in newlines: result_lines.append(ll) return '\n'.join(result_lines) class RecordHTMLParser(HTMLParser): """A parser for the HTML returned by invenio.search_engine.print_record. The parser provides methods to transform the HTML returned by invenio.search_engine.print_record into plain text, with some minor formatting. """ silent = False new_line = True # Are we at the beginning of a new line? (after #
<br/>, <p>
or at the beginning of the text) def __init__(self): HTMLParser.__init__(self) self.result = '' def handle_starttag(self, tag, attrs): if tag == 'strong': # self.result += '*' pass elif tag == 'a': self.printURL = 0 self.unclosedBracket = 0 for f in attrs: #if f[1] == 'note': # self.result += 'Fulltext : <' # self.unclosedBracket = 1 if f[1] == 'moreinfo': self.result += 'Detailed record : ' self.printURL = 1 if (self.printURL == 1) and (f[0] == 'href'): self.result += '<' + f[1] + '>' elif tag == 'br': self.result += '\n' elif tag == 'style' or tag == 'script': self.silent = True elif tag == 'p': if not self.new_line: self.result += '\n' self.new_line = True def handle_startendtag(self, tag, attrs): if tag == 'br': self.result += '\n' self.new_line = True def handle_endtag(self, tag): if tag == 'strong': # self.result += '\n' pass elif tag == 'a': if self.unclosedBracket == 1: self.result += '>' self.unclosedBracket = 0 elif tag == 'style' or tag == 'script': self.silent = False elif tag == 'p': self.result += '\n' self.new_line = True def handle_data(self, data): if data.lower() in ['detailed record', 'similar record', 'cited by']: pass elif self.silent == False: if self.new_line: # Remove unnecessary trailing whitespace at the # beginning of a line data = data.lstrip() # Merge all consecutive whitespaces into a single one self.result += data self.new_line = False def handle_comment(self, data): if 'START_NOT_FOR_TEXT' == data.upper().strip(): self.silent = True elif 'END_NOT_FOR_TEXT' == data.upper().strip(): self.silent = False def handle_charref(self, name): """Process character references of the form "&#ref;". Transform to text whenever possible.""" try: self.result += unichr(int(name)).encode("utf-8") except: return def handle_entityref(self, name): """Process a general entity reference of the form "&name;". Transform to text whenever possible.""" if name == 'nbsp': # Keep them for the moment. Will be processed at the end. # It is needed to consider them separately, as all # consecutive whitespaces will be merged into a single one # at the end. It should not be the case of non breakable # space. Note that because of this trick, input # &nbsp; will be considered as a   self.result += ' ' return char_code = htmlentitydefs.name2codepoint.get(name, None) if char_code is not None: try: self.result += unichr(char_code).encode("utf-8") except: return -def get_as_text(record_id, ln=CFG_SITE_LANG): +def get_as_text(record_id=0, xml_record=None, ln=CFG_SITE_LANG): """Return the record in a textual format""" _ = gettext_set_language(ln) out = "" - rec_in_hb = format_record(record_id, of="hb") + if record_id != 0: + rec_in_hb = format_record(record_id, of="hb") + elif xml_record: + rec_in_hb = format_record(0, of="hb", xml_record=xml_record) rec_in_hb = rec_in_hb.replace('\n', ' ') htparser = RecordHTMLParser() try: htparser.feed(rec_in_hb) htparser.close() out = htparser.result except: out = remove_html_markup(rec_in_hb) # Remove trailing whitespace and linefeeds out = out.strip('\n').strip() # Merge consecutive whitespaces. Must be done here, once all HTML # tags have been removed out = whitespaces_pattern.sub(' ', out) # Now consider non-breakable spaces out = out.replace(' ', ' ') out = re.sub(r"[\-:]?\s*%s\s*[\-:]?" % _("Detailed record"), "", out) out = re.sub(r"[\-:]?\s*%s\s*[\-:]?" % _("Similar records"), "", out) out = re.sub(r"[\-:]?\s*%s\s*[\-:]?" 
% _("Cited by"), "", out) return out.strip() - diff --git a/modules/webalert/lib/webalert_templates.py b/modules/webalert/lib/webalert_templates.py index 4e50a2650..568dd90f0 100644 --- a/modules/webalert/lib/webalert_templates.py +++ b/modules/webalert/lib/webalert_templates.py @@ -1,576 +1,650 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = "$Id$" import cgi import time from invenio.config import \ CFG_WEBALERT_ALERT_ENGINE_EMAIL, \ CFG_SITE_NAME, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_URL, \ CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL from invenio.messages import gettext_set_language from invenio.htmlparser import get_as_text, wrap, wrap_records from invenio.urlutils import create_html_link +from invenio.search_engine import guess_primary_collection_of_a_record, get_coll_ancestors + class Template: def tmpl_errorMsg(self, ln, error_msg, rest = ""): """ Adds an error message to the output Parameters: - 'ln' *string* - The language to display the interface in - 'error_msg' *string* - The error message - 'rest' *string* - The rest of the page """ # load the right message language _ = gettext_set_language(ln) out = """
%(error)s

%(rest)s""" % { 'error' : error_msg, 'rest' : rest } return out def tmpl_textual_query_info_from_urlargs(self, ln, args): """ Displays a human inteligible textual representation of a query Parameters: - 'ln' *string* - The language to display the interface in - 'args' *array* - The URL arguments array (parsed) """ # load the right message language _ = gettext_set_language(ln) out = "" if args.has_key('p'): out += "" + _("Pattern") + ": " + "; ".join(args['p']) + "
" if args.has_key('f'): out += "" + _("Field") + ": " + "; ".join(args['f']) + "
" if args.has_key('p1'): out += "" + _("Pattern 1") + ": " + "; ".join(args['p1']) + "
" if args.has_key('f1'): out += "" + _("Field 1") + ": " + "; ".join(args['f1']) + "
" if args.has_key('p2'): out += "" + _("Pattern 2") + ": " + "; ".join(args['p2']) + "
" if args.has_key('f2'): out += "" + _("Field 2") + ": " + "; ".join(args['f2']) + "
" if args.has_key('p3'): out += "" + _("Pattern 3") + ": " + "; ".join(args['p3']) + "
" if args.has_key('f3'): out += "" + _("Field 3") + ": " + "; ".join(args['f3']) + "
" if args.has_key('c'): out += "" + _("Collections") + ": " + "; ".join(args['c']) + "
" elif args.has_key('cc'): out += "" + _("Collection") + ": " + "; ".join(args['cc']) + "
" return out def tmpl_account_list_alerts(self, ln, alerts): """ Displays all the alerts in the main "Your account" page Parameters: - 'ln' *string* - The language to display the interface in - 'alerts' *array* - The existing alerts IDs ('id' + 'name' pairs) """ # load the right message language _ = gettext_set_language(ln) out = """
%(you_own)s:  
""" % { 'show' : _("SHOW"), } return out def tmpl_input_alert(self, ln, query, alert_name, action, frequency, notification, baskets, old_id_basket, id_basket, id_query, guest, guesttxt): """ Displays an alert adding form. Parameters: - 'ln' *string* - The language to display the interface in - 'query' *string* - The HTML code of the textual representation of the query (as returned ultimately by tmpl_textual_query_info_from_urlargs...) - 'alert_name' *string* - The alert name - 'action' *string* - The action to complete ('update' or 'add') - 'frequency' *string* - The frequency of alert running ('day', 'week', 'month') - 'notification' *string* - If notification should be sent by email ('y', 'n') - 'baskets' *array* - The existing baskets ('id' + 'name' pairs) - 'old_id_basket' *string* - The id of the previous basket of this alert - 'id_basket' *string* - The id of the basket of this alert - 'id_query' *string* - The id of the query associated to this alert - 'guest' *bool* - If the user is a guest user - 'guesttxt' *string* - The HTML content of the warning box for guest users (produced by webaccount.tmpl_warning_guest_user) """ # load the right message language _ = gettext_set_language(ln) out = "" out += """
%(notify_cond)s
%(query_text)s: %(query)s
""" % { 'notify_cond' : _("This alert will notify you each time/only if a new item satisfies the following query:"), 'query_text' : _("QUERY"), 'query' : query, } out += """
%(alert_name)s
%(freq)s
%(send_email)s (%(specify)s)
%(store_basket)s %(baskets)s """ % { 'action': action, 'alert_name' : _("Alert identification name:"), 'alert' : cgi.escape(alert_name, 1), 'freq' : _("Search-checking frequency:"), 'freq_month' : (frequency == 'month' and 'selected="selected"' or ""), 'freq_week' : (frequency == 'week' and 'selected="selected"' or ""), 'freq_day' : (frequency == 'day' and 'selected="selected"' or ""), 'monthly' : _("monthly"), 'weekly' : _("weekly"), 'daily' : _("daily"), 'send_email' : _("Send notification email?"), 'notif_yes' : (notification == 'y' and 'selected="selected"' or ""), 'notif_no' : (notification == 'n' and 'selected="selected"' or ""), 'yes' : _("yes"), 'no' : _("no"), 'specify' : _("if %(x_fmt_open)sno%(x_fmt_close)s you must specify a basket") % {'x_fmt_open': '', 'x_fmt_close': ''}, 'store_basket' : _("Store results in basket?"), 'baskets': baskets } out += """
 
""" % { 'idq' : id_query, 'ln' : ln, 'set_alert' : _("SET ALERT"), 'clear_data' : _("CLEAR DATA"), } if action == "update": out += '' % old_id_basket out += "
" if guest: out += guesttxt return out def tmpl_list_alerts(self, ln, alerts, guest, guesttxt): """ Displays the list of alerts Parameters: - 'ln' *string* - The language to display the interface in - 'alerts' *array* - The existing alerts: - 'queryid' *string* - The id of the associated query - 'queryargs' *string* - The query string - 'textargs' *string* - The textual description of the query string - 'userid' *string* - The user id - 'basketid' *string* - The basket id - 'basketname' *string* - The basket name - 'alertname' *string* - The alert name - 'frequency' *string* - The frequency of alert running ('day', 'week', 'month') - 'notification' *string* - If notification should be sent by email ('y', 'n') - 'created' *string* - The date of alert creation - 'lastrun' *string* - The last running date - 'guest' *bool* - If the user is a guest user - 'guesttxt' *string* - The HTML content of the warning box for guest users (produced by webaccount.tmpl_warning_guest_user) """ # load the right message language _ = gettext_set_language(ln) out = '

' + _("Set a new alert from %(x_url1_open)syour searches%(x_url1_close)s, the %(x_url2_open)spopular searches%(x_url2_close)s, or the input form.") + '

' out %= {'x_url1_open': '', 'x_url1_close': '', 'x_url2_open': '', 'x_url2_close': '', } if len(alerts): out += """""" % { 'no' : _("No"), 'name' : _("Name"), 'search_freq' : _("Search checking frequency"), 'notification' : _("Notification by email"), 'result_basket' : _("Result in basket"), 'date_run' : _("Date last run"), 'date_created' : _("Creation date"), 'query' : _("Query"), 'action' : _("Action"), } i = 0 for alert in alerts: i += 1 if alert['frequency'] == "day": frequency = _("daily") else: if alert['frequency'] == "week": frequency = _("weekly") else: frequency = _("monthly") if alert['notification'] == "y": notification = _("yes") else: notification = _("no") # we clean up the HH:MM part of lastrun, since it is always 00:00 lastrun = alert['lastrun'].split(',')[0] created = alert['created'].split(',')[0] out += """""" % { 'index' : i, 'alertname' : cgi.escape(alert['alertname']), 'frequency' : frequency, 'notification' : notification, 'basketname' : alert['basketname'] and cgi.escape(alert['basketname']) \ or "- " + _("no basket") + " -", 'lastrun' : lastrun, 'created' : created, 'textargs' : alert['textargs'], 'queryid' : alert['queryid'], 'basketid' : alert['basketid'], 'freq' : alert['frequency'], 'notif' : alert['notification'], 'ln' : ln, 'modify_link': create_html_link("./modify", {'ln': ln, 'idq': alert['queryid'], 'name': alert['alertname'], 'freq': frequency, 'notif':notification, 'idb':alert['basketid'], 'old_idb':alert['basketid']}, _("Modify")), 'remove_link': create_html_link("./remove", {'ln': ln, 'idq': alert['queryid'], 'name': alert['alertname'], 'idb':alert['basketid']}, _("Remove")), 'siteurl' : CFG_SITE_URL, 'search' : _("Execute search"), 'queryargs' : cgi.escape(alert['queryargs']) } out += '
%(no)s %(name)s %(search_freq)s %(notification)s %(result_basket)s %(date_run)s %(date_created)s %(query)s %(action)s
#%(index)d %(alertname)s %(frequency)s %(notification)s %(basketname)s %(lastrun)s %(created)s %(textargs)s %(remove_link)s
%(modify_link)s
%(search)s
' out += '

' + (_("You have defined %s alerts.") % ('' + str(len(alerts)) + '' )) + '

' if guest: out += guesttxt return out def tmpl_display_alerts(self, ln, permanent, nb_queries_total, nb_queries_distinct, queries, guest, guesttxt): """ Displays the list of alerts Parameters: - 'ln' *string* - The language to display the interface in - 'permanent' *string* - If displaying most popular searches ('y') or only personal searches ('n') - 'nb_queries_total' *string* - The number of personal queries in the last period - 'nb_queries_distinct' *string* - The number of distinct queries in the last period - 'queries' *array* - The existing queries: - 'id' *string* - The id of the associated query - 'args' *string* - The query string - 'textargs' *string* - The textual description of the query string - 'lastrun' *string* - The last running date (only for personal queries) - 'guest' *bool* - If the user is a guest user - 'guesttxt' *string* - The HTML content of the warning box for guest users (produced by webaccount.tmpl_warning_guest_user) """ # load the right message language _ = gettext_set_language(ln) if len(queries) == 0: out = _("You have not executed any search yet. Please go to the %(x_url_open)ssearch interface%(x_url_close)s first.") % \ {'x_url_open': '', 'x_url_close': ''} return out out = '' # display message: number of items in the list if permanent == "n": msg = _("You have performed %(x_nb1)s searches (%(x_nb2)s different questions) during the last 30 days or so.") % {'x_nb1': nb_queries_total, 'x_nb2': nb_queries_distinct} out += '

' + msg + '

' else: # permanent="y" msg = _("Here are the %s most popular searches.") msg %= ('' + str(len(queries)) + '') out += '

' + msg + '

' # display the list of searches out += """""" % { 'no' : "#", 'question' : _("Question"), 'action' : _("Action") } if permanent == "n": out += '' % _("Last Run") out += "\n" i = 0 for query in queries : i += 1 # id, pattern, base, search url and search set alert, date out += """""" % { 'index' : i, 'textargs' : query['textargs'], 'siteurl' : CFG_SITE_URL, 'args' : cgi.escape(query['args']), 'id' : query['id'], 'ln': ln, 'execute_query' : _("Execute search"), 'set_alert' : _("Set new alert") } if permanent == "n": out += '' % query['lastrun'] out += """\n""" out += "
%(no)s %(question)s %(action)s%s
#%(index)d %(textargs)s %(execute_query)s
%(set_alert)s
%s

\n" if guest : out += guesttxt return out def tmpl_alert_email_title(self, name): return 'Alert %s run on %s' % ( name, time.strftime("%Y-%m-%d")) def tmpl_alert_email_from(self): return '%s Alert Engine <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL) def tmpl_alert_email_body(self, name, url, records, pattern, - catalogues, frequency): - - l = len(catalogues) + collection_list, frequency): + + recids_by_collection = {} + for recid in records[0]: + primary_collection = guess_primary_collection_of_a_record(recid) + if primary_collection in collection_list: + if not recids_by_collection.has_key(primary_collection): + recids_by_collection[primary_collection] = [] + recids_by_collection[primary_collection].append(recid) + else: + ancestors = get_coll_ancestors(primary_collection) + ancestors.reverse() + nancestors = 0 + for ancestor in ancestors: + nancestors += 1 + if ancestor in collection_list: + if not recids_by_collection.has_key(ancestor): + recids_by_collection[ancestor] = [] + recids_by_collection[ancestor].append(recid) + break + elif len(ancestors) == nancestors: + if not recids_by_collection.has_key('None of the above'): + recids_by_collection['None of the above'] = [] + recids_by_collection['None of the above'].append(recid) + + collection_list = [coll for coll in recids_by_collection.keys() if coll != 'None of the above'] + for external_collection_results in records[1][0]: + if external_collection_results[1][0]: + collection_list.append(external_collection_results[0]) + + l = len(collection_list) if l == 0: collections = '' elif l == 1: - collections = "collection: %s\n" % catalogues[0] + collections = "collection: %s\n" % collection_list[0] + else: + collections = "collections: %s\n" % wrap(', '.join(collection_list)) + + l = len(records[0]) + for external_collection_results in records[1][0]: + l += len(external_collection_results[1][0]) + if l == 1: + total = '1 record' else: - collections = "collections: %s\n" % wrap(', '.join(catalogues)) + total = '%d records' % l if pattern: pattern = 'pattern: %s\n' % pattern frequency = {'day': 'daily', 'week': 'weekly', 'month': 'monthly'}[frequency] - l = len(records) - if l == 1: - total = '1 record' - else: - total = '%d records' % l - - body = """\ Hello: Below are the results of the email notification alert that you set up with the %(sitename)s. This is an automatic message, please don't reply to it. For any question, please use <%(sitesupportemail)s> instead. 
alert name: %(name)s %(pattern)s%(collections)sfrequency: %(frequency)s run time: %(runtime)s found: %(total)s url: <%(url)s> """ % {'sitesupportemail': CFG_SITE_SUPPORT_EMAIL, 'name': name, 'sitename': CFG_SITE_NAME, 'pattern': pattern, 'collections': collections, 'frequency': frequency, 'runtime': time.strftime("%a %Y-%m-%d %H:%M:%S"), 'total': total, 'url': url} - - for index, recid in enumerate(records[:CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL]): - body += "\n%i) " % (index + 1) - body += self.tmpl_alert_email_record(recid) - body += "\n" - - if len(records) > CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + index = 0 + + for collection_recids in recids_by_collection.items(): + if collection_recids[0] != 'None of the above': + body += "\nCollection: %s\n" % collection_recids[0] + for recid in collection_recids[1]: + index += 1 + body += "\n%i) " % (index) + body += self.tmpl_alert_email_record(recid=recid) + body += "\n" + if index == CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + break + if index == CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + break + + if index < CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + if recids_by_collection.has_key('None of the above'): + if len(recids_by_collection.keys()) > 1: + body += "\nNone of the above collections:\n" + else: + # if the uncategorized collection is the only collection then present + # all the records as belonging to CFG_SITE_NAME + body += "\nCollection: %s\n" % CFG_SITE_NAME + for recid in recids_by_collection['None of the above']: + index += 1 + body += "\n%i) " % (index) + body += self.tmpl_alert_email_record(recid=recid) + body += "\n" + if index == CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + break + + if index < CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + for external_collection_results in records[1][0]: + body += "\nCollection: %s\n" % external_collection_results[0] + for recid in external_collection_results[1][0]: + index += 1 + body += "\n%i) " % (index) + # TODO: extend function to accept xml_record! + body += self.tmpl_alert_email_record(xml_record=external_collection_results[1][1][recid]) + body += "\n" + if index == CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + break + if index == CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: + break + + if l > CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL: body += ''' Only the first %s records were displayed. Please consult the search URL given at the top of this email to see all the results. ''' % CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL body += ''' -- %s Alert Service <%s> Unsubscribe? See <%s> Need human intervention? Contact <%s> ''' % (CFG_SITE_NAME, CFG_SITE_URL, CFG_SITE_URL + '/youralerts/list', CFG_SITE_SUPPORT_EMAIL) return body - def tmpl_alert_email_record(self, recid): + def tmpl_alert_email_record(self, recid=0, xml_record=None): """ Format a single record.""" - out = wrap_records(get_as_text(recid)) - out += "\nDetailed record: <%s/record/%s>" % (CFG_SITE_URL, recid) + if recid != 0: + out = wrap_records(get_as_text(record_id=recid)) + out += "\nDetailed record: <%s/record/%s>" % (CFG_SITE_URL, recid) + elif xml_record: + out = wrap_records(get_as_text(xml_record=xml_record)) + # TODO: add Detailed record url for external records? return out diff --git a/modules/webbasket/lib/webbasket.py b/modules/webbasket/lib/webbasket.py index 6d1a85eb7..0b518e982 100644 --- a/modules/webbasket/lib/webbasket.py +++ b/modules/webbasket/lib/webbasket.py @@ -1,1019 +1,1018 @@ ## This file is part of CDS Invenio. 
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Web Baskets features.""" __revision__ = "$Id$" import cgi from zlib import decompress from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, \ CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS from invenio.messages import gettext_set_language from invenio.dateutils import convert_datetext_to_dategui, \ convert_datetext_to_datestruct,\ convert_datestruct_to_dategui from invenio.bibformat import format_record from invenio.webbasket_config import CFG_WEBBASKET_SHARE_LEVELS, \ CFG_WEBBASKET_SHARE_LEVELS_ORDERED, \ CFG_WEBBASKET_CATEGORIES, \ CFG_WEBBASKET_WARNING_MESSAGES from invenio.webuser import isGuestUser, collect_user_info from invenio.search_engine import \ record_exists, \ check_user_can_view_record from invenio.webcomment import check_user_can_attach_file_to_comments import invenio.webbasket_dblayer as db try: import invenio.template webbasket_templates = invenio.template.load('webbasket') except ImportError: pass from invenio.websearch_external_collections_utils import get_collection_name_by_id from invenio.websearch_external_collections import select_hosted_search_engines from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_TIMEOUT -from invenio.websearch_external_collections_searcher import external_collections_dictionary from invenio.websearch_external_collections_getter import HTTPAsyncPageGetter, async_download def perform_request_display(uid, category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], selected_topic=0, selected_group_id=0, ln=CFG_SITE_LANG): """Display all the baskets of given category, topic or group. 
@param uid: user id @param category: selected category (see webbasket_config.py) @param selected_topic: # of selected topic to display baskets @param selected_group_id: id of group to display baskets @param ln: language""" warnings = [] errors = [] baskets_html = [] baskets = [] _ = gettext_set_language(ln) nb_groups = db.count_groups_user_member_of(uid) nb_external_baskets = db.count_external_baskets(uid) selectionbox = '' infobox = '' if category == CFG_WEBBASKET_CATEGORIES['EXTERNAL']: baskets = db.get_external_baskets_infos(uid) if len(baskets): map(list, baskets) else: category = CFG_WEBBASKET_CATEGORIES['PRIVATE'] if category == CFG_WEBBASKET_CATEGORIES['GROUP']: groups = db.get_group_infos(uid) if len(groups): if selected_group_id == 0 and len(groups): selected_group_id = groups[0][0] selectionbox = webbasket_templates.tmpl_group_selection(groups, selected_group_id, ln) baskets = db.get_group_baskets_infos(selected_group_id) def adapt_group_rights(item): """Suppress unused element in tuple.""" out = list(item) if out[-1] == uid: out[-2] = CFG_WEBBASKET_SHARE_LEVELS['MANAGE'] return out[:-1] baskets = map(adapt_group_rights, baskets) else: category = CFG_WEBBASKET_CATEGORIES['PRIVATE'] if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']: topics_list = db.get_personal_topics_infos(uid) if not selected_topic and len(topics_list): selected_topic = 0 selectionbox = webbasket_templates.tmpl_topic_selection(topics_list, selected_topic, ln) if selected_topic >= len(topics_list): selected_topic = len(topics_list) - 1 if len(topics_list) > 0: baskets = db.get_personal_baskets_infos(uid, topics_list[selected_topic][0]) else: baskets = [] def add_manage_rights(item): """ Convert a tuple to a list and add rights""" out = list(item) out.append(CFG_WEBBASKET_SHARE_LEVELS['MANAGE']) return out baskets = map(add_manage_rights, baskets) bskids = [] for basket in baskets: bskids.append(basket[0]) levels = dict(db.is_shared_to(bskids)) create_link = '' if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']: create_link = webbasket_templates.tmpl_create_basket_link(selected_topic, ln) infobox = webbasket_templates.tmpl_baskets_infobox(map(lambda x: (x[0], x[1], x[2]), baskets), create_link, ln) for (bskid, name, date_modification, nb_views, nb_items, last_added, share_level) in baskets: (bsk_html, bsk_e, bsk_w) = __display_basket(bskid, name, date_modification, nb_views, nb_items, last_added, share_level, levels[bskid], category, selected_topic, selected_group_id, ln) baskets_html.append(bsk_html) errors.extend(bsk_e) warnings.extend(bsk_w) body = webbasket_templates.tmpl_display(selectionbox, infobox, baskets_html, category, nb_groups, nb_external_baskets, ln) return (body, errors, warnings) def __display_basket(bskid, name, date_modification, nb_views, nb_items, last_added, share_level, group_sharing_level, category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], selected_topic=0, selected_group_id=0, ln=CFG_SITE_LANG): """Private function. Display a basket giving its category and topic or group. @param share_level: rights user has on basket @param group_sharing_level: None if basket is not shared, 0 if public basket, > 0 if shared to usergroups but not public. 
@param category: selected category (see webbasket_config.py) @param selected_topic: # of selected topic to display baskets @param selected_group_id: id of group to display baskets @param ln: language""" _ = gettext_set_language(ln) errors = [] warnings = [] nb_bsk_cmts = 0 last_cmt = _("N/A") records = [] cmt_dates = [] date_modification = convert_datetext_to_dategui(date_modification, ln) items = db.get_basket_content(bskid, 'hb') external_recids = [] for (recid, nb_cmt, last_cmt, ext_val, int_val, score) in items: cmt_dates.append(convert_datetext_to_datestruct(last_cmt)) last_cmt = convert_datetext_to_dategui(last_cmt, ln) val = '' nb_bsk_cmts += nb_cmt if recid < 0: if ext_val: val = decompress(ext_val) else: external_recids.append(recid) else: if int_val: val = decompress(int_val) else: val = format_record(recid, 'hb', on_the_fly=True) records.append((recid, nb_cmt, last_cmt, val, score)) if external_recids: external_records = format_external_records(external_recids, 'hb') for external_record in external_records: for record in records: if record[0] == -external_record[0]: idx = records.index(record) tuple_to_list = list(records.pop(idx)) tuple_to_list[3] = external_record[1] records.insert(idx,tuple(tuple_to_list)) break if len(cmt_dates) > 0: last_cmt = convert_datestruct_to_dategui(max(cmt_dates), ln) body = webbasket_templates.tmpl_basket(bskid, name, date_modification, nb_views, nb_items, last_added, (check_sufficient_rights(share_level, CFG_WEBBASKET_SHARE_LEVELS['READITM']), check_sufficient_rights(share_level, CFG_WEBBASKET_SHARE_LEVELS['MANAGE']), check_sufficient_rights(share_level, CFG_WEBBASKET_SHARE_LEVELS['READCMT']), check_sufficient_rights(share_level, CFG_WEBBASKET_SHARE_LEVELS['ADDITM']), check_sufficient_rights(share_level, CFG_WEBBASKET_SHARE_LEVELS['DELITM'])), nb_bsk_cmts, last_cmt, group_sharing_level, category, selected_topic, selected_group_id, records, ln) return (body, errors, warnings) def perform_request_display_item(uid, bskid, recid, format='hb', category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], topic=0, group_id=0, ln=CFG_SITE_LANG): """Display an item of a basket of given category, topic or group. @param uid: user id @param bskid: basket_id @param recid: record id @param format: format of the record (hb, hd, etc.) 
@param category: selected category (see webbasket_config.py) @param topic: # of selected topic to display baskets @param group_id: id of group to display baskets @param ln: language""" body = '' errors = [] warnings = [] rights = db.get_max_user_rights_on_basket(uid, bskid) if not(check_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READITM'])): errors.append('ERR_WEBBASKET_NO_RIGHTS') return (body, errors, warnings) if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']: topics_list = db.get_personal_topics_infos(uid) if not topic and len(topics_list): topic = 0 topicsbox = webbasket_templates.tmpl_topic_selection(topics_list, topic, ln) elif category == CFG_WEBBASKET_CATEGORIES['GROUP']: groups = db.get_group_infos(uid) if group_id == 0 and len(groups): group_id = groups[0][0] topicsbox = webbasket_templates.tmpl_group_selection(groups, group_id, ln) else: topicsbox = '' record = db.get_basket_record(bskid, recid, format) comments = db.get_comments(bskid, recid) group_sharing_level = None levels = db.is_shared_to(bskid) if len(levels): group_sharing_level = levels[0][1] basket = db.get_basket_general_infos(bskid) if not(len(basket)): errors.append('ERR_WEBBASKET_DB_ERROR') return (body, errors, warnings) item_html = webbasket_templates.tmpl_item(basket, recid, record, comments, group_sharing_level, (check_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']), check_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']), check_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['DELCMT'])), selected_category=category, selected_topic=topic, selected_group_id=group_id, ln=ln) body = webbasket_templates.tmpl_display(topicsbox=topicsbox, baskets=[item_html], selected_category=category, nb_groups=db.count_groups_user_member_of(uid), nb_external_baskets=db.count_external_baskets(uid), ln=ln) return (body, errors, warnings) def perform_request_write_comment(uid, bskid, recid, cmtid=0, category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], topic=0, group_id=0, ln=CFG_SITE_LANG): """Display a comment writing form @param uid: user id @param bskid: basket id @param recid: record id (comments are on a specific record in a specific basket) @param cmtid: if provided this comment is a reply to comment cmtid. @param category: selected category @param topic: selected topic @param group_id: selected group id @param ln: language """ body = '' warnings = [] errors = [] textual_msg = '' # initial value in replies html_msg = '' # initial value in replies (if FCKeditor) title = '' # initial title in replies if not check_user_can_comment(uid, bskid): errors.append(('ERR_WEBBASKET_CANNOT_COMMENT')) return (body, errors, warnings) if cmtid: # this is a reply to another comment comment = db.get_comment(cmtid) if comment: # Title if comment[2]: title = 'Re: ' + comment[2] # Build two msg: one mostly textual, the other one with HTML markup, for the FCKeditor. textual_msg = webbasket_templates.tmpl_quote_comment_textual(comment[2], # title uid, comment[0], # nickname comment[4], # date comment[3], ln) html_msg = webbasket_templates.tmpl_quote_comment_html(comment[2], # title uid, comment[0], # nickname comment[4], # date comment[3], ln) else: warning = (CFG_WEBBASKET_WARNING_MESSAGES['ERR_WEBBASKET_cmtid_INVALID'], cmtid) warnings.append(warning) record = db.get_basket_record(bskid, recid, 'hb') # Check that user can attach file. To simplify we use the same # checking as in WebComment, though it is not completely adequate. 
    user_info = collect_user_info(uid)
    can_attach_files = check_user_can_attach_file_to_comments(user_info, recid)
    body = webbasket_templates.tmpl_write_comment(bskid=bskid,
                                                  recid=recid,
                                                  cmt_title=title,
                                                  cmt_body_textual=textual_msg,
                                                  cmt_body_html=html_msg,
                                                  record=record,
                                                  selected_category=category,
                                                  selected_topic=topic,
                                                  selected_group_id=group_id,
                                                  warnings=warnings,
                                                  can_attach_files=can_attach_files)
    if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']:
        topics_list = db.get_personal_topics_infos(uid)
        if not topic and len(topics_list):
            topic = 0
        topicsbox = webbasket_templates.tmpl_topic_selection(topics_list, topic, ln)
    elif category == CFG_WEBBASKET_CATEGORIES['GROUP']:
        groups = db.get_group_infos(uid)
        if group_id == 0 and len(groups):
            group_id = groups[0][0]
        topicsbox = webbasket_templates.tmpl_group_selection(groups, group_id, ln)
    else:
        topicsbox = ''
    body = webbasket_templates.tmpl_display(topicsbox, '', [body], category, ln)
    return (body, errors, warnings)

def perform_request_save_comment(uid, bskid, recid, title='', text='', ln=CFG_SITE_LANG, editor_type='textarea'):
    """
    Save a given comment if able to.
    @param uid: user id (int)
    @param bskid: basket id (int)
    @param recid: record id (int)
    @param title: title of comment (string)
    @param text: comment's body (string)
    @param ln: language (string)
    @param editor_type: the kind of editor/input used for the comment: 'textarea', 'fckeditor'
    @return: (errors, infos) where errors: list of errors while saving
                                   infos: list of informations to display"""
    _ = gettext_set_language(ln)
    errors = []
    infos = []
    if not check_user_can_comment(uid, bskid):
        errors.append('ERR_WEBBASKET_CANNOT_COMMENT')
        return (errors, infos)
    if editor_type == 'fckeditor':
        # Here we remove the line feeds introduced by FCKeditor (they
        # have no meaning for the user) and replace the HTML line
        # breaks by linefeeds, so that we are close to an input that
        # would be done without the FCKeditor. That's much better if a
        # reply to a comment is made with a browser that does not
        # support FCKeditor.
        text = text.replace('\n', '').replace('\r', '').replace('<br />', '\n')
    if not(db.save_comment(uid, bskid, recid, title, text)):
        errors.append('ERR_WEBBASKET_DB_ERROR')
    else:
        infos.append(_('Your comment has been successfully posted'))
    return (errors, infos)

def perform_request_delete_comment(uid, bskid, recid, cmtid):
    """Delete comment cmtid on record recid for basket bskid."""
    errors = []
    if __check_user_can_perform_action(uid, bskid, CFG_WEBBASKET_SHARE_LEVELS['DELCMT']):
        db.delete_comment(bskid, recid, cmtid)
    else:
        errors.append('ERR_WEBBASKET_NO_RIGHTS')
    return errors

def perform_request_add(uid, recids=[], colid=0, bskids=[],
                        referer='', new_basket_name='',
                        new_topic_name='', create_in_topic='', ln=CFG_SITE_LANG):
    """Add records to baskets
    @param uid: user id
    @param recids: list of records to add
    @param colid: in case of external collections, the id of the collection the records belong to
    @param bskids: list of baskets to add records to. if not provided,
                   will return a page where user can select baskets
    @param referer: URL of the referring page
    @param new_basket_name: add record to new basket
    @param new_topic_name: new basket goes into new topic
    @param create_in_topic: # of topic to put basket into
    @param ln: language
    @return: (body, errors, warnings) tuple
    """
    body = ''
    errors = []
    warnings = []
    if not(type(recids) == list):
        recids = [recids]
    validated_recids = []
    if colid == 0: # local records
        for recid in recids:
            recid = int(recid)
            if record_exists(recid) == 1:
                validated_recids.append(recid)
        user_info = collect_user_info(uid)
        # Iterate over a copy of the list: removing items from the list
        # being iterated over would silently skip the record that follows
        # each removed one.
        for recid in validated_recids[:]:
            (auth_code, auth_msg) = check_user_can_view_record(user_info, recid)
            if auth_code: # User not authorized to view record
                validated_recids.remove(recid)
                warnings.append(('WRN_WEBBASKET_NO_RIGHTS_TO_ADD_THIS_RECORD', recid))
    elif colid > 0: # external records, no need to validate
        validated_recids = recids
    elif colid == -1: # external url / resource.
        # TODO: how to handle?
pass if not(len(validated_recids)): warnings.append('WRN_WEBBASKET_NO_RECORD') body += webbasket_templates.tmpl_warnings(warnings, ln) if referer and not(referer.find(CFG_SITE_URL) == -1): body += webbasket_templates.tmpl_back_link(referer, ln) return (body, errors, warnings) if new_basket_name != '': new_topic_name = new_topic_name.strip() if new_topic_name: topic = new_topic_name elif create_in_topic != -1: topics = map(lambda x: x[0], db.get_personal_topics_infos(uid)) try: topic = topics[create_in_topic] except IndexError: topic = '' else: topic = '' warnings.append('WRN_WEBBASKET_NO_GIVEN_TOPIC') body += webbasket_templates.tmpl_warnings(warnings, ln) bskids = [] if topic: id_bsk = db.create_basket(uid, new_basket_name, topic) bskids.append(id_bsk) if bskids: bskids = [int(bskid) for bskid in bskids if int(bskid) > 0] if bskids: for bskid in bskids: if not(__check_user_can_perform_action(uid, bskid, CFG_WEBBASKET_SHARE_LEVELS['ADDITM'])): errors.append('ERR_WEBBASKET_NO_RIGHTS') return (body, errors, warnings) nb_modified_baskets = db.add_to_basket(uid, validated_recids, bskids) body = webbasket_templates.tmpl_added_to_basket(nb_modified_baskets, ln) body_tmp, warnings_temp, errors_tmp = perform_request_display(uid, category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], selected_topic=create_in_topic != -1 and create_in_topic or 0, selected_group_id=0, ln=CFG_SITE_LANG) body += body_tmp warnings += warnings_temp errors += errors_tmp body += webbasket_templates.tmpl_back_link(referer, ln) return (body, warnings, errors) else: warnings.append('WRN_WEBBASKET_NO_BASKET_SELECTED') body += webbasket_templates.tmpl_warnings(warnings, ln) # Display basket_selection personal_baskets = db.get_all_personal_baskets_names(uid) group_baskets = db.get_all_group_baskets_names(uid) external_baskets = db.get_all_external_baskets_names(uid) topics = map(lambda x: x[0], db.get_personal_topics_infos(uid)) body += webbasket_templates.tmpl_add(recids=validated_recids, colid=colid, personal_baskets=personal_baskets, group_baskets=group_baskets, external_baskets=external_baskets, topics=topics, referer=referer, ln=ln) body += webbasket_templates.tmpl_back_link(referer, ln) return (body, errors, warnings) def perform_request_delete(uid, bskid, confirmed=0, category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], selected_topic=0, selected_group_id=0, ln=CFG_SITE_LANG): """Delete a given basket. @param uid: user id (user has to be owner of this basket) @param bskid: basket id @param confirmed: if 0 will return a confirmation page; if 1 will delete basket. @param category: category currently displayed @param selected_topic: topic currently displayed @param selected_group_id: if category is group, id of the group currently displayed @param ln: language""" body = '' errors = [] warnings = [] if not(db.check_user_owns_baskets(uid, [bskid])): errors.append(('ERR_WEBBASKET_NO_RIGHTS',)) return (body, errors, warnings) if confirmed: success = db.delete_basket(bskid) if not success: errors.append(('ERR_WEBBASKET_DB_ERROR',)) else: body = webbasket_templates.tmpl_confirm_delete(bskid, db.count_subscribers(uid, bskid), category, selected_topic, selected_group_id, ln) return (body, errors, warnings) def delete_record(uid, bskid, recid): """Delete a given record in a given basket. 
@param uid: user id (user has to have sufficient rights on basket @param bskid: basket id @param recid: record id """ if __check_user_can_perform_action(uid, bskid, CFG_WEBBASKET_SHARE_LEVELS['DELITM']): db.delete_item(bskid, recid) def move_record(uid, bskid, recid, direction): """Move a record up or down in a basket (change score). @param uid: user id (user has to have manage rights over this basket) @param bskid: basket id @param recid: record we want to move @param direction: CFG_WEBBASKET_ACTIONS['UP'] or CFG_WEBBASKET_ACTIONS['DOWN'] """ if __check_user_can_perform_action(uid, bskid, CFG_WEBBASKET_SHARE_LEVELS['MANAGE']): db.move_item(bskid, recid, direction) def perform_request_edit(uid, bskid, topic=0, new_name='', new_topic = '', new_topic_name='', groups=[], external='', ln=CFG_SITE_LANG): """Interface for management of basket. If names, groups or external is provided, will save new rights into database, else will provide interface. @param uid: user id (user has to have sufficient rights on this basket @param bskid: basket id to change rights on @param topic: topic currently used (int) @param new_name: new name of basket @param new_topic: topic in which to move basket (int), new_topic_name must be left blank @param new_topic_name: new topic in which to move basket (will overwrite param new_topic) @param groups: list of strings formed in this way: group_id + '_' + rights @param external: rights for everybody (can be 'NO') @param ln: language """ body = '' errors = [] warnings = [] rights = db.get_max_user_rights_on_basket(uid, bskid) if rights != CFG_WEBBASKET_SHARE_LEVELS['MANAGE']: errors.append(('ERR_WEBBASKET_NO_RIGHTS',)) return (body, errors, warnings) bsk_name = db.get_basket_name(bskid) if not(groups) and not(external) and not(new_name) and not(new_topic) and not(new_topic_name): # display interface topics = map(lambda x: x[0], db.get_personal_topics_infos(uid)) groups_rights = db.get_groups_subscribing_to_basket(bskid) external_rights = '' if groups_rights and groups_rights[0][0] == 0: external_rights = groups_rights[0][2] groups_rights = groups_rights[1:] display_delete = db.check_user_owns_baskets(uid, bskid) display_general = display_delete if isGuestUser(uid): display_sharing = 0 else: display_sharing = 1 body = webbasket_templates.tmpl_edit(bskid=bskid, bsk_name=bsk_name, display_general=display_general, topics=topics, topic=topic, display_delete=display_delete, display_sharing=display_sharing, groups_rights=groups_rights, external_rights=external_rights, ln=ln) else: out_groups = {} if len(groups): for group in groups: (group_id, group_rights) = group.split('_') out_groups[group_id] = group_rights out_groups['0'] = external if not(isGuestUser(uid)): db.update_rights(bskid, out_groups) if new_name != bsk_name: db.rename_basket(bskid, new_name) if new_topic_name: db.move_baskets_to_topic(uid, bskid, new_topic_name) elif new_topic != -1: if db.check_user_owns_baskets(uid, bskid): topics = map(lambda x: x[0], db.get_personal_topics_infos(uid)) try: new_topic_name = topics[new_topic] db.move_baskets_to_topic(uid, bskid, new_topic_name) except: errors.append(('ERR_WEBBASKET_DB_ERROR')) else: topic = 0 errors.append(('ERR_WEBBASKET_NOT_OWNER')) return (body, errors, warnings) def perform_request_add_group(uid, bskid, topic=0, group_id=0, ln=CFG_SITE_LANG): """If group id is specified, share basket bskid to this group with READITM rights; else return a page for selection of a group. 
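    Example (illustrative values): perform_request_add_group(uid, bskid,
    group_id=5) shares the basket with group 5 at READITM level right away,
    while group_id=0 falls through to the group-selection page.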
@param uid: user id (selection only of groups user is member of) @param bskid: basket id @param topic: topic currently displayed @param group_id: id of group to share basket to @param ln: language """ if group_id: db.share_basket_with_group(bskid, group_id, CFG_WEBBASKET_SHARE_LEVELS['READITM']) else: groups = db.get_groups_user_member_of(uid) body = webbasket_templates.tmpl_add_group(bskid, topic, groups, ln) return body def perform_request_create_basket(uid, new_basket_name='', new_topic_name='', create_in_topic=-1, topic_number=-1, ln=CFG_SITE_LANG): """if new_basket_name and topic infos are given create a basket and return topic number, else return (body, errors, warnings) tuple of basket creation form. @param uid: user id (int) @param new_basket_name: name of the basket to create (str) @param new_topic_name: name of new topic to create new basket in (str) @param create_in_topic: identification number of topic to create new basket in (int) @param topic_number: number of topic to preselect on the creation form. @pram ln: language """ if new_basket_name and (new_topic_name or create_in_topic != -1): topics_infos = map(lambda x: x[0], db.get_personal_topics_infos(uid)) new_topic_name = new_topic_name.strip() if new_topic_name: topic = new_topic_name else: try: topic = topics_infos[create_in_topic] except IndexError: return 0 db.create_basket(uid, new_basket_name, topic) topics = map(lambda x: x[0], topics_infos) try: return topics.index(topic) except ValueError: return 0 else: topics = map(lambda x: x[0], db.get_personal_topics_infos(uid)) if topic_number in range (0, len(topics)): create_in_topic = topics[topic_number] body = webbasket_templates.tmpl_create_basket(new_basket_name, new_topic_name, create_in_topic, topics, ln) return (body, [], []) def perform_request_display_public(bskid=0, of='hb', ln=CFG_SITE_LANG): """return html representation of a public basket @param bskid: basket id @param of: format @param ln: language""" _ = gettext_set_language(ln) body = '' errors = [] warnings = [] basket = db.get_public_basket_infos(bskid) if of[0] == 'x': items = [] if len(basket) == 7: content = db.get_basket_content(bskid) for item in content: # TODO: return xml for external records too? items.append(format_record(item[0], of)) return webbasket_templates.tmpl_xml_basket(items) if len(basket) == 7: items = db.get_basket_content(bskid) external_recids = [] last_cmt = _("N/A") records = [] cmt_dates = [] for (recid, nb_cmt, last_cmt, ext_val, int_val, score) in items: cmt_dates.append(convert_datetext_to_datestruct(last_cmt)) last_cmt = convert_datetext_to_dategui(last_cmt, ln) val = '' if recid < 0: if ext_val: val = decompress(ext_val) else: external_recids.append(recid) else: if int_val: val = format_record(recid, 'hb') records.append((recid, nb_cmt, last_cmt, val, score)) if external_recids: external_records = format_external_records(external_recids, 'hb') for external_record in external_records: for record in records: if record[0] == -external_record[0]: idx = records.index(record) tuple_to_list = list(records.pop(idx)) tuple_to_list[3] = external_record[1] records.insert(idx,tuple(tuple_to_list)) break body = webbasket_templates.tmpl_display_public(basket, records, ln) else: errors.append('ERR_WEBBASKET_RESTRICTED_ACCESS') return (body, errors, warnings) def perform_request_list_public_baskets(inf_limit=0, order=1, asc=1, ln=CFG_SITE_LANG): """Display list of public baskets. 
@param inf_limit: display baskets from inf_limit @param order: 1: order by name of basket, 2: number of views, 3: owner @param asc: ascending order if 1, descending if 0 @param ln: language """ errors = [] warnings = [] total_baskets = db.count_public_baskets() baskets = db.get_public_baskets_list(inf_limit, CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS, order, asc) body = webbasket_templates.tmpl_display_list_public_baskets(baskets, inf_limit, total_baskets, order, asc, ln) return (body, errors, warnings) def perform_request_subscribe(uid, bskid): """subscribe to external basket bskid""" errors = [] if db.is_public(bskid): db.subscribe(uid, bskid) else: errors.append('ERR_WEBBASKET_RESTRICTED_ACCESS') return errors def perform_request_unsubscribe(uid, bskid): """unsubscribe from external basket bskid""" db.unsubscribe(uid, bskid) def check_user_can_comment(uid, bskid): """ Private function. check if a user can comment """ min_right = CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'] rights = db.get_max_user_rights_on_basket(uid, bskid) if rights: if CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights) >= CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(min_right): return 1 return 0 def __check_user_can_perform_action(uid, bskid, rights): """ Private function, check if a user has sufficient rights""" min_right = rights rights = db.get_max_user_rights_on_basket(uid, bskid) if rights: if CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights) >= CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(min_right): return 1 return 0 def check_sufficient_rights(rights_user_has, rights_needed): """Private function, check if the rights are sufficient.""" try: out = CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights_user_has) >= \ CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights_needed) except ValueError: out = 0 return out def create_guest_warning_box(ln=CFG_SITE_LANG): """return a warning message about logging into system""" return webbasket_templates.tmpl_create_guest_warning_box(ln) def create_personal_baskets_selection_box(uid, html_select_box_name='baskets', selected_bskid=None, ln=CFG_SITE_LANG): """Return HTML box for basket selection. Only for personal baskets. @param uid: user id @param html_select_box_name: name used in html form @param selected_bskid: basket currently selected @param ln: language """ baskets = db.get_all_personal_baskets_names(uid) return webbasket_templates.tmpl_personal_baskets_selection_box( baskets, html_select_box_name, selected_bskid, ln) def create_basket_navtrail(uid, category=CFG_WEBBASKET_CATEGORIES['PRIVATE'], topic=0, group=0, bskid=0, ln=CFG_SITE_LANG): """display navtrail for basket navigation. 
@param uid: user id (int) @param category: selected category (see CFG_WEBBASKET_CATEGORIES) @param topic: selected topic # if personal baskets @param group: selected group id for displaying (int) @param bskid: basket id (int) @param ln: language""" _ = gettext_set_language(ln) out = '' if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']: out += ' > '\ '%s' out %= (CFG_SITE_URL, 'category=' + category + '&ln=' + ln, _("Personal baskets")) topics = map(lambda x: x[0], db.get_personal_topics_infos(uid)) if topic in range(0, len(topics)): out += ' > ' out += ''\ '%s' out %= (CFG_SITE_URL, 'category=' + category + '&topic=' + \ str(topic) + '&ln=' + ln, cgi.escape(topics[topic])) if bskid: basket = db.get_public_basket_infos(bskid) if basket: out += ' > ' out += '%s' out %= (CFG_SITE_URL, 'category=' + category + '&topic=' + \ str(topic) + '&ln=' + ln + '#bsk' + str(bskid), cgi.escape(basket[1])) elif category == CFG_WEBBASKET_CATEGORIES['GROUP']: out += ' > '\ '%s' out %= (CFG_SITE_URL, 'category=' + category + '&ln=' + ln, _("Group baskets")) groups = db.get_group_infos(uid) if group: groups = filter(lambda x: x[0] == group, groups) if len(groups): out += ' > ' out += '%s' out %= (CFG_SITE_URL, 'category=' + category + '&group=' + \ str(group) + '&ln=' + ln, cgi.escape(groups[0][1])) if bskid: basket = db.get_public_basket_infos(bskid) if basket: out += ' > ' out += '%s' out %= (CFG_SITE_URL, 'category=' + category + '&group=' + \ str(group) + '&ln=' + ln + '#bsk' + str(bskid), cgi.escape(basket[1])) elif category == CFG_WEBBASKET_CATEGORIES['EXTERNAL']: out += ' > '\ '%s' out %= (CFG_SITE_URL, 'category=' + category + '&ln=' + ln, _("Others' baskets")) if bskid: basket = db.get_public_basket_infos(bskid) if basket: out += ' > ' out += ''\ '%s' out %= (CFG_SITE_URL, 'category=' + category + '&ln=' + ln + \ '#bsk' + str(bskid), cgi.escape(basket[1])) return out def create_infobox(infos=[]): """Create an infos box. infos param should be a list of strings. Return formatted infos""" return webbasket_templates.tmpl_create_infobox(infos) def account_list_baskets(uid, ln=CFG_SITE_LANG): """Display baskets informations on account page""" _ = gettext_set_language(ln) (personal, group, external) = db.count_baskets(uid) link = '%s' base_url = CFG_SITE_URL + '/yourbaskets/display?category=%s&ln=' + ln personal_text = personal if personal: url = base_url % CFG_WEBBASKET_CATEGORIES['PRIVATE'] personal_text = link % (url, personal_text) group_text = group if group: url = base_url % CFG_WEBBASKET_CATEGORIES['GROUP'] group_text = link % (url, group_text) external_text = external if external: url = base_url % CFG_WEBBASKET_CATEGORIES['EXTERNAL'] else: url = CFG_SITE_URL + '/yourbaskets/list_public_baskets?ln=' + ln external_text = link % (url, external_text) out = _("You have %(x_nb_perso)s personal baskets and are subscribed to %(x_nb_group)s group baskets and %(x_nb_public)s other users public baskets.") %\ {'x_nb_perso': personal_text, 'x_nb_group': group_text, 'x_nb_public': external_text} return out ## External records functions def format_external_records(recids, of='hb'): """Given a list of external records' recids, this function returns a list of tuples with each recid and the actual formatted record using the selected output format. 
It also stores the formatted record in the database for future use."""

    if type(recids) is not list:
        recids = [recids]
    records_grouped_by_collection = db.get_external_records_by_collection(recids)
    formatted_records = []
    for records in records_grouped_by_collection:
        external_records = fetch_and_store_external_records(records, of)
        formatted_records.extend(external_records)
    return tuple(formatted_records)

def fetch_and_store_external_records(records, of="hb"):
    """Function that fetches the formatted records for one collection and
    stores them into the database. It also calculates and stores the
    original external url for each record."""
    results = []
    parsed_results_list = []
    parsed_results_dict = {}
    formatted_records = []
    ids = records[0].split(",")
    external_ids = records[1].split(",")
    collection_name = get_collection_name_by_id(records[2])
    collection_engine_set = select_hosted_search_engines(collection_name)
    collection_engine = collection_engine_set.pop()
    external_ids_urls = collection_engine.build_record_urls(external_ids)
    external_urls = [external_id_url[1] for external_id_url in external_ids_urls]
    external_urls_dict = {}
    for (local_id, url) in zip(ids, external_urls):
        external_urls_dict[local_id] = url
    db.store_external_urls(external_urls_dict)
    url = collection_engine.build_search_url(None, req_args=external_ids)
    # Download the search page asynchronously: async_download polls the page
    # getters and invokes `finished' for each one that completes before
    # CFG_EXTERNAL_COLLECTION_TIMEOUT; finished_list records which
    # downloads succeeded.
    pagegetters = [HTTPAsyncPageGetter(url)]
    def finished(pagegetter, data, time):
        """Function to be called when a page has been downloaded."""
        results.append(pagegetter)
    finished_list = async_download(pagegetters, finish_function=finished, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT)
    if finished_list[0]:
        collection_engine.parser.parse_and_get_results(results[0].data, feedonly=True)
        (parsed_results_list, parsed_results_dict) = collection_engine.parser.parse_and_extract_records(of=of)
        for (id, external_id) in zip(ids, external_ids):
            formatted_records.append((int(id), parsed_results_dict[external_id]))
        db.store_external_records(formatted_records, of)
    else:
        # The download timed out: fall back to a placeholder message for
        # every record of this collection.
        for (id, external_id) in zip(ids, external_ids):
            formatted_records.append((int(id), "There was a timeout when fetching the record."))
    return formatted_records

diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py
index 1bf517221..0e5adb194 100644
--- a/modules/websearch/lib/search_engine.py
+++ b/modules/websearch/lib/search_engine.py
@@ -1,4884 +1,4890 @@
# -*- coding: utf-8 -*-
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
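# The cache classes defined below (RestrictedCollectionDataCacher,
# IndexStemmingDataCacher, CollectionRecListDataCacher, ...) all follow one
# pattern: a `cache_filler' that rebuilds the cache from the database, and a
# `timestamp_verifier' that returns the relevant table's last update time,
# so that recreate_cache_if_needed() refreshes only when the DB changed.
# A minimal self-contained sketch of that pattern -- TinyDataCacher and
# both demo_* helpers are hypothetical stand-ins, not Invenio APIs:

class TinyDataCacher:
    """Rebuild self.cache via filler() whenever verifier() reports a newer
    timestamp than the one recorded at the previous rebuild."""
    def __init__(self, filler, verifier):
        self.filler = filler
        self.verifier = verifier
        self.cache = filler()
        self.timestamp = verifier()
    def recreate_cache_if_needed(self):
        current = self.verifier()
        if current > self.timestamp:
            self.cache = self.filler()
            self.timestamp = current

def demo_filler():
    # hypothetical stand-in for e.g. run_sql("SELECT id, stemming_language FROM idxINDEX")
    return {1: 'en', 2: 'fr'}

def demo_verifier():
    # hypothetical stand-in for e.g. get_table_update_time('idxINDEX')
    return '2008-01-01 00:00:00'

# demo_cache = TinyDataCacher(demo_filler, demo_verifier)
# demo_cache.recreate_cache_if_needed()   # no-op while the timestamp is unchanged
# demo_cache.cache[1]                     # -> 'en'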
# pylint: disable-msg=C0301 """CDS Invenio Search Engine in mod_python.""" __lastupdated__ = """$Date$""" __revision__ = "$Id$" ## import general modules: import cgi import cStringIO import copy import string import os import re import time import urllib import urlparse import zlib import sys if sys.hexversion < 0x2040000: # pylint: disable-msg=W0622 from sets import Set as set # pylint: enable-msg=W0622 ## import CDS Invenio stuff: from invenio.config import \ CFG_CERN_SITE, \ CFG_OAI_ID_FIELD, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_CALL_BIBFORMAT, \ CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \ CFG_WEBSEARCH_FIELDS_CONVERT, \ CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \ CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \ CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_LOGDIR, \ CFG_SITE_URL from invenio.search_engine_config import InvenioWebSearchUnknownCollectionError from invenio.bibrecord import create_record from invenio.bibrank_record_sorter import get_bibrank_methods, rank_records, is_method_valid from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list from invenio.bibindex_engine_stemmer import stem from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box from invenio.data_cacher import DataCacher from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search from invenio.access_control_admin import acc_get_action_id from invenio.access_control_config import VIEWRESTRCOLL, \ CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS from invenio.websearchadminlib import get_detailed_page_tabs from invenio.intbitset import intbitset as HitSet from invenio.dbquery import DatabaseError from invenio.access_control_engine import acc_authorize_action from invenio.errorlib import register_exception from invenio.textutils import encode_for_xml import invenio.template webstyle_templates = invenio.template.load('webstyle') webcomment_templates = invenio.template.load('webcomment') from invenio.bibrank_citation_searcher import calculate_cited_by_list, \ calculate_co_cited_with_list, get_records_with_num_cites, get_self_cited_by from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box from invenio.dbquery import run_sql, run_sql_cached, get_table_update_time, Error from invenio.webuser import getUid, collect_user_info from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \ InvenioWebSearchQueryParserException, SpiresToInvenioSyntaxConverter from invenio import webinterface_handler_wsgi_utils as apache try: import invenio.template websearch_templates = invenio.template.load('websearch') except: pass from invenio.websearch_external_collections import calculate_hosted_collections_results, do_calculate_hosted_collections_results from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH +from 
invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS ## global vars: cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical? ## precompile some often-used regexp for speed reasons: re_word = re.compile('[\s]') re_quotes = re.compile('[\'\"]') re_doublequote = re.compile('\"') re_equal = re.compile('\=') re_logical_and = re.compile('\sand\s', re.I) re_logical_or = re.compile('\sor\s', re.I) re_logical_not = re.compile('\snot\s', re.I) re_operators = re.compile(r'\s([\+\-\|])\s') re_pattern_wildcards_at_beginning = re.compile(r'(\s)[\*\%]+') re_pattern_single_quotes = re.compile("'(.*?)'") re_pattern_double_quotes = re.compile("\"(.*?)\"") re_pattern_regexp_quotes = re.compile("\/(.*?)\/") re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+') re_pattern_space = re.compile("__SPACE__") re_pattern_today = re.compile("\$TODAY\$") re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)') re_unicode_lowercase_a = re.compile(unicode(r"(?u)[áàäâãå]", "utf-8")) re_unicode_lowercase_ae = re.compile(unicode(r"(?u)[æ]", "utf-8")) re_unicode_lowercase_e = re.compile(unicode(r"(?u)[éèëê]", "utf-8")) re_unicode_lowercase_i = re.compile(unicode(r"(?u)[íìïî]", "utf-8")) re_unicode_lowercase_o = re.compile(unicode(r"(?u)[óòöôõø]", "utf-8")) re_unicode_lowercase_u = re.compile(unicode(r"(?u)[úùüû]", "utf-8")) re_unicode_lowercase_y = re.compile(unicode(r"(?u)[ýÿ]", "utf-8")) re_unicode_lowercase_c = re.compile(unicode(r"(?u)[çć]", "utf-8")) re_unicode_lowercase_n = re.compile(unicode(r"(?u)[ñ]", "utf-8")) re_unicode_uppercase_a = re.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8")) re_unicode_uppercase_ae = re.compile(unicode(r"(?u)[Æ]", "utf-8")) re_unicode_uppercase_e = re.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8")) re_unicode_uppercase_i = re.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8")) re_unicode_uppercase_o = re.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8")) re_unicode_uppercase_u = re.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8")) re_unicode_uppercase_y = re.compile(unicode(r"(?u)[Ý]", "utf-8")) re_unicode_uppercase_c = re.compile(unicode(r"(?u)[ÇĆ]", "utf-8")) re_unicode_uppercase_n = re.compile(unicode(r"(?u)[Ñ]", "utf-8")) re_latex_lowercase_a = re.compile("\\\\[\"H'`~^vu=k]\{?a\}?") re_latex_lowercase_ae = re.compile("\\\\ae\\{\\}?") re_latex_lowercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?e\\}?") re_latex_lowercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?i\\}?") re_latex_lowercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?o\\}?") re_latex_lowercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?u\\}?") re_latex_lowercase_y = re.compile("\\\\[\"']\\{?y\\}?") re_latex_lowercase_c = re.compile("\\\\['uc]\\{?c\\}?") re_latex_lowercase_n = re.compile("\\\\[c'~^vu]\\{?n\\}?") re_latex_uppercase_a = re.compile("\\\\[\"H'`~^vu=k]\\{?A\\}?") re_latex_uppercase_ae = re.compile("\\\\AE\\{?\\}?") re_latex_uppercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?E\\}?") re_latex_uppercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?I\\}?") re_latex_uppercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?O\\}?") re_latex_uppercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?U\\}?") re_latex_uppercase_y = re.compile("\\\\[\"']\\{?Y\\}?") re_latex_uppercase_c = re.compile("\\\\['uc]\\{?C\\}?") re_latex_uppercase_n = re.compile("\\\\[c'~^vu]\\{?N\\}?") class RestrictedCollectionDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = [] try: viewcollid = 
acc_get_action_id(VIEWRESTRCOLL) res = run_sql("""SELECT DISTINCT ar.value FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (viewcollid,)) except Exception: # database problems, return empty cache return [] for coll in res: ret.append(coll[0]) return ret def timestamp_verifier(): return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT')) DataCacher.__init__(self, cache_filler, timestamp_verifier) def collection_restricted_p(collection): restricted_collection_cache.recreate_cache_if_needed() return collection in restricted_collection_cache.cache try: restricted_collection_cache.is_ok_p except Exception: restricted_collection_cache = RestrictedCollectionDataCacher() def get_permitted_restricted_collections(user_info): """Return a list of collection that are restricted but for which the user is authorized.""" restricted_collection_cache.recreate_cache_if_needed() ret = [] for collection in restricted_collection_cache.cache: if acc_authorize_action(user_info, 'viewrestrcoll', collection=collection)[0] == 0: ret.append(collection) return ret def is_user_owner_of_record(user_info, recid): """ Check if the user is owner of the record, i.e. he is the submitter and/or belongs to a owner-like group authorized to 'see' the record. @param user_info: the user_info dictionary that describe the user. @type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: True if the user is 'owner' of the record; False otherwise @rtype: bool """ authorized_emails_or_group = [] for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS: authorized_emails_or_group.extend(get_fieldvalues(recid, tag)) for email_or_group in authorized_emails_or_group: if email_or_group in user_info['group']: return True email = email_or_group.strip().lower() if user_info['email'].strip().lower() == email: return True return False def check_user_can_view_record(user_info, recid): """ Check if the user is authorized to view the given recid. The function grants access in two cases: either user has author rights on this record, or he has view rights to the primary collection this record belongs to. @param user_info: the user_info dictionary that describe the user. @type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: (0, ''), when authorization is granted, (>0, 'message') when authorization is not granted @rtype: (int, string) """ record_primary_collection = guess_primary_collection_of_a_record(recid) if collection_restricted_p(record_primary_collection): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code == 0 or is_user_owner_of_record(user_info, recid): return (0, '') else: return (auth_code, auth_msg) else: return (0, '') class IndexStemmingDataCacher(DataCacher): """ Provides cache for stemming information for word/phrase indexes. This class is not to be used directly; use function get_index_stemming_language() instead. 
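    For example (illustrative values): get_index_stemming_language(4) would
    return 'en' for an index whose stemming_language column is set to English.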
""" def __init__(self): def cache_filler(): try: res = run_sql("""SELECT id, stemming_language FROM idxINDEX""") except DatabaseError: # database problems, return empty cache return {} return dict(res) def timestamp_verifier(): return get_table_update_time('idxINDEX') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: index_stemming_cache.is_ok_p except Exception: index_stemming_cache = IndexStemmingDataCacher() def get_index_stemming_language(index_id): """Return stemming langugage for given index.""" index_stemming_cache.recreate_cache_if_needed() return index_stemming_cache.cache[index_id] class CollectionRecListDataCacher(DataCacher): """ Provides cache for collection reclist hitsets. This class is not to be used directly; use function get_collection_reclist() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT name,reclist FROM collection") except Exception: # database problems, return empty cache return {} for name, reclist in res: ret[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) return ret def timestamp_verifier(): return get_table_update_time('collection') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_reclist_cache.is_ok_p: raise Exception except Exception: collection_reclist_cache = CollectionRecListDataCacher() def get_collection_reclist(coll): """Return hitset of recIDs that belong to the collection 'coll'.""" collection_reclist_cache.recreate_cache_if_needed() if not collection_reclist_cache.cache[coll]: # not yet it the cache, so calculate it and fill the cache: set = HitSet() query = "SELECT nbrecs,reclist FROM collection WHERE name=%s" res = run_sql(query, (coll, ), 1) if res: try: set = HitSet(res[0][1]) except: pass collection_reclist_cache.cache[coll] = set # finally, return reclist: return collection_reclist_cache.cache[coll] class SearchResultsCache(DataCacher): """ Provides temporary lazy cache for Search Results. Useful when users click on `next page'. """ def __init__(self): def cache_filler(): return {} def timestamp_verifier(): return '1970-01-01 00:00:00' # lazy cache is always okay; # its filling is governed by # CFG_WEBSEARCH_SEARCH_CACHE_SIZE DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not search_results_cache.is_ok_p: raise Exception except Exception: search_results_cache = SearchResultsCache() class CollectionI18nNameDataCacher(DataCacher): """ Provides cache for I18N collection names. This class is not to be used directly; use function get_coll_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name except Exception: # database problems return {} for c, ln, i18nname in res: if i18nname: if not ret.has_key(c): ret[c] = {} ret[c][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('collectionname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_i18nname_cache.is_ok_p: raise Exception except Exception: collection_i18nname_cache = CollectionI18nNameDataCacher() def get_coll_i18nname(c, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted collection name (of the name type `ln' (=long name)) for collection C in language LN. This function uses collection_i18nname_cache, but it verifies whether the cache is up-to-date first by default. 
This verification step is performed by checking the DB table update time. So, if you call this function 1000 times, it can get very slow because it will do 1000 table update time verifications, even though collection names change not that often. Hence the parameter VERIFY_CACHE_TIMESTAMP which, when set to False, will assume the cache is already up-to-date. This is useful namely in the generation of collection lists for the search results page. """ if verify_cache_timestamp: collection_i18nname_cache.recreate_cache_if_needed() out = c try: out = collection_i18nname_cache.cache[c][ln] except KeyError: pass # translation in LN does not exist return out class FieldI18nNameDataCacher(DataCacher): """ Provides cache for I18N field names. This class is not to be used directly; use function get_field_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name except Exception: # database problems, return empty cache return {} for f, ln, i18nname in res: if i18nname: if not ret.has_key(f): ret[f] = {} ret[f][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('fieldname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not field_i18nname_cache.is_ok_p: raise Exception except Exception: field_i18nname_cache = FieldI18nNameDataCacher() def get_field_i18nname(f, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted field name (of type 'ln', 'long name') for field F in language LN. If VERIFY_CACHE_TIMESTAMP is set to True, then verify DB timestamp and field I18N name cache timestamp and refresh cache from the DB if needed. Otherwise don't bother checking DB timestamp and return the cached value. (This is useful when get_field_i18nname is called inside a loop.) """ if verify_cache_timestamp: field_i18nname_cache.recreate_cache_if_needed() out = f try: out = field_i18nname_cache.cache[f][ln] except KeyError: pass # translation in LN does not exist return out def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" out = [] res = run_sql_cached("SELECT id,name FROM collection ORDER BY name ASC", affected_tables=['collection',]) for c_id, c_name in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c_name, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable out.append([c_name, c_printable]) return out def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" colls_nicely_ordered = [] res = run_sql("""SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC""", (collid, )) for c, cid in res: # make a nice printable name (e.g. 
truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable colls_nicely_ordered.append([c, c_printable]) colls_nicely_ordered = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1, ln=ln) return colls_nicely_ordered def get_index_id_from_field(field): """ Return index id with name corresponding to FIELD, or the first index id where the logical field code named FIELD is indexed. Return zero in case there is no index defined for this field. Example: field='author', output=4. """ out = 0 if field == '': field = 'global' # empty string field means 'global' index (field 'anyfield') # first look in the index table: res = run_sql("""SELECT id FROM idxINDEX WHERE name=%s""", (field,)) if res: out = res[0][0] return out # not found in the index table, now look in the field table: res = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX LIMIT 1""", (field,)) if res: out = res[0][0] return out def get_words_from_pattern(pattern): "Returns list of whitespace-separated words from pattern." words = {} for word in string.split(pattern): if not words.has_key(word): words[word] = 1; return words.keys() def create_basic_search_units(req, p, f, m=None, of='hb'): """Splits search pattern and search field into a list of independently searchable units. - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where 'operator' is set union (|), set intersection (+) or set exclusion (-); 'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics'); 'field' is either a code like 'title' or MARC tag like '100__a'; 'type' is the search type ('w' for word file search, 'a' for access file search). - Optionally, the function accepts the match type argument 'm'. If it is set (e.g. from advanced search interface), then it performs this kind of matching. If it is not set, then a guess is made. 'm' can have values: 'a'='all of the words', 'o'='any of the words', 'p'='phrase/substring', 'r'='regular expression', 'e'='exact value'. - Warnings are printed on req (when not None) in case of HTML output formats.""" opfts = [] # will hold (o,p,f,t,h) units # FIXME: quick hack for the journal index if f == 'journal': opfts.append(['+', p, f, 'w']) return opfts ## check arguments: is desired matching type set? if m: ## A - matching type is known; good! 
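        ## (For instance, with p='nuclear physics' and f='title' this branch
        ##  yields, illustratively:
        ##    m='e' -> [['+', 'nuclear physics', 'title', 'a']]
        ##    m='p' -> [['+', '%nuclear physics%', 'title', 'a']]
        ##    m='a' -> [['+', 'nuclear', 'title', 'w'], ['+', 'physics', 'title', 'w']]
        ##    m='o' -> [['+', 'nuclear', 'title', 'w'], ['|', 'physics', 'title', 'w']])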
if m == 'e': # A1 - exact value: opfts.append(['+', p, f, 'a']) # '+' since we have only one unit elif m == 'p': # A2 - phrase/substring: opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit elif m == 'r': # A3 - regular expression: opfts.append(['+', p, f, 'r']) # '+' since we have only one unit elif m == 'a' or m == 'w': # A4 - all of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): opfts.append(['+', word, f, 'w']) # '+' in all units elif m == 'o': # A5 - any of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): if len(opfts)==0: opfts.append(['+', word, f, 'w']) # '+' in the first unit else: opfts.append(['|', word, f, 'w']) # '|' in further units else: if of.startswith("h"): print_warning(req, "Matching type '%s' is not implemented yet." % cgi.escape(m), "Warning") opfts.append(['+', "%" + p + "%", f, 'w']) else: ## B - matching type is not known: let us try to determine it by some heuristics if f and p[0] == '"' and p[-1] == '"': ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search opfts.append(['+', p[1:-1], f, 'a']) elif f and p[0] == "'" and p[-1] == "'": ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search opfts.append(['+', '%' + p[1:-1] + '%', f, 'a']) elif f and p[0] == "/" and p[-1] == "/": ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search opfts.append(['+', p[1:-1], f, 'r']) elif f and string.find(p, ',') >= 0: ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search opfts.append(['+', p, f, 'a']) elif f and str(f[0:2]).isdigit(): ## B2 - does 'f' exist and starts by two digits? => doing ACC search opfts.append(['+', p, f, 'a']) else: ## B3 - doing WRD search, but maybe ACC too # search units are separated by spaces unless the space is within single or double quotes # so, let us replace temporarily any space within quotes by '__SPACE__' p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # wash argument: p = re_equal.sub(":", p) p = re_logical_and.sub(" ", p) p = re_logical_or.sub(" |", p) p = re_logical_not.sub(" -", p) p = re_operators.sub(r' \1', p) for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item") pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' ' # firstly, determine set operator if pi[0] == '+' or pi[0] == '-' or pi[0] == '|': oi = pi[0] pi = pi[1:] else: # okay, there is no operator, so let us decide what to do by default oi = '+' # by default we are doing set intersection... 
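                # (Illustrative walk-through of this B3 branch: with f=''
                #  the pattern p = 'ellis title:"quark model" -muon' yields
                #  three units: ['+', 'ellis', '', 'w'],
                #  ['+', 'quark model', 'title', 'a'] and
                #  ['-', 'muon', '', 'w'] -- quoted phrases become ACC
                #  units, a leading +/-/| selects the set operator, and
                #  bare words default to '+' WRD units.)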
# secondly, determine search pattern and field: if string.find(pi, ":") > 0: fi, pi = string.split(pi, ":", 1) # test whether fi is a real index code or a MARC-tag defined code: if fi in get_fieldcodes() or '00' <= fi[:2] <= '99': pass else: # it is not, so join it back: fi, pi = f, fi + ":" + pi else: fi, pi = f, pi # look also for old ALEPH field names: if fi and CFG_WEBSEARCH_FIELDS_CONVERT.has_key(string.lower(fi)): fi = CFG_WEBSEARCH_FIELDS_CONVERT[string.lower(fi)] # wash 'pi' argument: if re_quotes.match(pi): # B3a - quotes are found => do ACC search (phrase search) if pi[0] == '"' and pi[-1] == '"': pi = string.replace(pi, '"', '') # remove quote signs opfts.append([oi, pi, fi, 'a']) elif pi[0] == "'" and pi[-1] == "'": pi = string.replace(pi, "'", "") # remove quote signs opfts.append([oi, "%" + pi + "%", fi, 'a']) else: # unbalanced quotes, so fall back to WRD query: opfts.append([oi, pi, fi, 'w']) elif fi and str(fi[0]).isdigit() and str(fi[0]).isdigit(): # B3b - fi exists and starts by two digits => do ACC search opfts.append([oi, pi, fi, 'a']) elif fi and not get_index_id_from_field(fi) and get_field_name(fi): # B3c - logical field fi exists but there is no WRD index for fi => try ACC search opfts.append([oi, pi, fi, 'a']) elif pi.startswith('/') and pi.endswith('/'): # B3d - pi has slashes around => do regexp search opfts.append([oi, pi[1:-1], fi, 'r']) else: # B3e - general case => do WRD search pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed for pii in get_words_from_pattern(pi): opfts.append([oi, pii, fi, 'w']) ## sanity check: for i in range(0, len(opfts)): try: pi = opfts[i][1] if pi == '*': if of.startswith("h"): print_warning(req, "Ignoring standalone wildcard word.", "Warning") del opfts[i] if pi == '' or pi == ' ': fi = opfts[i][2] if fi: if of.startswith("h"): print_warning(req, "Ignoring empty %s search term." % fi, "Warning") del opfts[i] except: pass ## return search units: return opfts def page_start(req, of, cc, aas, ln, uid, title_message=None, description='', keywords='', recID=-1, tab='', p=''): "Start page according to given output format." _ = gettext_set_language(ln) if not req or isinstance(req, cStringIO.OutputType): return # we were called from CLI if not title_message: title_message = _("Search Results") content_type = get_output_format_content_type(of) if of.startswith('x'): if of == 'xr': # we are doing RSS output req.content_type = "application/rss+xml" req.send_http_header() req.write("""\n""") else: # we are doing XML output: req.content_type = "text/xml" req.send_http_header() req.write("""\n""") elif of.startswith('t') or str(of[0:3]).isdigit(): # we are doing plain text output: req.content_type = "text/plain" req.send_http_header() elif of == "id": pass # nothing to do, we shall only return list of recIDs elif content_type == 'text/html': # we are doing HTML output: req.content_type = "text/html" req.send_http_header() if not description: description = "%s %s." 
% (cc, _("Search Results")) if not keywords: keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln, False), get_coll_i18nname(cc, ln, False)) ## generate RSS URL: argd = {} if req.args: argd = cgi.parse_qs(req.args) rssurl = websearch_templates.build_rss_url(argd) ## add jsmath if displaying single records (FIXME: find ## eventual better place to this code) if of.lower() in CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS: metaheaderadd = """ """ else: metaheaderadd = '' ## generate navtrail: navtrail = create_navtrail_links(cc, aas, ln) if navtrail != '': navtrail += ' > ' if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \ recID != -1: # If we are not in information tab in HD format, customize # the nav. trail to have a link back to main record. (Due # to the way perform_request_search() works, hb # (lowercase) is equal to hd) navtrail += ' %s' % \ (CFG_SITE_URL, recID, title_message) if (of != '' or of.lower() != 'hd') and of != 'hb': # Export format_name = of query = "SELECT name FROM format WHERE code=%s" res = run_sql(query, (of,)) if res: format_name = res[0][0] navtrail += ' > ' + format_name else: # Discussion, citations, etc. tabs tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label'] navtrail += ' > ' + _(tab_label) else: navtrail += title_message if p: # we are serving search/browse results pages, so insert pattern: navtrail += ": " + cgi.escape(p) title_message = cgi.escape(p) + " - " + title_message ## finally, print page header: req.write(pageheaderonly(req=req, title=title_message, navtrail=navtrail, description=description, keywords=keywords, metaheaderadd=metaheaderadd, uid=uid, language=ln, navmenuid='search', navtrail_append_title_p=0, rssurl=rssurl)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) #else: # req.send_http_header() def page_end(req, of="hb", ln=CFG_SITE_LANG): "End page according to given output format: e.g. close XML tags, add HTML footer, etc." if of == "id": return [] # empty recID list if not req: return # we were called from CLI if of.startswith('h'): req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req)) return def create_page_title_search_pattern_info(p, p1, p2, p3): """Create the search pattern bit for the page web page HTML header. Basically combine p and (p1,p2,p3) together so that the page header may be filled whether we are in the Simple Search or Advanced Search interface contexts.""" out = "" if p: out = p else: out = p1 if p2: out += ' ' + p2 if p3: out += ' ' + p3 return out def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." 
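    # (For example, name='d1' produces three selects named d1d, d1m and d1y;
    #  together with name='d2' these are the d1d/d1m/d1y and d2d/d2m/d2y
    #  arguments that create_search_box() below passes around for the
    #  'From Date' / 'Until Date' filters.)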
_ = gettext_set_language(ln) box = "" # day box += """<select name="%sd">""" % name box += """<option value="">%s""" % _("any day") for day in range(1, 32): box += """<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day) box += """</select>""" # month box += """<select name="%sm">""" % name box += """<option value="">%s""" % _("any month") for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \ (5, _("May")), (6, _("June")), (7, _("July")), (8, _("August")), \ (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]: box += """<option value="%02d"%s>%s""" % (mm, is_selected(mm, selected_month), month) box += """</select>""" # year box += """<select name="%sy">""" % name box += """<option value="">%s""" % _("any year") this_year = int(time.strftime("%Y", time.localtime())) for year in range(this_year-20, this_year+1): box += """<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year) box += """</select>""" return box def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action=""): """Create search box for 'search again in the results page' functionality.""" # load the right message language _ = gettext_set_language(ln) # some computations cc_intl = get_coll_i18nname(cc, ln, False) cc_colID = get_colID(cc) colls_nicely_ordered = [] if cfg_nicely_ordered_collection_list: colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln) else: colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln) colls_nice = [] for (cx, cx_printable) in colls_nicely_ordered: if not cx.startswith("Unnamed collection"): colls_nice.append({ 'value' : cx, 'text' : cx_printable }) coll_selects = [] if colls and colls[0] != CFG_SITE_NAME: # some collections are defined, so print these first, and only then print 'add another collection' heading: for c in colls: if c: temp = [] temp.append({ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }) # this field is used to remove the current collection from the ones to be searched. temp.append({ 'value' : '', 'text' : '*** %s ***' % _("remove this collection") }) for val in colls_nice: # print collection: if not cx.startswith("Unnamed collection"): temp.append({ 'value' : val['value'], 'text' : val['text'], 'selected' : (c == re.sub("^[\s\-]*","", val['value'])) }) coll_selects.append(temp) coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % _("add another collection") }] + colls_nice) else: # we searched in CFG_SITE_NAME, so print 'any public collection' heading coll_selects.append([{ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }] + colls_nice) ## ranking methods ranks = [{ 'value' : '', 'text' : "- %s %s -" % (_("OR").lower (), _("rank by")), }] for (code, name) in get_bibrank_methods(cc_colID, ln): # propose found rank methods: ranks.append({ 'value' : code, 'text' : name, }) formats = [] query = """SELECT code,name FROM format WHERE visibility='1' ORDER BY name ASC""" res = run_sql(query) if res: # propose found formats: for code, name in res: formats.append({ 'value' : code, 'text' : name }) else: formats.append({'value' : 'hb', 'text' : _("HTML brief") }) # show collections in the search box? 
(not if there is only one # collection defined, and not if we are in light search) show_colls = True show_title = True if len(collection_reclist_cache.cache.keys()) == 1 or \ aas == -1: show_colls = False show_title = False if cc == CFG_SITE_NAME: show_title = False return websearch_templates.tmpl_search_box( ln = ln, aas = aas, cc_intl = cc_intl, cc = cc, ot = ot, sp = sp, action = action, fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID), f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, p1 = p1, p2 = p2, p3 = p3, op1 = op1, op2 = op2, rm = rm, p = p, f = f, coll_selects = coll_selects, d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d, dt = dt, sort_fields = get_sortby_fields(ln=ln, colID=cc_colID), sf = sf, so = so, ranks = ranks, sc = sc, rg = rg, formats = formats, of = of, pl = pl, jrec = jrec, ec = ec, show_colls = show_colls, show_title = show_title, ) def create_navtrail_links(cc=CFG_SITE_NAME, aas=0, ln=CFG_SITE_LANG, self_p=1, tab=''): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in get_coll_ancestors(cc): if dad != CFG_SITE_NAME: # exclude Home collection dads.append ((dad, get_coll_i18nname(dad, ln, False))) if self_p and cc != CFG_SITE_NAME: dads.append((cc, get_coll_i18nname(cc, ln, False))) return websearch_templates.tmpl_navtrail_links( aas=aas, ln=ln, dads=dads) def get_searchwithin_fields(ln='en', colID=None): """Retrieves the fields name used in the 'search within' selection box for the collection ID colID.""" res = None if colID: res = run_sql_cached("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,), affected_tables=['field', 'collection_field_fieldvalue']) if not res: res = run_sql_cached("SELECT code,name FROM field ORDER BY name ASC", affected_tables=['field',]) fields = [{ 'value' : '', 'text' : get_field_i18nname("any field", ln, False) }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def get_sortby_fields(ln='en', colID=None): """Retrieves the fields name used in the 'sort by' selection box for the collection ID colID.""" _ = gettext_set_language(ln) res = None if colID: res = run_sql_cached("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,), affected_tables=['field', 'collection_field_fieldvalue']) if not res: # no sort fields defined for this colID, try to take Home collection: res = run_sql_cached("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (1,), affected_tables=['field', 'collection_field_fieldvalue']) if not res: # no sort fields defined for the Home collection, take all sort fields defined wherever they are: res = run_sql_cached("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", affected_tables=['field', 'collection_field_fieldvalue']) fields = [{ 'value' : '', 'text' : _("latest first") }] 
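    # (The three queries above fall back in turn: sort fields configured for
    #  this collection, then those of the Home collection (id 1), then any
    #  sort field defined anywhere; the '' entry just prepended keeps
    #  "latest first" as the default choice before the loop below appends
    #  the real fields as {'value': code, 'text': i18n name} dictionaries.)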
for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="n"%s>%s </select> """ % (name, is_selected('a', value), _("AND"), is_selected('o', value), _("OR"), is_selected('n', value), _("AND NOT")) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="e"%s>%s <option value="p"%s>%s <option value="r"%s>%s </select> """ % (name, is_selected('a', value), _("All of the words:"), is_selected('o', value), _("Any of the words:"), is_selected('e', value), _("Exact phrase:"), is_selected('p', value), _("Partial phrase:"), is_selected('r', value), _("Regular expression:")) return out def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if type(var) is int and type(fld) is int: if var == fld: return " selected" elif str(var) == str(fld): return " selected" elif fld and len(fld)==3 and fld[0] == "w" and var == fld[1:]: return " selected" return "" def wash_colls(cc, c, split_colls=0, verbose=0): """Wash collection list by checking whether user has deselected anything under 'Narrow search'. Checks also if cc is a list or not. Return list of cc, colls_to_display, colls_to_search since the list of collections to display is different from that to search in. This is because users might have chosen 'split by collection' functionality. The behaviour of "collections to display" depends solely whether user has deselected a particular collection: e.g. if it started from 'Articles and Preprints' page, and deselected 'Preprints', then collection to display is 'Articles'. If he did not deselect anything, then collection to display is 'Articles & Preprints'. The behaviour of "collections to search in" depends on the 'split_colls' parameter: * if is equal to 1, then we can wash the colls list down and search solely in the collection the user started from; * if is equal to 0, then we are splitting to the first level of collections, i.e. collections as they appear on the page we started to search from; The function raises exception InvenioWebSearchUnknownCollectionError if cc or one of c collections is not known. 
""" colls_out = [] colls_out_for_display = [] # list to hold the hosted collections to be searched and displayed hosted_colls_out = [] debug = "" if verbose: debug += "<br />" debug += "<br />1) --- initial parameters ---" debug += "<br />cc : %s" % cc debug += "<br />c : %s" % c debug += "<br />" # check what type is 'cc': if type(cc) is list: for ci in cc: if collection_reclist_cache.cache.has_key(ci): # yes this collection is real, so use it: cc = ci break else: # check once if cc is real: if not collection_reclist_cache.cache.has_key(cc): if cc: raise InvenioWebSearchUnknownCollectionError(cc) else: cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection # check type of 'c' argument: if type(c) is list: colls = c else: colls = [c] if verbose: debug += "<br />2) --- after check for the integrity of cc and the being or not c a list ---" debug += "<br />cc : %s" % cc debug += "<br />c : %s" % c debug += "<br />" # remove all 'unreal' collections: colls_real = [] for coll in colls: if collection_reclist_cache.cache.has_key(coll): colls_real.append(coll) else: if coll: raise InvenioWebSearchUnknownCollectionError(coll) colls = colls_real if verbose: debug += "<br />3) --- keeping only the real colls of c ---" debug += "<br />colls : %s" % colls debug += "<br />" # check if some real collections remain: if len(colls)==0: colls = [cc] if verbose: debug += "<br />4) --- in case no colls were left we use cc directly ---" debug += "<br />colls : %s" % colls debug += "<br />" # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll': res = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r'""", (cc,)) # list that holds all the non restricted sons of cc that are also not hosted collections l_cc_nonrestricted_sons_and_nonhosted_colls = [] res_hosted = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r' AND (c.dbquery NOT LIKE 'hostedcollection:%%' OR c.dbquery IS NULL)""", (cc,)) for row_hosted in res_hosted: l_cc_nonrestricted_sons_and_nonhosted_colls.append(row_hosted[0]) l_cc_nonrestricted_sons_and_nonhosted_colls.sort() l_cc_nonrestricted_sons = [] l_c = colls for row in res: if not collection_restricted_p(row[0]): l_cc_nonrestricted_sons.append(row[0]) l_c.sort() l_cc_nonrestricted_sons.sort() if l_cc_nonrestricted_sons == l_c: colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc' # the following elif is a hack that preserves the above funcionality when we start searching from # the frontpage with some hosted collections deselected (either by default or manually) elif set(l_cc_nonrestricted_sons_and_nonhosted_colls).issubset(set(l_c)): colls_out_for_display = colls split_colls = 0 else: colls_out_for_display = colls # nope, we need to display all 'colls' successively # remove duplicates: #colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1)) #colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups) colls_out_for_display = list(set(colls_out_for_display)) if verbose: debug += "<br />5) --- decide whether colls_out_for_diplay should be colls or is it sufficient for it 
        debug += "<br />colls_out_for_display : %s" % colls_out_for_display
        debug += "<br />"

    # the following piece of code takes care of removing collections whose ancestors are going to be searched anyway
    # list to hold the collections to be removed
    colls_to_be_removed = []
    # first calculate the collections that can safely be removed
    for coll in colls_out_for_display:
        for ancestor in get_coll_ancestors(coll):
            #if ancestor in colls_out_for_display: colls_to_be_removed.append(coll)
            if ancestor in colls_out_for_display and not is_hosted_collection(coll):
                colls_to_be_removed.append(coll)
    # secondly remove the collections
    for coll in colls_to_be_removed:
        colls_out_for_display.remove(coll)

    if verbose:
        debug += "<br />6) --- remove collections that have ancestors about to be searched, unless they are hosted ---"
        debug += "<br />colls_out_for_display : %s" % colls_out_for_display
        debug += "<br />"

    # calculate the hosted collections to be searched.
    if colls_out_for_display == [cc]:
        if is_hosted_collection(cc):
            hosted_colls_out.append(cc)
        else:
            for coll in get_coll_sons(cc):
                if is_hosted_collection(coll):
                    hosted_colls_out.append(coll)
    else:
        for coll in colls_out_for_display:
            if is_hosted_collection(coll):
                hosted_colls_out.append(coll)

    if verbose:
        debug += "<br />7) --- calculate the hosted_colls_out ---"
        debug += "<br />hosted_colls_out : %s" % hosted_colls_out
        debug += "<br />"

    # second, let us decide on collection splitting:
    if split_colls == 0:
        # type A - no sons are wanted
        colls_out = colls_out_for_display
    else:
        # type B - sons (first-level descendants) are wanted
        for coll in colls_out_for_display:
            coll_sons = get_coll_sons(coll)
            if coll_sons == []:
                colls_out.append(coll)
            else:
                for coll_son in coll_sons:
                    if not is_hosted_collection(coll_son):
                        colls_out.append(coll_son)
            #else:
            #    colls_out = colls_out + coll_sons

    # remove duplicates:
    #colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1))
    #colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups)
    colls_out = list(set(colls_out))

    if verbose:
        debug += "<br />8) --- calculate the colls_out; remove duplicates ---"
        debug += "<br />colls_out : %s" % colls_out
        debug += "<br />"

    # remove the hosted collections from the collections to be searched
    if hosted_colls_out:
        for coll in hosted_colls_out:
            try:
                colls_out.remove(coll)
            except ValueError:
                # in case coll was not found in colls_out
                pass

    if verbose:
        debug += "<br />9) --- remove the hosted_colls from the colls_out ---"
        debug += "<br />colls_out : %s" % colls_out

    return (cc, colls_out_for_display, colls_out, hosted_colls_out, debug)

def strip_accents(x):
    """Strip accents in the input phrase X (assumed in UTF-8) by replacing
    accented characters with their unaccented cousins (e.g. é by e).
    Return such a stripped X."""
    x = re_latex_lowercase_a.sub("a", x)
    x = re_latex_lowercase_ae.sub("ae", x)
    x = re_latex_lowercase_e.sub("e", x)
    x = re_latex_lowercase_i.sub("i", x)
    x = re_latex_lowercase_o.sub("o", x)
    x = re_latex_lowercase_u.sub("u", x)
    x = re_latex_lowercase_y.sub("y", x)
    x = re_latex_lowercase_c.sub("c", x)
    x = re_latex_lowercase_n.sub("n", x)
    x = re_latex_uppercase_a.sub("A", x)
    x = re_latex_uppercase_ae.sub("AE", x)
    x = re_latex_uppercase_e.sub("E", x)
    x = re_latex_uppercase_i.sub("I", x)
    x = re_latex_uppercase_o.sub("O", x)
    x = re_latex_uppercase_u.sub("U", x)
    x = re_latex_uppercase_y.sub("Y", x)
    x = re_latex_uppercase_c.sub("C", x)
    x = re_latex_uppercase_n.sub("N", x)
    # convert input into Unicode string:
    try:
        y = unicode(x, "utf-8")
    except:
        return x # something went wrong, probably the input wasn't UTF-8
    # asciify Latin-1 lowercase characters:
    y = re_unicode_lowercase_a.sub("a", y)
    y = re_unicode_lowercase_ae.sub("ae", y)
    y = re_unicode_lowercase_e.sub("e", y)
    y = re_unicode_lowercase_i.sub("i", y)
    y = re_unicode_lowercase_o.sub("o", y)
    y = re_unicode_lowercase_u.sub("u", y)
    y = re_unicode_lowercase_y.sub("y", y)
    y = re_unicode_lowercase_c.sub("c", y)
    y = re_unicode_lowercase_n.sub("n", y)
    # asciify Latin-1 uppercase characters:
    y = re_unicode_uppercase_a.sub("A", y)
    y = re_unicode_uppercase_ae.sub("AE", y)
    y = re_unicode_uppercase_e.sub("E", y)
    y = re_unicode_uppercase_i.sub("I", y)
    y = re_unicode_uppercase_o.sub("O", y)
    y = re_unicode_uppercase_u.sub("U", y)
    y = re_unicode_uppercase_y.sub("Y", y)
    y = re_unicode_uppercase_c.sub("C", y)
    y = re_unicode_uppercase_n.sub("N", y)
    # return UTF-8 representation of the Unicode string:
    return y.encode("utf-8")

def wash_index_term(term, max_char_length=50, lower_term=True):
    """
    Return washed form of the index term TERM that would be suitable
    for storing into idxWORD* tables.  I.e., lower the TERM if
    LOWER_TERM is True, and truncate it safely to MAX_CHAR_LENGTH
    UTF-8 characters (meaning, in principle, 4*MAX_CHAR_LENGTH bytes).

    The function works by an internal conversion of TERM, when needed,
    from its input Python UTF-8 binary string format into Python
    Unicode format, and then truncating it safely to the given number
    of UTF-8 characters, without possible mis-truncation in the middle
    of a multi-byte UTF-8 character that could otherwise happen if we
    would have been working with UTF-8 binary representation directly.

    Note that MAX_CHAR_LENGTH corresponds to the length of the term
    column in idxINDEX* tables.
    """
    if lower_term:
        washed_term = unicode(term, 'utf-8').lower()
    else:
        washed_term = unicode(term, 'utf-8')
    if len(washed_term) <= max_char_length:
        # no need to truncate the term, because it will fit
        # nicely even if it uses four-byte UTF-8 characters
        return washed_term.encode('utf-8')
    else:
        # truncate the term in a safe position:
        return washed_term[:max_char_length].encode('utf-8')

def lower_index_term(term):
    """
    Return safely lowered index term TERM.  This is done by
    converting to UTF-8 first, because standard Python lower()
    function is not UTF-8 safe.  To be called by both the search
    engine and the indexer when appropriate (e.g. before stemming).

    In case of problems with UTF-8 compliance, this function raises
    UnicodeDecodeError, so the client code may want to catch it.
    """
    return unicode(term, 'utf-8').lower().encode('utf-8')

def wash_output_format(format):
    """Wash output format FORMAT.  Currently only prevents input like
    'of=9' for backwards-compatible format that prints certain fields only.
    (for this task, 'of=tm' is preferred)"""
    if str(format[0:3]).isdigit() and len(format) != 6:
        # asked to print MARC tags, but not enough digits,
        # so let's switch back to HTML brief default
        return 'hb'
    else:
        return format

def wash_pattern(p):
    """Wash pattern passed by URL.  Check for sanity of the wildcard by
    removing wildcards if they are appended to extremely short words
    (1-3 letters).  TODO: instead of this approximative treatment, it
    will be much better to introduce a time limit, e.g. to kill a
    query if it does not finish in 10 seconds."""
    # strip accents:
    # p = strip_accents(p) # FIXME: when available, strip accents all the time
    # add leading/trailing whitespace for the two following wildcard-sanity checking regexps:
    p = " " + p + " "
    # get rid of wildcards at the beginning of words:
    p = re_pattern_wildcards_at_beginning.sub("\\1", p)
    # replace spaces within quotes by __SPACE__ temporarily:
    p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
    p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p)
    p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p)
    # get rid of extremely short words (1-3 letters with wildcards):
    p = re_pattern_short_words.sub("\\1", p)
    # replace back __SPACE__ by spaces:
    p = re_pattern_space.sub(" ", p)
    # replace special terms:
    p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p)
    # remove unnecessary whitespace:
    p = string.strip(p)
    return p

def wash_field(f):
    """Wash field passed by URL."""
    # get rid of unnecessary whitespace:
    f = string.strip(f)
    # wash old-style CDS Invenio/ALEPH 'f' field argument, e.g. replaces 'wau' and 'au' by 'author'
    if CFG_WEBSEARCH_FIELDS_CONVERT.has_key(string.lower(f)):
        f = CFG_WEBSEARCH_FIELDS_CONVERT[f]
    return f

def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0):
    """
    Take user-submitted date arguments D1 (full datetime string) or
    (D1Y, D1M, D1D) year, month, day tuple and D2 or (D2Y, D2M, D2D)
    and return (YYYY1-MM1-DD1 HH1:MM1:SS1, YYYY2-MM2-DD2 HH2:MM2:SS2)
    datetime strings in the YYYY-MM-DD HH:MM:SS format suitable for
    time restricted searching.

    Note that when both D1 and (D1Y, D1M, D1D) parameters are present,
    the precedence goes to D1.  Ditto for D2*.

    Note that when (D1Y, D1M, D1D) are taken into account, some values
    may be missing and are completed e.g. to 01 or 12 according to
    whether it is the starting or the ending date.
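
    Illustrative examples (an assumed sketch of the completion rules
    described above, not an API guarantee):
        >>> wash_dates(d1y=2008, d1m=3)
        ('2008-03-01 00:00:00', '9999-12-31 00:00:00')
        >>> wash_dates(d1="2008-03-15 12:00:00")
        ('2008-03-15 12:00:00', '9999-12-31 00:00:00')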
""" datetext1, datetext2 = "", "" # sanity checking: if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and d2 == "" and d2y == 0 and d2m == 0 and d2d == 0: return ("", "") # nothing selected, so return empty values # wash first (starting) date: if d1: # full datetime string takes precedence: datetext1 = d1 else: # okay, first date passed as (year,month,day): if d1y: datetext1 += "%04d" % d1y else: datetext1 += "0000" if d1m: datetext1 += "-%02d" % d1m else: datetext1 += "-01" if d1d: datetext1 += "-%02d" % d1d else: datetext1 += "-01" datetext1 += " 00:00:00" # wash second (ending) date: if d2: # full datetime string takes precedence: datetext2 = d2 else: # okay, second date passed as (year,month,day): if d2y: datetext2 += "%04d" % d2y else: datetext2 += "9999" if d2m: datetext2 += "-%02d" % d2m else: datetext2 += "-12" if d2d: datetext2 += "-%02d" % d2d else: datetext2 += "-31" # NOTE: perhaps we should add max(datenumber) in # given month, but for our quering it's not # needed, 31 will always do datetext2 += " 00:00:00" # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts: return (datetext1, datetext2) def is_hosted_collection(coll): """Check if the given collection is a hosted one; i.e. its dbquery starts with hostedcollection: Returns True if it is, False if it's not or if the result is empty or if the query failed""" res = run_sql("SELECT dbquery FROM collection WHERE name=%s", (coll, )) try: return res[0][0].startswith("hostedcollection:") except: return False def get_colID(c): "Return collection ID for collection name C. Return None if no match found." colID = None res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1) if res: colID = res[0][0] return colID def get_coll_ancestors(coll): "Returns a list of ancestors for collection 'coll'." coll_ancestors = [] coll_ancestor = coll while 1: res = run_sql("""SELECT c.name FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad LEFT JOIN collection AS ccc ON ccc.id=cc.id_son WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""", (coll_ancestor,)) if res: coll_name = res[0][0] coll_ancestors.append(coll_name) coll_ancestor = coll_name else: break # ancestors found, return reversed list: coll_ancestors.reverse() return coll_ancestors def get_coll_sons(coll, type='r', public_only=1): """Return a list of sons (first-level descendants) of type 'type' for collection 'coll'. If public_only, then return only non-restricted son collections. """ coll_sons = [] query = "SELECT c.name FROM collection AS c "\ "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\ "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\ "WHERE cc.type=%s AND ccc.name=%s" query += " ORDER BY cc.score DESC" res = run_sql(query, (type, coll)) for name in res: if not public_only or not collection_restricted_p(name[0]): coll_sons.append(name[0]) return coll_sons def get_coll_real_descendants(coll, type='_', get_hosted_colls=True): """Return a list of all descendants of collection 'coll' that are defined by a 'dbquery'. IOW, we need to decompose compound collections like "A & B" into "A" and "B" provided that "A & B" has no associated database query defined. 
""" coll_sons = [] res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_son LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad WHERE ccc.name=%s AND cc.type LIKE %s ORDER BY cc.score DESC""", (coll, type,)) for name, dbquery in res: if dbquery: # this is 'real' collection, so return it: if get_hosted_colls: coll_sons.append(name) else: if not dbquery.startswith("hostedcollection:"): coll_sons.append(name) else: # this is 'composed' collection, so recurse: coll_sons.extend(get_coll_real_descendants(name)) return coll_sons def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG): """Browse either biliographic phrases or words indexes, and display it.""" # load the right message language _ = gettext_set_language(ln) ## is p enclosed in quotes? (coming from exact search) if p.startswith('"') and p.endswith('"'): p = p[1:-1] p_orig = p ## okay, "real browse" follows: ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) ## do we search in words indexes? if not f: return browse_in_bibwords(req, p, f) index_id = get_index_id_from_field(f) if index_id != 0: coll = HitSet() for coll_name in colls: coll |= get_collection_reclist(coll_name) browsed_phrases_in_colls = get_nearest_terms_in_idxphrase_with_collection(p, index_id, rg/2, rg/2, coll) else: browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) while not browsed_phrases: # try again and again with shorter and shorter pattern: try: p = p[:-1] browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) except: # probably there are no hits at all: req.write(_("No values found.")) return ## try to check hits in these particular collection selection: browsed_phrases_in_colls = [] if 0: for phrase in browsed_phrases: phrase_hitset = HitSet() phrase_hitsets = search_pattern("", phrase, f, 'e') for coll in colls: phrase_hitset.union_update(phrase_hitsets[coll]) if len(phrase_hitset) > 0: # okay, this phrase has some hits in colls, so add it: browsed_phrases_in_colls.append([phrase, len(phrase_hitset)]) ## were there hits in collections? if browsed_phrases_in_colls == []: if browsed_phrases != []: #print_warning(req, """<p>No match close to <em>%s</em> found in given collections. #Please try different term.<p>Displaying matches in any collection...""" % p_orig) ## try to get nbhits for these phrases in any collection: for phrase in browsed_phrases: browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)]) ## display results now: out = websearch_templates.tmpl_browse_pattern( f=f, fn=get_field_i18nname(get_field_name(f) or f, ln, False), ln=ln, browsed_phrases_in_colls=browsed_phrases_in_colls, colls=colls, rg=rg, ) req.write(out) return def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG): """Browse inside words indexes.""" if not p: return _ = gettext_set_language(ln) urlargd = {} urlargd.update(req.argd) urlargd['action'] = 'search' nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0) req.write(websearch_templates.tmpl_search_in_bibwords( p = p, f = f, ln = ln, nearest_box = nearest_box )) return def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True): """Search for complex pattern 'p' within field 'f' according to matching type 'm'. Return hitset of recIDs. 
       The function uses a multi-stage searching algorithm in case no
       exact match is found.  See the Search Internals document for
       detailed description.

       The 'ap' argument governs whether alternative patterns are to be
       used in case there is no direct hit for (p,f,m).  For example,
       whether to replace non-alphanumeric characters by spaces if it
       would give some hits.  See the Search Internals document for
       detailed description.  (ap=0 forbids the alternative pattern
       usage, ap=1 permits it.)

       The 'of' argument governs whether to print or not some
       information to the user in case of no match found.  (Usually it
       prints the information in case of HTML formats, otherwise it's
       silent).

       The 'verbose' argument controls the level of debugging information
       to be printed (0=least, 9=most).

       All the parameters are assumed to have been previously washed.

       This function is suitable as a mid-level API.
    """

    _ = gettext_set_language(ln)

    hitset_empty = HitSet()
    # sanity check:
    if not p:
        hitset_full = HitSet(trailing_bits=1)
        hitset_full.discard(0)
        # no pattern, so return all universe
        return hitset_full
    # search stage 1: break up arguments into basic search units:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units = create_basic_search_units(req, p, f, m, of)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units)))
        print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1))
    # search stage 2: do search for each search unit and verify hit presence:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units_hitsets = []
    for idx_unit in xrange(len(basic_search_units)):
        bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
        basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m)
        if verbose >= 9 and of.startswith("h"):
            print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset))
        if len(basic_search_unit_hitset) > 0 or \
           ap==0 or \
           bsu_o=="|" or \
           ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"):
            # stage 2-1: this basic search unit is retained, since
            # either the hitset is non-empty, or the approximate
            # pattern treatment is switched off, or the search unit
            # was joined by an OR operator to preceding/following
            # units so we do not require that it exists
            basic_search_units_hitsets.append(basic_search_unit_hitset)
        else:
            # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern:
            if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p):
                if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query?
                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p)
                else: # it is WRD query
                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p)
                if verbose and of.startswith('h') and req:
                    print_warning(req, "Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m)))
                basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln)
                if len(basic_search_unit_hitset) > 0:
                    # we retain the new unit instead
                    if of.startswith('h'):
                        print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \
                                      {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>",
                                       'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"})
                    basic_search_units[idx_unit][1] = bsu_pn
                    basic_search_units_hitsets.append(basic_search_unit_hitset)
                else:
                    # stage 2-3: no hits found either, propose nearest indexed terms:
                    if of.startswith('h') and display_nearest_terms_box:
                        if req:
                            if bsu_f == "recid":
                                print_warning(req, "Requested record does not seem to exist.")
                            else:
                                print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln))
                    return hitset_empty
            else:
                # stage 2-3: no hits found either, propose nearest indexed terms:
                if of.startswith('h') and display_nearest_terms_box:
                    if req:
                        if bsu_f == "recid":
                            print_warning(req, "Requested record does not seem to exist.")
                        else:
                            print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln))
                return hitset_empty
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        for idx_unit in range(0, len(basic_search_units)):
            print_warning(req, "Search stage 2: basic search unit %s gave %d hits." %
                          (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit])))
        print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1))
    # search stage 3: apply boolean query for each search unit:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    # let the initial set be the complete universe:
    hitset_in_any_collection = HitSet(trailing_bits=1)
    hitset_in_any_collection.discard(0)
    for idx_unit in xrange(len(basic_search_units)):
        this_unit_operation = basic_search_units[idx_unit][0]
        this_unit_hitset = basic_search_units_hitsets[idx_unit]
        if this_unit_operation == '+':
            hitset_in_any_collection.intersection_update(this_unit_hitset)
        elif this_unit_operation == '-':
            hitset_in_any_collection.difference_update(this_unit_hitset)
        elif this_unit_operation == '|':
            hitset_in_any_collection.union_update(this_unit_hitset)
        else:
            if of.startswith("h"):
                print_warning(req, "Invalid set operation %s." % cgi.escape(this_unit_operation), "Error")
    if len(hitset_in_any_collection) == 0:
        # no hits found, propose alternative boolean query:
        if of.startswith('h') and display_nearest_terms_box:
            nearestterms = []
            for idx_unit in range(0, len(basic_search_units)):
                bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
                if bsu_p.startswith("%") and bsu_p.endswith("%"):
                    bsu_p = "'" + bsu_p[1:-1] + "'"
                bsu_nbhits = len(basic_search_units_hitsets[idx_unit])

                # create a similar query, but with the basic search unit only
                argd = {}
                argd.update(req.argd)

                argd['p'] = bsu_p
                argd['f'] = bsu_f

                nearestterms.append((bsu_p, bsu_nbhits, argd))

            text = websearch_templates.tmpl_search_no_boolean_hits(
                     ln=ln, nearestterms=nearestterms)
            print_warning(req, text)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection))
        print_warning(req, "Search stage 3: execution took %.2f seconds." % (t2 - t1))
    return hitset_in_any_collection

def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True):
    """Search for complex pattern 'p' containing parentheses within
       field 'f' according to matching type 'm'.  Return hitset of
       recIDs.

       For more details on the parameters see 'search_pattern'
    """
    _ = gettext_set_language(ln)

    # if the pattern uses SPIRES search syntax, convert it to Invenio syntax
    spires_syntax_converter = SpiresToInvenioSyntaxConverter()
    p = spires_syntax_converter.convert_query(p)

    # sanity check: do not call parenthesised parser for search terms
    # like U(1):
    if not re_pattern_parens.search(p):
        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box)

    # Try searching with parentheses
    try:
        parser = SearchQueryParenthesisedParser()

        # get a hitset with all recids
        result_hitset = HitSet(trailing_bits=1)

        # parse the query. The result is list of [op1, expr1, op2, expr2, ..., opN, exprN]
        parsing_result = parser.parse_query(p)
        if verbose and of.startswith("h"):
            print_warning(req, "Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result))

        # go through every pattern,
        # calculate a hitset for it,
        # and combine the pattern's hitset with the result using the corresponding operator
        for index in xrange(0, len(parsing_result)-1, 2):
            current_operator = parsing_result[index]
            current_pattern = parsing_result[index+1]

            # obtain a hitset for the current pattern
            current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box)

            # combine the current hitset with resulting hitset using the current operator
            if current_operator == '+':
                result_hitset = result_hitset & current_hitset
            elif current_operator == '-':
                result_hitset = result_hitset - current_hitset
            elif current_operator == '|':
                result_hitset = result_hitset | current_hitset
            else:
                assert False, "Unknown operator in search_pattern_parenthesised()"

        return result_hitset

    # If searching with parentheses fails, perform search ignoring parentheses
    except InvenioWebSearchQueryParserException:
        print_warning(req, _("Nested or mismatched parentheses detected. Ignoring all parentheses in the query..."))

        # remove the parentheses in the query. Current implementation removes all the parentheses,
        # but it could be improved to remove only those that are not inside quotes
        p = p.replace('(', ' ')
        p = p.replace(')', ' ')

        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box)

def search_unit(p, f=None, m=None):
    """Search for basic search unit defined by pattern 'p' and field
       'f' and matching type 'm'.  Return hitset of recIDs.

       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a ``basic search unit'' so that it
       is searched as such and is not broken up in any way.  Only
       wildcard and span queries are being detected inside 'p'.

       This function is suitable as a low-level API.
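
       For example (illustrative): search_unit('ellis', 'author') runs a
       word-index search, while search_unit('cited:10->20') selects
       records cited between 10 and 20 times (assuming the citation
       indexes are built).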
""" ## create empty output results set: set = HitSet() if not p: # sanity checking return set if m == 'a' or m == 'r': # we are doing either phrase search or regexp search index_id = get_index_id_from_field(f) if index_id != 0: set = search_unit_in_idxphrases(p, f, m) else: set = search_unit_in_bibxxx(p, f, m) elif p.startswith("cited:"): # we are doing search by the citation count set = search_unit_by_times_cited(p[6:]) else: # we are doing bibwords search by default set = search_unit_in_bibwords(p, f) return set def search_unit_in_bibwords(word, f, decompress=zlib.decompress): """Searches for 'word' inside bibwordsX table for field 'f' and returns hitset of recIDs.""" set = HitSet() # will hold output result set set_used = 0 # not-yet-used flag, to be able to circumvent set operations # deduce into which bibwordsX table we will search: stemming_language = get_index_stemming_language(get_index_id_from_field("anyfield")) bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id stemming_language = get_index_stemming_language(index_id) else: return HitSet() # word index f does not exist # wash 'word' argument and run query: word = string.replace(word, '*', '%') # we now use '*' as the truncation character words = string.split(word, "->", 1) # check for span query if len(words) == 2: word0 = re_word.sub('', words[0]) word1 = re_word.sub('', words[1]) if stemming_language: word0 = lower_index_term(word0) word1 = lower_index_term(word1) word0 = stem(word0, stemming_language) word1 = stem(word1, stemming_language) res = run_sql("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX, (wash_index_term(word0), wash_index_term(word1))) else: if f == 'journal': pass # FIXME: quick hack for the journal index else: word = re_word.sub('', word) if stemming_language: word = lower_index_term(word) word = stem(word, stemming_language) if string.find(word, '%') >= 0: # do we have wildcard in the word? if f == 'journal': # FIXME: quick hack for the journal index # FIXME: we can run a sanity check here for all indexes res = () else: res = run_sql("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX, (wash_index_term(word),)) else: res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX, (wash_index_term(word),)) # fill the result set: for word, hitlist in res: hitset_bibwrd = HitSet(hitlist) # add the results: if set_used: set.union_update(hitset_bibwrd) else: set = hitset_bibwrd set_used = 1 # okay, return result set: return set def search_unit_in_idxphrases(p, f, type): """Searches for phrase 'p' inside idxPHRASE*F table for field 'f' and returns hitset of recIDs found. The search type is defined by 'type' (e.g. 
    set = HitSet() # will hold output result set
    set_used = 0   # not-yet-used flag, to be able to circumvent set operations
    # deduce in which idxPHRASE table we will search:
    idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            idxphraseX = "idxPHRASE%02dF" % index_id
        else:
            return HitSet() # phrase index f does not exist

    # detect query type (exact phrase, partial phrase, regexp):
    if type == 'r':
        query_addons = "REGEXP %s"
        query_params = (p,)
    else:
        p = string.replace(p, '*', '%') # we now use '*' as the truncation character
        ps = string.split(p, "->", 1) # check for span query:
        if len(ps) == 2:
            query_addons = "BETWEEN %s AND %s"
            query_params = (ps[0], ps[1])
        else:
            if string.find(p, '%') > -1:
                query_addons = "LIKE %s"
                query_params = (ps[0],)
            else:
                query_addons = "= %s"
                query_params = (ps[0],)

    # perform search:
    res = run_sql("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons),
                  query_params)
    # fill the result set:
    for word, hitlist in res:
        hitset_bibphrase = HitSet(hitlist)
        # add the results:
        if set_used:
            set.union_update(hitset_bibphrase)
        else:
            set = hitset_bibphrase
            set_used = 1
    # okay, return result set:
    return set

def search_unit_in_bibxxx(p, f, type):
    """Searches for pattern 'p' inside bibxxx tables for field 'f' and
    returns hitset of recIDs found.  The search type is defined by
    'type' (e.g. equals to 'r' for a regexp search)."""

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return search_unit_in_bibwords(p, f)

    p_orig = p # saving for eventual future 'no match' reporting
    query_addons = "" # will hold additional SQL code for the query
    query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument)

    # wash arguments:
    f = string.replace(f, '*', '%') # replace truncation char '*' in field definition
    if type == 'r':
        query_addons = "REGEXP %s"
        query_params = (p,)
    else:
        p = string.replace(p, '*', '%') # we now use '*' as the truncation character
        ps = string.split(p, "->", 1) # check for span query:
        if len(ps) == 2:
            query_addons = "BETWEEN %s AND %s"
            query_params = (ps[0], ps[1])
        else:
            if string.find(p, '%') > -1:
                query_addons = "LIKE %s"
                query_params = (ps[0],)
            else:
                query_addons = "= %s"
                query_params = (ps[0],)

    # construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # convert old ALEPH tag names, if appropriate: (TODO: get rid of this before entering this function)
        if CFG_WEBSEARCH_FIELDS_CONVERT.has_key(string.lower(f)):
            f = CFG_WEBSEARCH_FIELDS_CONVERT[string.lower(f)]
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
        if not tl:
            # f index does not exist, nevermind
            pass
    # okay, start search:
    l = [] # will hold list of recID that matched
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        # construct and run query:
        if t == "001":
            res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons,
                          query_params)
        else:
            query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \
                    (bx, bibx, query_addons)
            if len(t) != 6 or t[-1:]=='%':
                # wildcard query, or only the beginning of field 't'
                # is defined, so add wildcard character:
                query += " AND bx.tag LIKE %s"
                res = run_sql(query, query_params + (t + '%',))
            else:
                # exact query for 't':
                query += " AND bx.tag=%s"
                res = run_sql(query, query_params + (t,))
        # fill the result set:
        for id_bibrec in res:
            if id_bibrec[0]:
                l.append(id_bibrec[0])
    # check no of hits found:
    nb_hits = len(l)
    # okay, return result set:
    set = HitSet(l)
    return set

def search_unit_in_bibrec(datetext1, datetext2, type='c'):
    """
    Return hitset of recIDs found that were either created or modified
    (according to 'type' arg being 'c' or 'm') from datetext1 until datetext2, inclusive.
    Does not pay attention to pattern, collection, anything.  Useful
    to intersect later on with the 'real' query.
    """
    set = HitSet()
    if type.startswith("m"):
        type = "modification_date"
    else:
        type = "creation_date" # by default we are searching for creation dates
    res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (type, type),
                  (datetext1, datetext2))
    for row in res:
        set += row[0]
    return set

def search_unit_by_times_cited(p):
    """
    Return hitset of recIDs found that are cited P times.
    Usually P looks like '10->23'.
    """
    numstr = '"'+p+'"'
    # this is sort of stupid, but since we may need to get the records
    # that do _not_ have cites, we have to know the ids of all records, too;
    # this is needed only if p is 0, "0", or a span like 0->N or N->0
    allrecs = []
    if p == 0 or p == "0" or \
       p.startswith("0->") or p.endswith("->0"):
        allrecs = HitSet(run_sql_cached("SELECT id FROM bibrec", affected_tables=['bibrec']))
    return get_records_with_num_cites(numstr, allrecs)

def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True):
    """Return dict of hitsets given by intersection of hitset with the collection universes."""
    _ = gettext_set_language(ln)

    # search stage 4: intersect with the collection universe:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    results = {}
    results_nbhits = 0
    for coll in colls:
        results[coll] = hitset_in_any_collection & get_collection_reclist(coll)
        results_nbhits += len(results[coll])
    if results_nbhits == 0:
        # no hits found, try to search in Home:
        results_in_Home = hitset_in_any_collection & get_collection_reclist(CFG_SITE_NAME)
        if len(results_in_Home) > 0:
            # some hits found in Home, so propose this search:
            if of.startswith("h") and display_nearest_terms_box:
                url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[])
                print_warning(req, _("No match found in collection %(x_collection)s. Other public collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\
                              {'x_collection': '<em>' + string.join([get_coll_i18nname(coll, ln, False) for coll in colls], ', ') + '</em>',
                               'x_url_open': '<a class="nearestterms" href="%s">' % (url),
                               'x_nb_hits': len(results_in_Home),
                               'x_url_close': '</a>'})
            results = {}
        else:
            # no hits found in Home, recommend different search terms:
            if of.startswith("h") and display_nearest_terms_box:
                print_warning(req, _("No public collection matched your query. "
                                     "If you were looking for a non-public document, please choose "
                                     "the desired restricted collection first."))
            results = {}
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits)
        print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1))
    return results

def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"):
    """Return intersection of search 'results' (a dict of hitsets
       with collection as key) with the 'hitset', i.e. apply
       'hitset' intersection to each collection within search
       'results'.
       If the final 'results' set is empty and 'ap' (approximate
       pattern) is true, then print the 'aptext' warning and return
       the original 'results' set unchanged.  If 'ap' is false, then
       return the empty results set.
    """
    if ap:
        results_ap = copy.deepcopy(results)
    else:
        results_ap = {} # will return empty dict in case of no hits found
    nb_total = 0
    for coll in results.keys():
        results[coll].intersection_update(hitset)
        nb_total += len(results[coll])
    if nb_total == 0:
        if of.startswith("h"):
            print_warning(req, aptext)
        results = results_ap
    return results

def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG):
    """Return a box similar to ``Not satisfied...'' one by proposing
       author searches for similar names.  Namely, take AUTHOR_NAME
       and the first initial of the first name (after comma) and look
       into author index whether authors with e.g. middle names exist.
       Useful mainly for CERN Library that sometimes contains name
       forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the
       same person.  The box isn't proposed if no similarly named
       authors are found to exist.
    """
    # return nothing if not configured:
    if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0:
        return ""
    # return empty box if there is no initial:
    if re.match(r'[^ ,]+, [^ ]', author_name) is None:
        return ""
    # firstly find name comma initial:
    author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name)
    # secondly search for similar name forms:
    similar_author_names = {}
    for name in author_name_to_search, strip_accents(author_name_to_search):
        for tag in get_field_tags("author"):
            # deduce into which bibxxx table we will search:
            digit1, digit2 = int(tag[0]), int(tag[1])
            bx = "bib%d%dx" % (digit1, digit2)
            bibx = "bibrec_bib%d%dx" % (digit1, digit2)
            if len(tag) != 6 or tag[-1:]=='%':
                # only the beginning of field 't' is defined, so add wildcard character:
                res = run_sql("""SELECT bx.value FROM %s AS bx
                                  WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx,
                              (name + "%", tag + "%"))
            else:
                res = run_sql("""SELECT bx.value FROM %s AS bx
                                  WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx,
                              (name + "%", tag))
            for row in res:
                similar_author_names[row[0]] = 1
    # remove the original name and sort the list:
    try:
        del similar_author_names[author_name]
    except KeyError:
        pass
    # thirdly print the box:
    out = ""
    if similar_author_names:
        out_authors = similar_author_names.keys()
        out_authors.sort()
        tmp_authors = []
        for out_author in out_authors:
            nbhits = get_nbhits_in_bibxxx(out_author, "author")
            if nbhits:
                tmp_authors.append((out_author, nbhits))
        out += websearch_templates.tmpl_similar_author_names(
                 authors=tmp_authors, ln=ln)
    return out

def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True):
    """Return text box containing list of 'n' nearest terms above/below 'p'
       for the field 'f' for matching type 't' (words/phrases) in
       language 'ln'.
       Propose new searches according to `urlargd' with the new words.
       If `intro_text_p' is true, then display the introductory message,
       otherwise print only the nearest terms in the box content.
    """
    # load the right message language
    _ = gettext_set_language(ln)

    out = ""
    nearest_terms = []
    if not p: # sanity check
        p = "."
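    # ('.' acts as a neutral probe term here, so that the browsing below can
    # still return the alphabetically nearest indexed terms; the assumption
    # is that any non-empty placeholder would behave similarly)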
    index_id = get_index_id_from_field(f)
    # look for nearest terms:
    if t == 'w':
        nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n)
        if not nearest_terms:
            return _("No word index is available for %s.") % \
                   ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')
    else:
        nearest_terms = []
        if index_id:
            nearest_terms = get_nearest_terms_in_idxphrase(p, index_id, n, n)
        if not nearest_terms:
            nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n)
        if not nearest_terms:
            return _("No phrase index is available for %s.") % \
                   ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')

    terminfo = []
    for term in nearest_terms:
        if t == 'w':
            hits = get_nbhits_in_bibwords(term, f)
        else:
            if index_id:
                hits = get_nbhits_in_idxphrases(term, f)
            else:
                hits = get_nbhits_in_bibxxx(term, f)

        argd = {}
        argd.update(urlargd)
        # check which fields contained the requested parameter, and replace it.
        for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'):
            if px in argd:
                argd_px = argd[px]
                if t == 'w':
                    # p was stripped of accents; do the same for argd_px:
                    argd_px = strip_accents(argd_px)
                if f == argd[fx] or f == "anyfield" or f == "":
                    if string.find(argd_px, p) > -1:
                        argd[px] = string.replace(argd_px, p, term)
                        break
                else:
                    if string.find(argd_px, f+':'+p) > -1:
                        argd[px] = string.replace(argd_px, f+':'+p, f+':'+term)
                        break
                    elif string.find(argd_px, f+':"'+p+'"') > -1:
                        argd[px] = string.replace(argd_px, f+':"'+p+'"', f+':"'+term+'"')
                        break
        terminfo.append((term, hits, argd))

    intro = ""
    if intro_text_p: # add full leading introductory text
        if f:
            intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \
                     {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>",
                      'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + "</em>"}
        else:
            intro = _("Search term %s did not match any record. Nearest terms in any collection are:") % \
                     ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>")

    return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo,
                                                     intro=intro)

def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
    """Return list of +n -n nearest terms to word `p' in index for field `f'."""
    nearest_words = [] # will hold the (sorted) list of nearest words to return
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return nearest_words
    # firstly try to get `n' closest words above `p':
    res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX,
                  (p, n_above))
    for row in res:
        nearest_words.append(row[0])
    nearest_words.reverse()
    # secondly insert given word `p':
    nearest_words.append(p)
    # finally try to get `n' closest words below `p':
    res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX,
                  (p, n_below))
    for row in res:
        nearest_words.append(row[0])
    return nearest_words

def get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field idxPHRASE table,
       regardless of collection.
       Return list of [phrase1, phrase2, ..., phrase_n]."""
    idxphraseX = "idxPHRASE%02dF" % index_id
    res_above = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above))
    res_above = map(lambda x: x[0], res_above)
    res_above.reverse()

    res_below = run_sql("SELECT term FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below))
    res_below = map(lambda x: x[0], res_below)

    return res_above + res_below

def get_nearest_terms_in_idxphrase_with_collection(p, index_id, n_below, n_above, collection):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field idxPHRASE table,
       considering the collection (HitSet).
       Return list of [(phrase1, hitset), (phrase2, hitset), ..., (phrase_n, hitset)]."""
    idxphraseX = "idxPHRASE%02dF" % index_id
    res_above = run_sql("SELECT term,hitlist FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above * 3))
    res_above = [(term, HitSet(hitlist) & collection) for term, hitlist in res_above]
    res_above = [(term, len(hitlist)) for term, hitlist in res_above if hitlist]

    res_below = run_sql("SELECT term,hitlist FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below * 3))
    res_below = [(term, HitSet(hitlist) & collection) for term, hitlist in res_below]
    res_below = [(term, len(hitlist)) for term, hitlist in res_below if hitlist]

    res_above.reverse()
    return res_above[-n_above:] + res_below[:n_below]

def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field f, regardless
       of collection.
       Return list of [phrase1, phrase2, ..., phrase_n]."""
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = string.split(p, ":", 1)

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nearest_terms_in_bibwords(p, f, n_below, n_above)

    ## We are going to take max(n_below, n_above) as the number of
    ## values to fetch from bibXXx.  This is needed to work around
    ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
    ## use MySQL 4.1.x or our own idxPHRASE in the future.
    index_id = get_index_id_from_field(f)
    if index_id:
        return get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above)

    n_fetch = 2*max(n_below, n_above)
    ## construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
    ## start browsing to fetch list of hits:
    browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique)
    # always add self to the results set:
    browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        # firstly try to get `n' closest phrases above `p':
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value<%%s AND bx.tag LIKE %%s
                              ORDER BY bx.value DESC LIMIT %%s""" % bx,
                          (p, t + "%", n_fetch))
        else:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value<%%s AND bx.tag=%%s
                              ORDER BY bx.value DESC LIMIT %%s""" % bx,
                          (p, t, n_fetch))
        for row in res:
            browsed_phrases[row[0]] = 1
        # secondly try to get `n' closest phrases equal to or below `p':
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value>=%%s AND bx.tag LIKE %%s
                              ORDER BY bx.value ASC LIMIT %%s""" % bx,
                          (p, t + "%", n_fetch))
        else:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value>=%%s AND bx.tag=%%s
                              ORDER BY bx.value ASC LIMIT %%s""" % bx,
                          (p, t, n_fetch))
        for row in res:
            browsed_phrases[row[0]] = 1
    # select first n words only: (this is needed as we were searching
    # in many different tables and so aren't sure we have more than n
    # words right; this of course won't be needed when we shall have
    # one ACC table only for given field):
    phrases_out = browsed_phrases.keys()
    phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)),
                                      string.lower(strip_accents(y))))
    # find position of self:
    try:
        idx_p = phrases_out.index(p)
    except:
        idx_p = len(phrases_out)/2
    # return n_above and n_below:
    return phrases_out[max(0, idx_p-n_above):idx_p+n_below]

def get_nbhits_in_bibwords(word, f):
    """Return number of hits for word 'word' inside words index for field 'f'."""
    out = 0
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return 0
    if word:
        res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX,
                      (word,))
        for hitlist in res:
            out += len(HitSet(hitlist[0]))
    return out

def get_nbhits_in_idxphrases(word, f):
    """Return number of hits for word 'word' inside phrase index for field 'f'."""
    out = 0
    # deduce into which idxPHRASEX table we will search:
    idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            idxphraseX = "idxPHRASE%02dF" % index_id
        else:
            return 0
    if word:
        res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % idxphraseX,
                      (word,))
        for hitlist in res:
            out += len(HitSet(hitlist[0]))
    return out

def get_nbhits_in_bibxxx(p, f):
    """Return number of hits for pattern 'p' inside bibxxx tables for field 'f'."""
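    # (hits are counted as unique recIDs, accumulated below over every MARC
    # tag that is mapped to field 'f')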
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = string.split(p, ":", 1)

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nbhits_in_bibwords(p, f)

    ## construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
    # start searching:
    recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., } (unique recIDs, therefore)
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                              WHERE bx.value=%%s AND bx.tag LIKE %%s
                                AND bibx.id_bibxxx=bx.id""" % (bibx, bx),
                          (p, t + "%"))
        else:
            res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                              WHERE bx.value=%%s AND bx.tag=%%s
                                AND bibx.id_bibxxx=bx.id""" % (bibx, bx),
                          (p, t))
        for row in res:
            recIDs[row[0]] = 1
    return len(recIDs)

def get_mysql_recid_from_aleph_sysno(sysno):
    """Returns DB's recID for ALEPH sysno passed in the argument (e.g. "002379334CER").
       Returns None in case of failure."""
    out = None
    res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b
                      WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""",
                  (sysno,))
    if res:
        out = res[0][0]
    return out

def guess_primary_collection_of_a_record(recID):
    """Return primary collection name a record recid belongs to, by
       testing 980 identifier.
       May lead to bad guesses when a collection is defined dynamically
       via dbquery.
       In that case, return 'CFG_SITE_NAME'."""
    out = CFG_SITE_NAME
    dbcollids = get_fieldvalues(recID, "980__a")
    if dbcollids:
        dbquery = "collection:" + dbcollids[0]
        res = run_sql("SELECT name FROM collection WHERE dbquery=%s", (dbquery,))
        if res:
            out = res[0][0]
    return out

_re_collection_url = re.compile('/collection/(.+)')
def guess_collection_of_a_record(recID, referer=None):
    """Return collection name a record recid belongs to, by first testing
       the referer URL if provided and otherwise returning the
       primary collection."""
    if referer:
        dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer)
        g = _re_collection_url.match(path)
        if g:
            name = urllib.unquote_plus(g.group(1))
            if recID in get_collection_reclist(name):
                return name
        elif path.startswith('/search'):
            query = cgi.parse_qs(query)
            for name in query.get('cc', []) + query.get('c', []):
                if recID in get_collection_reclist(name):
                    return name
    return guess_primary_collection_of_a_record(recID)

def get_all_collections_of_a_record(recID):
    """Return all the collection names a record belongs to.
    Note this function is O(n_collections)."""
    ret = []
    for name in collection_reclist_cache.cache.keys():
        if recID in get_collection_reclist(name):
            ret.append(name)
    return ret

def get_tag_name(tag_value, prolog="", epilog=""):
    """Return tag name from the known tag value, by looking up the 'tag' table.
       Return empty string in case of failure.
       Example: input='100__%', output='first author'."""
    out = ""
    res = run_sql_cached("SELECT name FROM tag WHERE value=%s", (tag_value,),
                         affected_tables=['tag',])
    if res:
        out = prolog + res[0][0] + epilog
    return out

def get_fieldcodes():
    """Returns a list of field codes that may have been passed as 'search options' in URL.
       Example: output=['subject','division']."""
    out = []
    res = run_sql_cached("SELECT DISTINCT(code) FROM field",
                         affected_tables=['field',])
    for row in res:
        out.append(row[0])
    return out

def get_field_name(code):
    """Return the corresponding field_name given the field code.
    e.g. reportnumber -> report number."""
    res = run_sql_cached("SELECT name FROM field WHERE code=%s", (code, ),
                         affected_tables=['field',])
    if res:
        return res[0][0]
    else:
        return ""

def get_field_tags(field):
    """Returns a list of MARC tags for the field code 'field'.
       Returns empty list in case of error.
       Example: field='author', output=['100__%','700__%']."""
    out = []
    query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
                WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
                ORDER BY ft.score DESC"""
    res = run_sql(query, (field, ))
    for val in res:
        out.append(val[0])
    return out

def get_fieldvalues(recIDs, tag, repetitive_values=True):
    """
    Return list of field values for field TAG for the given record ID
    or list of record IDs.  (RECIDS can be either an integer or a list
    of integers.)

    If REPETITIVE_VALUES is set to True, then return all values even
    if they are doubled.  If set to False, then return unique values
    only.
    """
    out = []
    if isinstance(recIDs, (int, long)):
        recIDs = [recIDs,]
    if not isinstance(recIDs, (list, tuple)):
        return []
    if len(recIDs) == 0:
        return []
    if tag == "001___":
        # we have asked for tag 001 (=recID) that is not stored in bibXXx tables
        out = [str(recID) for recID in recIDs]
    else:
        # we are going to look inside bibXXx tables
        digits = tag[0:2]
        try:
            intdigits = int(digits)
            if intdigits < 0 or intdigits > 99:
                raise ValueError
        except ValueError:
            # invalid tag value asked for
            return []
        bx = "bib%sx" % digits
        bibx = "bibrec_bib%sx" % digits
        queryparam = []
        for recID in recIDs:
            queryparam.append(recID)
        if not repetitive_values:
            queryselect = "DISTINCT(bx.value)"
        else:
            queryselect = "bx.value"
        query = "SELECT %s FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec IN (%s) " \
                " AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s " \
                " ORDER BY bibx.field_number, bx.tag ASC" % \
                (queryselect, bx, bibx, ("%s,"*len(queryparam))[:-1])
        res = run_sql(query, tuple(queryparam) + (tag,))
        for row in res:
            out.append(row[0])
    return out

def get_fieldvalues_alephseq_like(recID, tags_in):
    """Return buffer of ALEPH sequential-like textual format with fields found
       in the list TAGS_IN for record RECID."""
    out = ""
    if type(tags_in) is not list:
        tags_in = [tags_in,]
    if len(tags_in) == 1 and len(tags_in[0]) == 6:
        ## case A: one concrete subfield asked, so print its value if found
        ##         (use with care: can fool you if field has multiple occurrences)
        out += string.join(get_fieldvalues(recID, tags_in[0]), "\n")
    else:
        ## case B: print our "text MARC" format; works safely all the time
        # find out which tags to output:
        dict_of_tags_out = {}
        if not tags_in:
            for i in range(0, 10):
                for j in range(0, 10):
                    dict_of_tags_out["%d%d%%" % (i, j)] = 1
        else:
            for tag in tags_in:
                if len(tag) == 0:
                    for i in range(0, 10):
                        for j in range(0, 10):
                            dict_of_tags_out["%d%d%%" % (i, j)] = 1
                elif len(tag) == 1:
                    for j in range(0, 10):
                        dict_of_tags_out["%s%d%%" % (tag, j)] = 1
                elif len(tag) < 5:
                    dict_of_tags_out["%s%%" % tag] = 1
                else:
                    dict_of_tags_out[tag[0:5]] = 1
        tags_out = dict_of_tags_out.keys()
        tags_out.sort()
        # search all bibXXx tables as needed:
        for tag in tags_out:
            digits = tag[0:2]
            try:
                intdigits = int(digits)
                if intdigits < 0 or intdigits > 99:
                    raise ValueError
            except ValueError:
                # invalid tag value asked for
                continue
            if tag.startswith("001") or tag.startswith("00%"):
tag.startswith("00%"): if out: out += "\n" out += "%09d %s %d" % (recID, "001__", recID) bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(tag)+'%')) # go through fields: field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_": ind1 = "" if ind2 == "_": ind2 = "" # print field tag if field_number != field_number_old or field[:-1] != field_old[:-1]: if out: out += "\n" out += "%09d %s " % (recID, field[:5]) field_number_old = field_number field_old = field # print subfield value if field[0:2] == "00" and field[-1:] == "_": out += value else: out += "$$%s%s" % (field[-1:], value) return out def record_exists(recID): """Return 1 if record RECID exists. Return 0 if it doesn't exist. Return -1 if it exists but is marked as deleted.""" out = 0 res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1) if res: recID = int(recID) # record exists; now check whether it isn't marked as deleted: dbcollids = get_fieldvalues(recID, "980__%") if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids): out = -1 # exists, but marked as deleted else: out = 1 # exists fine return out def record_public_p(recID): """Return 1 if the record is public, i.e. if it can be found in the Home collection. Return 0 otherwise. """ return recID in get_collection_reclist(CFG_SITE_NAME) def get_creation_date(recID, fmt="%Y-%m-%d"): "Returns the creation date of the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def get_modification_date(recID, fmt="%Y-%m-%d"): "Returns the date of last modification for the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def print_warning(req, msg, type='', prologue='<br />', epilogue='<br />'): "Prints warning message and flushes output." if req and msg: req.write(websearch_templates.tmpl_print_warning( msg = msg, type = type, prologue = prologue, epilogue = epilogue, )) return def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. 
This is suitable for displaying navigation links at the bottom of the search results page.""" out = "" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page.""" out = "" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_hosted_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_results_overview(req, colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[], hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below.""" out = "" new_colls = [] for coll in colls: new_colls.append({ 'id': get_colID(coll), 'code': coll, 'name': get_coll_i18nname(coll, ln, False), }) return websearch_templates.tmpl_print_results_overview( ln = ln, results_final_nb_total = results_final_nb_total, results_final_nb = results_final_nb, cpu_time = cpu_time, colls = new_colls, ec = ec, hosted_colls_potential_results_p = hosted_colls_potential_results_p, ) -def print_hosted_results(infos, of=None, ln=CFG_SITE_LANG, req=None, no_records_found=False, timeout=False): +def print_hosted_results(url_and_engine, ln=CFG_SITE_LANG, of=None, req=None, no_records_found=False, search_timed_out=False, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Prints the full results of a hosted collection""" if of.startswith("h"): if no_records_found: - return "<br />No results found.." - if timeout: - return "<br />The search engine did not respond in time" + return "<br />No results found." + if search_timed_out: + return "<br />The search engine did not respond in time." 
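# the engine answered in time and something was found: hand the actual
# rendering over to the websearch template; judging from the new signature
# above, 'limit' (CFG_EXTERNAL_COLLECTION_MAXRESULTS by default) caps how
# many of the hosted-collection results get displayed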
return websearch_templates.tmpl_print_hosted_results( + url_and_engine=url_and_engine, ln=ln, - infos=infos, of=of, - req=req + req=req, + limit=limit ) def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG): """Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'. If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by 'sort pattern', for example "sort by report number that starts by CERN-PS". Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly.""" _ = gettext_set_language(ln) ## check arguments: if not sort_field: return recIDs if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT: if of.startswith('h'): print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning") return recIDs sort_fields = string.split(sort_field, ",") recIDs_dict = {} recIDs_out = [] ## first deduce sorting MARC tag out of the 'sort_field' argument: tags = [] for sort_field in sort_fields: if sort_field and str(sort_field[0:2]).isdigit(): # sort_field starts by two digits, so this is probably a MARC tag already tags.append(sort_field) else: # let us check the 'field' table query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (sort_field, )) if res: for row in res: tags.append(row[0]) else: if of.startswith('h'): print_warning(req, _("Sorry, %s does not seem to be a valid sort option. Choosing title sort instead.") % cgi.escape(sort_field), "Error") tags.append("245__a") if verbose >= 3: print_warning(req, "Sorting by tags %s." % cgi.escape(repr(tags))) if sort_pattern: print_warning(req, "Sorting preferentially by %s." % cgi.escape(sort_pattern)) ## check if we have sorting tag defined: if tags: # fetch the necessary field values: for recID in recIDs: val = "" # will hold value for recID according to which sort vals = [] # will hold all values found in sorting tag for recID for tag in tags: vals.extend(get_fieldvalues(recID, tag)) if sort_pattern: # try to pick that tag value that corresponds to sort pattern bingo = 0 for v in vals: if v.lower().startswith(sort_pattern.lower()): # bingo! bingo = 1 val = v break if not bingo: # sort_pattern not present, so add other vals after spaces val = sort_pattern + " " + string.join(vals) else: # no sort pattern defined, so join them all together val = string.join(vals) val = strip_accents(val.lower()) # sort values regardless of accents and case if recIDs_dict.has_key(val): recIDs_dict[val].append(recID) else: recIDs_dict[val] = [recID] # sort them: recIDs_dict_keys = recIDs_dict.keys() recIDs_dict_keys.sort() # now that keys are sorted, create output array: for k in recIDs_dict_keys: for s in recIDs_dict[k]: recIDs_out.append(s) # ascending or descending? if sort_order == 'a': recIDs_out.reverse() # okay, we are done return recIDs_out else: # good, no sort needed return recIDs def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=CFG_SITE_LANG, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True, print_records_epilogue_p=True, verbose=0, tab=''): """ Prints list of records 'recIDs' formatted according to 'format' in groups of 'rg' starting from 'jrec'. 
Assumes that the input list 'recIDs' is sorted in reverse order, so it counts records from tail to head. A value of 'rg=-9999' means to print all records: to be used with care. Print also list of RELEVANCES for each record (if defined), in between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE. Print prologue and/or epilogue specific to 'format' if 'print_records_prologue_p' and/or print_records_epilogue_p' are True. """ # load the right message language _ = gettext_set_language(ln) # sanity checking: if req is None: return # get user_info (for formatting based on user) if isinstance(req, cStringIO.OutputType): user_info = {} else: user_info = collect_user_info(req) if len(recIDs): nb_found = len(recIDs) if rg == -9999: # print all records rg = nb_found else: rg = abs(rg) if jrec < 1: # sanity checks jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) # will print records from irec_max to irec_min excluded: irec_max = nb_found - jrec irec_min = nb_found - jrec - rg if irec_min < 0: irec_min = -1 if irec_max >= nb_found: irec_max = nb_found - 1 #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max)) if format.startswith('x'): # print header if needed if print_records_prologue_p: print_records_prologue(req, format) # print records recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] format_records(recIDs_to_print, format, ln=ln, search_pattern=search_pattern, record_separator="\n", user_info=user_info, req=req) # print footer if needed if print_records_epilogue_p: print_records_epilogue(req, format) elif format.startswith('t') or str(format[0:3]).isdigit(): # we are doing plain text output: for irec in range(irec_max, irec_min, -1): x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) req.write(x) if x: req.write('\n') elif format == 'excel': recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] create_excel(recIDs=recIDs_to_print, req=req, ln=ln, ot=ot) else: # we are doing HTML output: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): # portfolio and on-the-fly formats: for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose)) elif format.startswith("hb"): # HTML brief format: req.write(websearch_templates.tmpl_record_format_htmlbrief_header( ln = ln)) for irec in range(irec_max, irec_min, -1): row_number = jrec+irec_max-irec recid = recIDs[irec] if relevances and relevances[irec]: relevance = relevances[irec] else: relevance = '' record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) req.write(websearch_templates.tmpl_record_format_htmlbrief_body( ln = ln, recid = recid, row_number = row_number, relevance = relevance, record = record, relevances_prologue = relevances_prologue, relevances_epilogue = relevances_epilogue, )) req.write(websearch_templates.tmpl_record_format_htmlbrief_footer( ln = ln)) elif format.startswith("hd"): # HTML detailed format: for irec in range(irec_max, irec_min, -1): unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])), recIDs[irec], ln=ln) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % ln if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: recid_to_display = get_fieldvalues(recIDs[irec], 
CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG)[0] else: recid_to_display = recIDs[irec] tabs = [(unordered_tabs[tab_id]['label'], \ '%s/record/%s/%s%s' % (CFG_SITE_URL, recid_to_display, tab_id, link_ln), \ tab_id == tab, unordered_tabs[tab_id]['enabled']) \ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] content = '' # load content if tab == 'usage': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) r = calculate_reading_similarity_list(recIDs[irec], "downloads") downloadsimilarity = None downloadhistory = None #if r: # downloadsimilarity = r if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS: downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln) r = calculate_reading_similarity_list(recIDs[irec], "pageviews") viewsimilarity = None if r: viewsimilarity = r content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec], ln, downloadsimilarity=downloadsimilarity, downloadhistory=downloadhistory, viewsimilarity=viewsimilarity) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'citations': recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln)) req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln)) # Citing citinglist = [] r = calculate_cited_by_list(recid) if r: citinglist = r req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid, ln, citinglist=citinglist)) # Self-cited selfcited = get_self_cited_by(recid) req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid, ln, selfcited=selfcited, citinglist=citinglist)) # Co-cited s = calculate_co_cited_with_list(recid) cociting = None if s: cociting = s req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid, ln, cociting=cociting)) # Citation history citationhistory = None if r: citationhistory = create_citation_history_graph_and_box(recid, ln) #debug if verbose > 3: print_warning(req, "Citation graph debug: "+str(len(citationhistory))) req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory)) req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln)) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) elif tab == 'references': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose)) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'keywords': from bibclassify_webinterface import \ record_get_keywords, get_sorting_options, \ generate_keywords, get_keywords_body from invenio.webinterface_handler import wash_urlargd form = req.form argd = wash_urlargd(form, { 'generate': (str, 'no'), 'sort': (str, 'occurrences'), 'type': (str, 'tagcloud'), 'numbering': (str, 'off'), }) recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln)) if argd['generate'] == 'yes': # The user asked to generate the keywords. keywords = generate_keywords(req, recid) else: # Get the keywords contained in the MARC. keywords = record_get_keywords(recid, argd) if keywords: req.write(get_sorting_options(argd, keywords)) elif argd['sort'] == 'related' and not keywords: req.write('You may want to run BibIndex.') # Output the keywords or the generate button. 
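## For reference, wash_urlargd() (used above to wash the keywords-tab
## arguments) maps each expected argument to a (type, default) pair. A
## minimal sketch of the idea -- simplified, not the real implementation:
def _wash_urlargd_sketch(form, defaults):
    """Return a dict of typed values, falling back to the given defaults."""
    argd = {}
    for name, (typ, default) in defaults.items():
        try:
            argd[name] = typ(form[name])
        except (KeyError, ValueError, TypeError):
            argd[name] = default
    return argd
## e.g. _wash_urlargd_sketch({'sort': 'related'},
##                           {'sort': (str, 'occurrences'),
##                            'type': (str, 'tagcloud')})
## yields {'sort': 'related', 'type': 'tagcloud'}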
get_keywords_body(keywords, req, recid, argd) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) else: # Metadata tab req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, show_short_rec_p=False)) creationdate = None modificationdate = None if record_exists(recIDs[irec]) == 1: creationdate = get_creation_date(recIDs[irec]) modificationdate = get_modification_date(recIDs[irec]) content = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) content = websearch_templates.tmpl_detailed_record_metadata( recID = recIDs[irec], ln = ln, format = format, creationdate = creationdate, modificationdate = modificationdate, content = content) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln, creationdate=creationdate, modificationdate=modificationdate, show_short_rec_p=False)) if len(tabs) > 0: # Add the mini box at bottom of the page if CFG_WEBCOMMENT_ALLOW_REVIEWS: from invenio.webcomment import get_mini_reviews reviews = get_mini_reviews(recid = recIDs[irec], ln=ln) else: reviews = '' actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose) files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose) req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec], ln, format, files=files, reviews=reviews, actions=actions)) else: # Other formats for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose)) else: print_warning(req, _("Use different search terms.")) def print_records_prologue(req, format): """ Print the appropriate prologue for list of records in the given format. """ prologue = "" # no prologue needed for HTML or Text formats if format.startswith('xm'): prologue = websearch_templates.tmpl_xml_marc_prologue() elif format.startswith('xn'): prologue = websearch_templates.tmpl_xml_nlm_prologue() elif format.startswith('xw'): prologue = websearch_templates.tmpl_xml_refworks_prologue() elif format.startswith('xr'): prologue = websearch_templates.tmpl_xml_rss_prologue() elif format.startswith('xe'): prologue = websearch_templates.tmpl_xml_endnote_prologue() elif format.startswith('xo'): prologue = websearch_templates.tmpl_xml_mods_prologue() elif format.startswith('x'): prologue = websearch_templates.tmpl_xml_default_prologue() req.write(prologue) def print_records_epilogue(req, format): """ Print the appropriate epilogue for list of records in the given format. 
""" epilogue = "" # no epilogue needed for HTML or Text formats if format.startswith('xm'): epilogue = websearch_templates.tmpl_xml_marc_epilogue() elif format.startswith('xn'): epilogue = websearch_templates.tmpl_xml_nlm_epilogue() elif format.startswith('xw'): epilogue = websearch_templates.tmpl_xml_refworks_epilogue() elif format.startswith('xr'): epilogue = websearch_templates.tmpl_xml_rss_epilogue() elif format.startswith('xe'): epilogue = websearch_templates.tmpl_xml_endnote_epilogue() elif format.startswith('xo'): epilogue = websearch_templates.tmpl_xml_mods_epilogue() elif format.startswith('x'): epilogue = websearch_templates.tmpl_xml_default_epilogue() req.write(epilogue) def get_record(recid): """Directly the record object corresponding to the recid.""" from marshal import loads, dumps from zlib import compress, decompress if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: value = run_sql('SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT=\'recstruct\'', (recid, )) if value: try: return loads(decompress(value[0][0])) except: ### In case of corruption, let's rebuild it! pass return create_record(print_record(recid, 'xm'))[0] def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress, search_pattern=None, user_info=None, verbose=0): """Prints record 'recID' formatted accoding to 'format'.""" if format == 'recstruct': return get_record(recID) _ = gettext_set_language(ln) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # New Python BibFormat procedure for formatting # Old procedure follows further below # We must still check some special formats, but these # should disappear when BibFormat improves. if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \ or format.lower().startswith('t') \ or format.lower().startswith('hm') \ or str(format[0:3]).isdigit() \ or ot): # Unspecified format is hd if format == '': format = 'hd' if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html': # HTML output displays a default value for deleted records. # Other format have to deal with it. 
out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) # at the end of HTML brief mode, print the "Detailed record" functionality: if format.lower().startswith('hb') and \ format.lower() != 'hb_p': out += websearch_templates.tmpl_print_record_brief_links( ln = ln, recID = recID, ) return out # Old PHP BibFormat procedure for formatting # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " <record>\n" out += " <header>\n" for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD): out += " <identifier>%s</identifier>\n" % oai_id out += " <datestamp>%s</datestamp>\n" % get_modification_date(recID) out += " </header>\n" out += " <metadata>\n" if format.startswith("xm") or format == "marcxml": # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res and record_exist_p == 1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out += """ <record xmlns="http://www.loc.gov/MARC21/slim">\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) elif format.startswith("xm"): out += """ <record>\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD) if oai_ids: out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \ (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0]) out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n" else: # controlfields query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\ "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\ "ORDER BY bb.field_number, b.tag ASC" res = run_sql(query, (recID, )) for row in res: field, value = row[0], row[1] value = encode_for_xml(value) out += """ <controlfield tag="%s" >%s</controlfield>\n""" % \ (encode_for_xml(field[0:3]), value) # datafields i = 1 # Do not process bib00x and bibrec_bib00x, as # they are controlfields. 
So start at bib01x and # bibrec_bib01x (and set i = 0 at the end of # first loop) for digit1 in range(0, 10): for digit2 in range(i, 10): bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(digit1)+str(digit2)+'%')) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_" or ind1 == "": ind1 = " " if ind2 == "_" or ind2 == "": ind2 = " " # print field tag if field_number != field_number_old or field[:-1] != field_old[:-1]: if field_number_old != -999: out += """ </datafield>\n""" out += """ <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \ (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2)) field_number_old = field_number field_old = field # print subfield value value = encode_for_xml(value) out += """ <subfield code="%s">%s</subfield>\n""" % \ (encode_for_xml(field[-1:]), value) # all fields/subfields printed in this run, so close the tag: if field_number_old != -999: out += """ </datafield>\n""" i = 0 # Next loop should start looking at bib00x and bibrec_bib00x # we are at the end of printing the record: out += " </record>\n" elif format == "xd" or format == "oai_dc": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out += """ <dc xmlns="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://www.openarchives.org/OAI/1.1/dc.xsd">\n""" if record_exist_p == -1: out += "" else: for f in get_fieldvalues(recID, "041__a"): out += " <language>%s</language>\n" % f for f in get_fieldvalues(recID, "100__a"): out += " <creator>%s</creator>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "700__a"): out += " <creator>%s</creator>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "245__a"): out += " <title>%s</title>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "65017a"): out += " <subject>%s</subject>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "8564_u"): out += " <identifier>%s</identifier>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "520__a"): out += " <description>%s</description>\n" % encode_for_xml(f) out += " <date>%s</date>\n" % get_creation_date(recID) out += "</dc>\n" elif len(format) == 6 and str(format[0:3]).isdigit(): # user has asked to print some fields only if format == "001": out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format) else: vals = get_fieldvalues(recID, format) for val in vals: out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format) elif format.startswith('t'): ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"]) else: out += get_fieldvalues_alephseq_like(recID, ot) elif format == "hm": if record_exist_p == -1: out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"])) + "</pre>" else: out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot)) + "</pre>" elif format.startswith("h") and ot: ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"]) + "</pre>" else: out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ot) + "</pre>
" elif format == "hd": # HTML detailed format if record_exist_p == -1: out += _("The record has been deleted.") else: # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_detailed( ln = ln, recID = recID, ) elif format.startswith("hb_") or format.startswith("hd_"): # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hx"): # BibTeX format, called on the fly: if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hs"): # for citation/download similarity navigation links: if record_exist_p == -1: out += _("The record has been deleted.") else: out += '' % websearch_templates.build_search_url(recid=recID, ln=ln) # firstly, title: titles = get_fieldvalues(recID, "245__a") if titles: for title in titles: out += "%s" % title else: # usual title not found, try conference title: titles = get_fieldvalues(recID, "111__a") if titles: for title in titles: out += "%s" % title else: # just print record ID: out += "%s %d" % (get_field_i18nname("record ID", ln, False), recID) out += "" # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += " - %s" % authors[0] if len(authors) > 1: out += " et al" # thirdly publication info: publinfos = get_fieldvalues(recID, "773__s") if not publinfos: publinfos = get_fieldvalues(recID, "909C4s") if not publinfos: publinfos = get_fieldvalues(recID, "037__a") if not publinfos: publinfos = get_fieldvalues(recID, "088__a") if publinfos: out += " - %s" % publinfos[0] else: # fourthly publication year (if not publication info): years = get_fieldvalues(recID, "773__y") if not years: years = get_fieldvalues(recID, "909C4y") if not years: years = get_fieldvalues(recID, "260__c") if years: out += " (%s)" % years[0] else: # HTML brief format by default if record_exist_p == -1: out += _("The record has been deleted.") else: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format)) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly: or use default format: if CFG_WEBSEARCH_CALL_BIBFORMAT: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) # at the end of HTML brief mode, print the "Detailed record" functionality: if 
format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): pass # do nothing for portfolio and on-the-fly formats else: out += websearch_templates.tmpl_print_record_brief_links( ln = ln, recID = recID, ) # print record closing tags, if needed: if format == "marcxml" or format == "oai_dc": out += " \n" out += " \n" return out def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0): """ Calls BibFormat and returns formatted record. BibFormat will decide by itself if old or new BibFormat must be used. """ keywords = [] if search_pattern is not None: units = create_basic_search_units(None, str(search_pattern), None) keywords = [unit[1] for unit in units if unit[0] != '-'] return format_record(recID, of=format, ln=ln, search_pattern=keywords, user_info=user_info, verbose=verbose) def log_query(hostname, query_args, uid=-1): """ Log query into the query and user_query tables. Return id_query or None in case of problems. """ id_query = None if uid >= 0: # log the query only if uid is reasonable res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1) try: id_query = res[0][0] except: id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,)) if id_query: run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)", (uid, id_query, hostname, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return id_query def log_query_info(action, p, f, colls, nb_records_found_total=-1): """Write some info to the log file for later analysis.""" try: log = open(CFG_LOGDIR + "/search.log", "a") log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime())) log.write(action+"#") log.write(p+"#") log.write(f+"#") for coll in colls[:-1]: log.write("%s," % coll) log.write("%s#" % colls[-1]) log.write("%d" % nb_records_found_total) log.write("\n") log.close() except: pass return ### CALLABLES def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=10, sf="", so="d", sp="", rm="", of="id", ot="", aas=0, p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab=""): """Perform search or browse request, without checking for authentication. Return list of recIDs found, if of=id. Otherwise create web page. The arguments are as follows: req - mod_python Request class instance. cc - current collection (e.g. "ATLAS"). The collection the user started to search/browse from. c - collection list (e.g. ["Theses", "Books"]). The collections user may have selected/deselected when starting to search from 'cc'. p - pattern to search for (e.g. "ellis and muon or kaon"). f - field to search within (e.g. "author"). rg - records in groups of (e.g. "10"). Defines how many hits per collection in the search results page are displayed. sf - sort field (e.g. "title"). so - sort order ("a"=ascending, "d"=descending). sp - sort pattern (e.g. "CERN-") -- in case there are more values in a sort field, this argument tells which one to prefer rm - ranking method (e.g. "jif"). Defines whether results should be ranked by some known ranking method. of - output format (e.g. "hb"). Usually starting "h" means HTML output (and "hb" for HTML brief, "hd" for HTML detailed), "x" means XML output, "t" means plain text output, "id" means no output at all but to return list of recIDs found. 
(Suitable for high-level API.) ot - output only these MARC tags (e.g. "100,700,909C0b"). Useful if only some fields are to be shown in the output, e.g. for library to control some fields. aas - advanced search ("0" means no, "1" means yes). Whether search was called from within the advanced search interface. p1 - first pattern to search for in the advanced search interface. Much like 'p'. f1 - first field to search within in the advanced search interface. Much like 'f'. m1 - first matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op1 - first operator, to join the first and the second unit in the advanced search interface. ("a" add, "o" or, "n" not). p2 - second pattern to search for in the advanced search interface. Much like 'p'. f2 - second field to search within in the advanced search interface. Much like 'f'. m2 - second matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op2 - second operator, to join the second and the third unit in the advanced search interface. ("a" add, "o" or, "n" not). p3 - third pattern to search for in the advanced search interface. Much like 'p'. f3 - third field to search within in the advanced search interface. Much like 'f'. m3 - third matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). sc - split by collection ("0" no, "1" yes). Governs whether we want to present the results in a single huge list, or splitted by collection. jrec - jump to record (e.g. "234"). Used for navigation inside the search results. recid - display record ID (e.g. "20000"). Do not search/browse but go straight away to the Detailed record page for the given recID. recidb - display record ID bis (e.g. "20010"). If greater than 'recid', then display records from recid to recidb. Useful for example for dumping records from the database for reformatting. sysno - display old system SYS number (e.g. ""). If you migrate to CDS Invenio from another system, and store your old SYS call numbers, you can use them instead of recid if you wish so. id - the same as recid, in case recid is not set. For backwards compatibility. idb - the same as recid, in case recidb is not set. For backwards compatibility. sysnb - the same as sysno, in case sysno is not set. For backwards compatibility. action - action to do. "SEARCH" for searching, "Browse" for browsing. Default is to search. d1 - first datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-08-23 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd1' takes precedence over d1y, d1m, d1d if these are defined. d1y - first date's year (e.g. "1998"). Useful for search limits on creation/modification date. d1m - first date's month (e.g. "08"). Useful for search limits on creation/modification date. d1d - first date's day (e.g. "23"). Useful for search limits on creation/modification date. d2 - second datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-09-02 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd2' takes precedence over d2y, d2m, d2d if these are defined. d2y - second date's year (e.g. "1998"). Useful for search limits on creation/modification date. d2m - second date's month (e.g. "09"). 
Useful for search limits on creation/modification date. d2d - second date's day (e.g. "02"). Useful for search limits on creation/modification date. dt - first and second date's type (e.g. "c"). Specifies whether to search in creation dates ("c") or in modification dates ("m"). When dt is not set and d1* and d2* are set, the default is "c". verbose - verbose level (0=min, 9=max). Useful to print some internal information on the searching process in case something goes wrong. ap - alternative patterns (0=no, 1=yes). In case no exact match is found, the search engine can try alternative patterns e.g. to replace non-alphanumeric characters by a boolean query. ap defines if this is wanted. ln - language of the search interface (e.g. "en"). Useful for internationalization. ec - list of external search engines to search as well (e.g. "SPIRES HEP"). """ selected_external_collections_infos = None # wash output format: of = wash_output_format(of) + # raise an exception when trying to print out html or xml from the cli + if of.startswith("h") or of.startswith("x"): + assert req + # for every search engine request asking for an HTML output, we # first regenerate cache of collection and field I18N names if # needed; so that later we won't bother checking timestamps for # I18N names at all: if of.startswith("h"): collection_i18nname_cache.recreate_cache_if_needed() field_i18nname_cache.recreate_cache_if_needed() # wash all arguments requiring special care try: (cc, colls_to_display, colls_to_search, hosted_colls, wash_colls_debug) = wash_colls(cc, c, sc, verbose) # which colls to search and to display? except InvenioWebSearchUnknownCollectionError, exc: colname = exc.colname if of.startswith("h"): page_start(req, of, cc, aas, ln, getUid(req), websearch_templates.tmpl_collection_not_found_page_title(colname, ln)) req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln)) return page_end(req, of, ln) elif of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: return page_end(req, of, ln) p = wash_pattern(p) f = wash_field(f) p1 = wash_pattern(p1) f1 = wash_field(f1) p2 = wash_pattern(p2) f2 = wash_field(f2) p3 = wash_pattern(p3) f3 = wash_field(f3) datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d) # wash ranking method: if not is_method_valid(None, rm): rm = "" _ = gettext_set_language(ln) # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable) if sysnb != "" and sysno == "": sysno = sysnb if id > 0 and recid == -1: recid = id if idb > 0 and recidb == -1: recidb = idb # TODO deduce passed search limiting criterias (if applicable) pl, pl_in_url = "", "" # no limits by default if action != "browse" and req and not isinstance(req, cStringIO.OutputType) \ and req.args: # we do not want to add options while browsing or while calling via command-line fieldargs = cgi.parse_qs(req.args) for fieldcode in get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: pl += "+%s:\"%s\" " % (fieldcode, val) pl_in_url += "&%s=%s" % (urllib.quote(fieldcode), urllib.quote(val)) # deduce recid from sysno argument (if applicable): if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record: recid = get_mysql_recid_from_aleph_sysno(sysno) if recid is None: recid = 0 # use recid 0 to indicate that this sysno does not exist # deduce collection we are in (if applicable): if recid > 0: referer = None if 
req: referer = req.headers_in.get('Referer') cc = guess_collection_of_a_record(recid, referer) # deduce user id (if applicable): try: uid = getUid(req) except: uid = 0 ## 0 - start output if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist ## 1 - detailed record display title, description, keywords = \ websearch_templates.tmpl_record_page_header_content(req, recid, ln) if req is not None and not req.header_only: page_start(req, of, cc, aas, ln, uid, title, description, keywords, recid, tab) # Default format is hb but we are in detailed -> change 'of' if of == "hb": of = "hd" if record_exists(recid): if recidb <= recid: # sanity check recidb = recid + 1 if of == "id": return [recidx for recidx in range(recid, recidb) if record_exists(recidx)] else: print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose, tab=tab) if req and of.startswith("h"): # register detailed record page view event client_ip_address = str(req.remote_ip) register_page_view_event(recid, uid, client_ip_address) else: # record does not exist if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) elif action == "browse": ## 2 - browse needed of = 'hb' page_start(req, of, cc, aas, ln, uid, _("Browse"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) try: if aas == 1 or (p1 or p2 or p3): browse_pattern(req, colls_to_search, p1, f1, rg, ln) browse_pattern(req, colls_to_search, p2, f2, rg, ln) browse_pattern(req, colls_to_search, p3, f3, rg, ln) else: browse_pattern(req, colls_to_search, p, f, rg, ln) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) elif rm and p.startswith("recid:"): ## 3-ter - similarity search or citation search needed if req and not req.header_only: page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) if record_exists(p[6:]) != 1: # record does not exist if of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, "Requested record does not seem to exist.") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find similar ones to it t1 = os.times()[4] results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \ rank_records(rm, 0, get_collection_reclist(cc), string.split(p), verbose) if results_similar_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, 
sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_warning(req, results_similar_comments) print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose) elif of=="id": return results_similar_recIDs elif of.startswith("x"): print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose) else: # rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, results_similar_relevances_prologue) print_warning(req, results_similar_relevances_epilogue) print_warning(req, results_similar_comments) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif p.startswith("cocitedwith:"): #WAS EXPERIMENTAL ## 3-terter - cited by search needed page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) recID = p[12:] if record_exists(recID) != 1: # record does not exist if of.startswith("h"): print_warning(req, "Requested record does not seem to exist.") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find co-cited ones: t1 = os.times()[4] results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID))) if results_cocited_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose) elif of=="id": return results_cocited_recIDs elif of.startswith("x"): print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose) else: # cited rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, "nothing found") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: ## 3 - common search needed query_in_cache = False query_representation_in_cache = repr((p,f,colls_to_search)) page_start(req, of, cc, aas, ln, uid, p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h") and verbose and wash_colls_debug: print_warning(req, "wash_colls debugging info : %s" % wash_colls_debug) # search into the hosted collections only if the output format is html or xml if hosted_colls and (of.startswith("h") or of.startswith("x")) and not p.startswith("recid:"): # hosted_colls_results : the hosted collections' searches that did not timeout # hosted_colls_timeouts : the hosted collections' searches that timed out and will be searched later on again 
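# as consumed further down, each entry of hosted_colls_results is of the
# form ((search_pattern, engine), nb_found, seconds_taken) -- hence the
# result[0][1].name / result[1] / result[2] accesses below -- while
# collections that timed out are later flagged with the sentinel hit
# count -963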
(hosted_colls_results, hosted_colls_timeouts) = calculate_hosted_collections_results(req, [p, p1, p2, p3], f, hosted_colls, verbose, ln, CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH) # successful searches if hosted_colls_results: hosted_colls_true_results = [] for result in hosted_colls_results: # if the number of results is None or 0 (or False) then just do nothing if result[1] == None or result[1] == False: # these are the searches the returned no or zero results if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned no results" % result[0][1].name) else: # these are the searches that actually returned results on time hosted_colls_true_results.append(result) if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2])) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time") if hosted_colls_timeouts: if verbose: for timeout in hosted_colls_timeouts: print_warning(req, "Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name) # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")): (hosted_colls_results, hosted_colls_timeouts) = (None, None) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections to be searched") ## let's define some useful boolean variables: # True means there are actual or potential hosted collections results to be printed hosted_colls_actual_or_potential_results_p = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts)) # True means there are hosted collections timeouts to take care of later # (useful for more accurate printing of results later) hosted_colls_potential_results_p = not (not hosted_colls or not hosted_colls_timeouts) # True means we only have hosted collections to deal with only_hosted_colls_actual_or_potential_results_p = not colls_to_search and hosted_colls_actual_or_potential_results_p if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) t1 = os.times()[4] results_in_any_collection = HitSet() if aas == 1 or (p1 or p2 or p3): ## 3A - advanced search try: results_in_any_collection = search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln) if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p2: results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln) if op1 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op1 == "o": # or results_in_any_collection.union_update(results_tmp) elif op1 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." 
% cgi.escape(op1), "Error") if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p3: results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln) if op2 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op2 == "o": # or results_in_any_collection.union_update(results_tmp) elif op2 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(op2), "Error") except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: ## 3B - simple search if search_results_cache.cache.has_key(query_representation_in_cache): # query is not in the cache already, so reuse it: query_in_cache = True results_in_any_collection = search_results_cache.cache[query_representation_in_cache] if verbose and of.startswith("h"): print_warning(req, "Search stage 0: query found in cache, reusing cached results.") else: try: # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection" # recommendations when there are results only in the hosted collections. Also added the if clause to avoid # searching in case we know we only have actual or potential hosted collections results if not only_hosted_colls_actual_or_potential_results_p: results_in_any_collection = search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if len(results_in_any_collection) == 0 and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # store this search query results into search results cache if needed: if CFG_WEBSEARCH_SEARCH_CACHE_SIZE and not query_in_cache: if len(search_results_cache.cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE: search_results_cache.clear() search_results_cache.cache[query_representation_in_cache] = results_in_any_collection if verbose and of.startswith("h"): print_warning(req, "Search stage 3: storing query results in cache.") # search stage 4: intersection with collection universe: try: # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection" # recommendations when there results only in the hosted collections. 
Also added the if clause to avoid # searching in case we know since the last stage that we have no results in any collection if len(results_in_any_collection) != 0: results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p) else: results_final = {} except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # search stage 5: apply search option limits and restrictions: if datetext1 != "" and results_final != {}: if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2)) try: results_final = intersect_results_with_hitset(req, results_final, search_unit_in_bibrec(datetext1, datetext2, dt), ap, aptext= _("No match within your time limits, " "discarding this condition..."), of=of) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) #if of.startswith("x"): # # Print empty, but valid XML # print_records_prologue(req, of) # print_records_epilogue(req, of) return page_end(req, of, ln) if pl and results_final != {}: pl = wash_pattern(pl) if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying search pattern limit %s..." 
% cgi.escape(pl)) try: results_final = intersect_results_with_hitset(req, results_final, search_pattern_parenthesised(req, pl, ap=0, ln=ln), ap, aptext=_("No match within your search limits, " "discarding this condition..."), of=of) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) t2 = os.times()[4] cpu_time = t2 - t1 ## search stage 6: display results: results_final_nb_total = 0 results_final_nb = {} # will hold number of records found in each collection # (in simple dict to display overview more easily) for coll in results_final.keys(): results_final_nb[coll] = len(results_final[coll]) #results_final_nb_total += results_final_nb[coll] # Now let us calculate results_final_nb_total more precisely, # in order to get the total number of "distinct" hits across # searched collections; this is useful because a record might # have been attributed to more than one primary collection; so # we have to avoid counting it multiple times. The price to # pay for this accuracy of results_final_nb_total is somewhat # increased CPU time. if len(results_final.keys()) == 1: # only one collection; no need to union them results_final_for_all_selected_colls = results_final.values()[0] results_final_nb_total = results_final_nb.values()[0] else: # okay, some work ahead to union hits across collections: results_final_for_all_selected_colls = HitSet() for coll in results_final.keys(): results_final_for_all_selected_colls.union_update(results_final[coll]) results_final_nb_total = len(results_final_for_all_selected_colls) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: for result in hosted_colls_true_results: colls_to_search.append(result[0][1].name) results_final_nb[result[0][1].name] = result[1] results_final_nb_total += result[1] cpu_time += result[2] if hosted_colls_timeouts: for timeout in hosted_colls_timeouts: colls_to_search.append(timeout[1].name) # use -963 as a special number to identify the collections that timed out results_final_nb[timeout[1].name] = -963 # we continue past this point only if there is a hosted collection that has timed out and might offer potential results if results_final_nb_total == 0 and not hosted_colls_potential_results_p: if of.startswith("h"): print_warning(req, "No match found, please enter different search terms.") elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # yes, some hits found: good! # collection list may have changed due to not-exact-match-found policy so check it out: for coll in results_final.keys(): if coll not in colls_to_search: colls_to_search.append(coll) # print results overview: if of == "id": # we have been asked to return list of recIDs recIDs = list(results_final_for_all_selected_colls) if sf: # do we have to sort? recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank?
results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_selected_colls, string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if results_final_for_all_colls_rank_records_output[0]: recIDs = results_final_for_all_colls_rank_records_output[0] return recIDs elif of.startswith("h"): if of not in ['hcs']: # added the hosted_colls_potential_results_p parameter to help print out the overview more accurately req.write(print_results_overview(req, colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln, ec, hosted_colls_potential_results_p=hosted_colls_potential_results_p)) selected_external_collections_infos = print_external_results_overview(req, cc, [p, p1, p2, p3], f, ec, verbose, ln) # print number of hits found for XML outputs: if of.startswith("x"): req.write("\n" % results_final_nb_total) # print records: if of in ['hcs']: # feed the current search to be summarized: from invenio.search_engine_summarizer import summarize_records summarize_records(results_final_for_all_selected_colls, 'hcs', ln, p, f, req) else: if len(colls_to_search)>1: cpu_time = -1 # we do not want to have search time printed on each collection print_records_prologue(req, of) for coll in colls_to_search: if results_final.has_key(coll) and len(results_final[coll]): if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) results_final_recIDs = list(results_final[coll]) results_final_relevances = [] results_final_relevances_prologue = "" results_final_relevances_epilogue = "" if sf: # do we have to sort? results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank? 
results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \ rank_records(rm, 0, results_final[coll], string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if of.startswith("h"): print_warning(req, results_final_comments) if results_final_recIDs_ranked: results_final_recIDs = results_final_recIDs_ranked else: # rank_records failed and returned some error message to display: print_warning(req, results_final_relevances_prologue) print_warning(req, results_final_relevances_epilogue) print_records(req, results_final_recIDs, jrec, rg, of, ot, ln, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, search_pattern=p, print_records_prologue_p=False, print_records_epilogue_p=False, verbose=verbose) if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: # TODO: add a verbose message here for result in hosted_colls_true_results: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) - req.write(print_hosted_results(result[0], of, ln, req)) + req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts: # TODO: add a verbose message here # TODO: check if verbose messages still work when dealing with (re)calculations of timeouts (hosted_colls_timeouts_results, hosted_colls_timeouts_timeouts) = do_calculate_hosted_collections_results(req, ln, None, verbose, None, hosted_colls_timeouts, CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH) if hosted_colls_timeouts_results: hosted_colls_timeouts_true_results = [] for result in hosted_colls_timeouts_results: if result[1] == None or result[1] == False: ## these are the searches the returned no or zero results ## also print a nearest terms box, in case this is the only ## collection being searched and it returns no results? 
if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) - req.write(print_hosted_results(result[0], of, ln, req, no_records_found=True)) + req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, no_records_found=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) else: # these are the searches that actually returned results on time if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) - req.write(print_hosted_results(result[0], of, ln, req)) + req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts_timeouts: for timeout in hosted_colls_timeouts_timeouts: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) - req.write(print_hosted_results(timeout[0], of, ln, req, timeout=True)) + req.write(print_hosted_results(url_and_engine=timeout[0], ln=ln, of=of, req=req, search_timed_out=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) print_records_epilogue(req, of) if f == "author" and of.startswith("h"): req.write(create_similarly_named_authors_link_box(p, ln)) # log query: try: id_query = log_query(req.remote_host, req.args, uid) if of.startswith("h") and id_query: if not of in ['hcs']: # display alert/RSS teaser for non-summary formats: req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, ln=ln)) except: # do not log query if req is None (used by CLI interface) pass log_query_info("ss", p, f, colls_to_search, results_final_nb_total) # External searches if of.startswith("h"): if not of in ['hcs']: perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) def perform_request_cache(req, action="show"): """Manipulates the search engine cache.""" req.content_type = "text/html" req.send_http_header() req.write("") out = "" out += "
<h1>Search Cache</h1>"
    # clear cache if requested:
    if action == "clear":
        search_results_cache.clear()
    req.write(out)
    # show collection reclist cache:
    out = "<h3>Collection reclist cache</h3>"
    out += "- collection table last updated: %s" % get_table_update_time('collection')
    out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache.timestamp
    out += "<br />- reclist cache contents:"
    out += "<blockquote>"
    for coll in collection_reclist_cache.cache.keys():
        if collection_reclist_cache.cache[coll]:
            out += "%s (%d)<br />" % (coll, len(collection_reclist_cache.cache[coll]))
    out += "</blockquote>"
    req.write(out)
    # show search results cache:
    out = "<h3>Search Cache</h3>"
    out += "- search cache usage: %d queries cached (max. ~%d)" % \
           (len(search_results_cache.cache), CFG_WEBSEARCH_SEARCH_CACHE_SIZE)
    if len(search_results_cache.cache):
        out += "<br />- search cache contents:"
        out += "<blockquote>"
        for query, hitset in search_results_cache.cache.items():
            out += "<br />%s ... %s" % (query, hitset)
        out += """<p><a href="%s/search/cache?action=clear">clear search results cache</a>""" % CFG_SITE_URL
        out += "</blockquote>"
    req.write(out)
    # show field i18nname cache:
    out = "<h3>Field I18N names cache</h3>"
    out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
    out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for field in field_i18nname_cache.cache.keys():
        for ln in field_i18nname_cache.cache[field].keys():
            out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache.cache[field][ln])
    out += "</blockquote>"
    req.write(out)
    # show collection i18nname cache:
    out = "<h3>Collection I18N names cache</h3>"
    out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
    out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for coll in collection_i18nname_cache.cache.keys():
        for ln in collection_i18nname_cache.cache[coll].keys():
            out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache.cache[coll][ln])
    out += "</blockquote>"
    req.write(out)
    req.write("</html>")
    return "\n"

def perform_request_log(req, date=""):
    """Display search log information for given date."""
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    req.write("<h1>Search Log</h1>")
    if date: # case A: display stats for a day
        yyyymmdd = string.atoi(date)
        req.write("<p><big><strong>Date: %d</strong></big></p>" % yyyymmdd)
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
        # read file:
        p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
        lines = p.readlines()
        p.close()
        # process lines:
        i = 0
        for line in lines:
            try:
                datetime, aas, p, f, c, nbhits = string.split(line, "#")
                i += 1
                req.write("<tr><td>#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                          % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
            except:
                pass # ignore eventual wrong log lines
        req.write("</table>")
    else: # case B: display summary stats per day
        yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
        yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Day", "Number of Queries"))
        for day in range(yyyymm01, yyyymmdd + 1):
            p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
            for line in p.readlines():
                req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                          (day, CFG_SITE_URL, day, line))
            p.close()
        req.write("</table>")
    req.write("</html>")
    return "\n"

def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True):
    """
    Analyze RECIDS and look for TAGS and return most popular values
    and the frequency with which they occur sorted according to
    descending frequency.

    If a value is found in EXCLUDE_VALUES, then do not count it.

    If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
    of value in the tags.  If False, then we count the value only once
    regardless of the number of times it may appear in a record.
    (But, if the same value occurs in another record, we count it, of
    course.)

    Example:
     >>> get_most_popular_field_values(range(11,20), '980__a')
     (('PREPRINT', 10), ('THESIS', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
     (('Ellis, J', 10), ('Ellis, N', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J'))
     (('Ellis, N', 7), ...)
    """
    def _get_most_popular_field_values_helper_sorter(val1, val2):
        "Compare VAL1 and VAL2 according to, firstly, frequency, then secondly, alphabetically."
        compared_via_frequencies = cmp(valuefreqdict[val2], valuefreqdict[val1])
        if compared_via_frequencies == 0:
            return cmp(val1.lower(), val2.lower())
        else:
            return compared_via_frequencies

    valuefreqdict = {}
    ## sanity check:
    if not exclude_values:
        exclude_values = []
    if isinstance(tags, str):
        tags = (tags,)
    ## find values to count:
    vals_to_count = []
    if count_repetitive_values:
        # counting technique A: can look up many records at once: (very fast)
        for tag in tags:
            vals_to_count.extend(get_fieldvalues(recids, tag))
    else:
        # counting technique B: must count record-by-record: (slow)
        for recid in recids:
            vals_in_rec = []
            for tag in tags:
                for val in get_fieldvalues(recid, tag, False):
                    vals_in_rec.append(val)
            # do not count repetitive values within this record
            # (even across various tags, so need to unify again):
            dtmp = {}
            for val in vals_in_rec:
                dtmp[val] = 1
            vals_in_rec = dtmp.keys()
            vals_to_count.extend(vals_in_rec)
    ## are we to exclude some of found values?
for val in vals_to_count: if val not in exclude_values: if valuefreqdict.has_key(val): valuefreqdict[val] += 1 else: valuefreqdict[val] = 1 ## sort by descending frequency of values: out = () vals = valuefreqdict.keys() vals.sort(_get_most_popular_field_values_helper_sorter) for val in vals: out += (val, valuefreqdict[val]), return out def profile(p="", f="", c=CFG_SITE_NAME): """Profile search time.""" import profile import pstats profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile") p = pstats.Stats("perform_request_search_profile") p.strip_dirs().sort_stats("cumulative").print_stats() return 0 ## test cases: #print wash_colls(CFG_SITE_NAME,"Library Catalogue", 0) #print wash_colls("Periodicals & Progress Reports",["Periodicals","Progress Reports"], 0) #print wash_field("wau") #print print_record(20,"tm","001,245") #print create_opft_search_units(None, "PHE-87-13","reportnumber") #print ":"+wash_pattern("* and % doo * %")+":\n" #print ":"+wash_pattern("*")+":\n" #print ":"+wash_pattern("ellis* ell* e*%")+":\n" #print run_sql("SELECT name,dbquery from collection") #print get_index_id("author") #print get_coll_ancestors("Theses") #print get_coll_sons("Articles & Preprints") #print get_coll_real_descendants("Articles & Preprints") #print get_collection_reclist("Theses") #print log(sys.stdin) #print search_unit_in_bibrec('2002-12-01','2002-12-12') #print get_nearest_terms_in_bibxxx("ellis", "author", 5, 5) #print call_bibformat(68, "HB_FLY") #print get_fieldvalues(10, "980__a") #print get_fieldvalues_alephseq_like(10,"001___") #print get_fieldvalues_alephseq_like(10,"980__a") #print get_fieldvalues_alephseq_like(10,"foo") #print get_fieldvalues_alephseq_like(10,"-1") #print get_fieldvalues_alephseq_like(10,"99") #print get_fieldvalues_alephseq_like(10,["001", "980"]) ## profiling: #profile("of the this") #print perform_request_search(p="ellis") diff --git a/modules/websearch/lib/websearch_external_collections_config.py b/modules/websearch/lib/websearch_external_collections_config.py index 198accf96..e8b3eff1d 100644 --- a/modules/websearch/lib/websearch_external_collections_config.py +++ b/modules/websearch/lib/websearch_external_collections_config.py @@ -1,118 +1,120 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
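The get_most_popular_field_values() hunk above is, at its core, a frequency count followed by a frequency-then-alphabetical sort. A minimal self-contained sketch of that core logic (the helper name count_value_frequencies is hypothetical; Python 2 style, as in the sources):

def count_value_frequencies(values, exclude_values=()):
    """Return (value, frequency) pairs sorted by descending frequency,
    breaking ties alphabetically and case-insensitively."""
    freq = {}
    for val in values:
        if val not in exclude_values:
            freq[val] = freq.get(val, 0) + 1
    pairs = freq.items()
    pairs.sort(lambda a, b: cmp(b[1], a[1]) or cmp(a[0].lower(), b[0].lower()))
    return tuple(pairs)

## e.g. count_value_frequencies(['PREPRINT', 'THESIS', 'PREPRINT'])
## gives (('PREPRINT', 2), ('THESIS', 1))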
# pylint: disable-msg=C0301 """External collection configuration file.""" __revision__ = "$Id$" from invenio.config import CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT, \ CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS # if we want to define a parser for an external collection directly using the following dictionary # we have to import them here instead of the searcher from invenio.websearch_external_collections_parser import CDSInvenioHTMLExternalCollectionResultsParser from invenio.websearch_external_collections_parser import CDSInvenioXMLExternalCollectionResultsParser CFG_EXTERNAL_COLLECTION_TIMEOUT = CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS = 1 CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH = 1 CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH = 10 CFG_EXTERNAL_COLLECTION_MAXRESULTS = CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS +CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS = 100 + CFG_EXTERNAL_COLLECTIONS = { 'Amazon': {'engine': 'Amazon'}, 'CERN Indico': {'engine': 'CDSIndico'}, 'CERN Intranet': {'base_url': "http://www.iso.org", 'search_url': "http://search.cern.ch/query.html?qt="}, 'CERN EDMS': {'engine': 'CERNEDMS'}, 'CiteSeer': {'engine': 'Citeseer'}, 'Google Web': {'engine': 'Google'}, 'Google Books': {'engine': 'GoogleBooks'}, 'Google Scholar': {'engine': 'GoogleScholar'}, 'IEC': {'base_url': "http://www.iec.ch", 'search_url': "http://www.iec.ch/cgi-bin/procgi.pl/www/iecwww.p?wwwlang=E&wwwprog=sea22.p&search=text&searchfor="}, 'IHS': {'base_url': "http://global.ihs.com", 'search_url': "http://global.ihs.com/search_res.cfm?&input_doc_title="}, 'ISO': {'base_url': "http://www.iso.org", 'search_url': "http://www.iso.org/iso/en/StandardsQueryFormHandler.StandardsQueryFormHandler?languageCode=en" + \ "&lastSearch=false&title=true&isoNumber=&isoPartNumber=&isoDocType=ALL&isoDocElem=ALL&ICS=&stageCode=&stages" + \ "cope=Current&repost=1&stagedatepredefined=&stageDate=&committee=ALL&subcommittee=&scopecatalogue=CATALOGUE&" + \ "scopeprogramme=PROGRAMME&scopewithdrawn=WITHDRAWN&scopedeleted=DELETED&sortOrder=ISO&keyword="}, 'INSPEC': {'engine': 'INSPEC'}, 'KISS Preprints': {'engine': 'KissForPreprints'}, 'KISS Books/Journals': {'engine': 'KissForBooksAndJournals'}, 'NEBIS': {'engine': 'NEBIS'}, 'Scirus': {'engine': 'Scirus'}, 'SPIRES HEP': {'engine': 'SPIRES'}, 'SLAC Library Catalog': {'engine': 'SPIRESBooks'}, 'Atlantis Institute Books': {'engine': 'CDSInvenio', 'base_url': 'http://invenio-demo.cern.ch/', 'parser_params': {'host': 'invenio-demo.cern.ch', 'path': '', 'parser': CDSInvenioHTMLExternalCollectionResultsParser, 'fetch_format': 'hb', 'num_results_regex_str': r'([0-9,]+?) 
records found', 'nbrecs_regex_str': r'', 'nbrecs_url': 'http://invenio-demo.cern.ch/search?c=Books&rg=0&of=xm'}, 'search_url': 'http://invenio-demo.cern.ch/search?cc=Books&p=', 'record_url': 'http://invenio-demo.cern.ch/record/', 'selected_by_default': False}, 'Atlantis Institute Articles': {'engine': 'CDSInvenio', 'base_url': 'http://invenio-demo.cern.ch/', 'parser_params': {'host': 'invenio-demo.cern.ch', 'path': '', 'parser': CDSInvenioXMLExternalCollectionResultsParser, 'fetch_format': 'xm', 'num_results_regex_str': r'', 'nbrecs_regex_str': r'', 'nbrecs_url': 'http://invenio-demo.cern.ch/search?cc=Articles&rg=0&of=xm'}, 'search_url': 'http://invenio-demo.cern.ch/search?cc=Articles&p=', 'record_url': 'http://invenio-demo.cern.ch/record/', 'selected_by_default': True}, } CFG_EXTERNAL_COLLECTION_STATES_NAME = {0: 'Disabled', 1: 'See also', 2: 'External search', 3:'External search checked'} diff --git a/modules/websearch/lib/websearch_external_collections_parser.py b/modules/websearch/lib/websearch_external_collections_parser.py index 4c96a8788..5e93fcabb 100644 --- a/modules/websearch/lib/websearch_external_collections_parser.py +++ b/modules/websearch/lib/websearch_external_collections_parser.py @@ -1,477 +1,480 @@ # -*- coding: utf-8 -*- ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ This is a collection of parsers for external search engines. Each parser try to extract results from a web page returned by an external search engine. """ __revision__ = "$Id$" import re #from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS from invenio.config import CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS CFG_EXTERNAL_COLLECTION_MAXRESULTS = CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS from invenio.bibformat import format_record from invenio.websearch_external_collections_getter import fetch_url_content import cgi re_href = re.compile(r']*href="?([^">]*)"?[^>]*>', re.IGNORECASE) re_img = re.compile(r']*src="?([^">]*)"?[^>]*>', re.IGNORECASE) def correct_url(htmlcode, host, path): """This function is used to correct urls in html code. >>> correct_url('', 'www.google.com', 'search/') '' """ htmlcode = correct_url_with_regex(htmlcode, host, path, re_href) htmlcode = correct_url_with_regex(htmlcode, host, path, re_img) return htmlcode def correct_url_with_regex(htmlcode, host, path, regex): """Correct urls in html code. 
The URL is found using the given regex.""" url_starts = [] results = regex.finditer(htmlcode) for result in results: url = result.group(1) if not url.startswith('http://'): url_starts.append(result.start(1)) url_starts.reverse() for url_start in url_starts: if htmlcode[url_start] == '/': htmlcode = htmlcode[:url_start] + "http://" + host + htmlcode[url_start:] else: htmlcode = htmlcode[:url_start] + "http://" + host + "/" + path + htmlcode[url_start:] return htmlcode class ExternalCollectionHit: """Hold a result.""" def __init__(self, html=None): self.html = html class ExternalCollectionResultsParser(object): """Mother class for parsers.""" num_results_regex = None nbrecs_regex = None def __init__(self, host='', path=''): self.buffer = "" self.results = [] self.host = host self.path = path self.clean() def clean(self): """Clean buffer and results to be able to parse a new web page.""" self.buffer = "" self.results = [] def feed(self, data): """Feed buffer with data that will be parsed later.""" self.buffer += data - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse the buffer. Set an optional output format.""" pass - def add_html_result(self, html): + def add_html_result(self, html, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Add new html code as a result. The URLs in the html code will be corrected.""" if not html: return - if len(self.results) >= CFG_EXTERNAL_COLLECTION_MAXRESULTS: + if len(self.results) >= limit: return html = correct_url(html, self.host, self.path) + '\n' result = ExternalCollectionHit(html) self.results.append(result) def parse_num_results(self): """Parse the buffer with the num_results_regex to extract the number of records found. The number is returned as an integer.""" if self.num_results_regex is None: return None list_matchs = self.num_results_regex.finditer(self.buffer) for match in list_matchs: return int(match.group(1).replace(',', '')) return None def parse_nbrecs(self, timeout): """Fetch and parse the contents of the nbrecs url with the nbrecs_regex to extract the total number of records. The number is returned as an integer.""" if self.nbrecs_regex is None: return None html = fetch_url_content([self.nbrecs_url], timeout) try: if len(html) == 1: matches = self.nbrecs_regex.search(html[0]) return int(matches.group(1).replace(',', '')) else: return None # This last else should never occur. It means the list html has more (or fewer) than one element, # which is impossible since the fetch_url_content(url) function always returns a list with as many # elements as the list it was fed with except AttributeError: # This means that the pattern did not match anything, therefore matches.group(1) raised the exception return -1 except TypeError: # This means that the pattern was run on None instead of a string or buffer, therefore # self.nbrecs_regex.search(html[0]) raised the exception, as html = [None] return -2 - def parse_and_get_results(self, data, of=None, req=None, feedonly=False, parseonly=False): + def parse_and_get_results(self, data, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS, feedonly=False, parseonly=False): """Parse given data and return results.""" # parseonly = True in case we only want to parse the data and return the results # ex. the buffer has already been fed if not parseonly: self.clean() self.feed(data) # feedonly = True in case we just want to feed the buffer with the new data # ex.
the data will be used only to calculate the number of results if not feedonly: - self.parse(of, req) + self.parse(of, req, limit) return self.results def buffer_decode_from(self, charset): """Convert the buffer to UTF-8 from the specified charset. Ignore errors.""" try: self.buffer = self.buffer.decode(charset, 'ignore').encode('utf-8', 'ignore') except: pass class CDSIndicoCollectionResutsParser(ExternalCollectionResultsParser): """Parser for CDS Indico""" num_results_regex = re.compile(r'([0-9]+?) records found') result_regex = re.compile(r'\s*\s*([0-9]+\.)\s*(.*?)

.*?
', re.MULTILINE + re.DOTALL) def __init__(self, host="", path=""): super(CDSIndicoCollectionResutsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" results = self.result_regex.finditer(self.buffer) for result in results: num = result.group(1) html = result.group(2) - self.add_html_result(num + ' ' + html + '
') + self.add_html_result(num + ' ' + html + '
', limit) class KISSExternalCollectionResultsParser(ExternalCollectionResultsParser): """Parser for Kiss.""" num_results_regex = re.compile(r'
 ([0-9]+?) records matched
') def __init__(self, host="www-lib.kek.jp", path="cgi-bin/"): super(KISSExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" self.buffer_decode_from('Shift_JIS') elements = self.buffer.split("
") if len(elements) <= 1: return for element in elements[1:]: if len(self.results) >= CFG_EXTERNAL_COLLECTION_MAXRESULTS: return end_index = element.find('
') if end_index != -1: element = element[:end_index + 4] - self.add_html_result(element + '

') + self.add_html_result(element + '

', limit) class KISSBooksExternalCollectionResultsParser(ExternalCollectionResultsParser): """Parser for Kiss books.""" line = re.compile(r'(.*?)') title = re.compile(r'[ ]+([0-9]+)\)[ ]+
[ ]*(.*?)[ ]*[ ]+') info_line = re.compile(r'[ ]*[ ]*[ ]*(.*?).*') num_results_regex = re.compile(r' (?:Books|Journals) ([0-9]+?) ') def __init__(self, host="www-lib.kek.jp", path="cgi-bin/"): super(KISSBooksExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" self.buffer_decode_from('Shift_JIS') self.buffer = self.buffer.replace('\n', ' ') html = "" results_to_parse = self.line.finditer(self.buffer) for result in results_to_parse: if len(self.results) >= CFG_EXTERNAL_COLLECTION_MAXRESULTS: return data = result.group() title_match = self.title.match(data) if title_match: - self.add_html_result(html) + self.add_html_result(html, limit) num = title_match.group(1) url = title_match.group(2) title = title_match.group(3) html = num + ') " + title + "
" else: info_line_match = self.info_line.match(data) if info_line_match: info = info_line_match.group(1) html += info + '
' - self.add_html_result(html) + self.add_html_result(html, limit) class GoogleExternalCollectionResultsParser(ExternalCollectionResultsParser): """Parser for Google""" num_results_regex = re.compile(r'of about ([0-9,]+?)') def __init__(self, host = "www.google.com", path=""): super(GoogleExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" elements = self.buffer.split("
") if len(elements) <= 1: return for element in elements[1:]: end_index = element.find('') if end_index != -1: element = element[:end_index + 8] - self.add_html_result(element) + self.add_html_result(element, limit) class GoogleScholarExternalCollectionResultsParser(GoogleExternalCollectionResultsParser): """Parser for Google Scholar.""" def __init__(self, host = "scholar.google.com", path=""): super(GoogleScholarExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" elements = self.buffer.split("

") if len(elements) <= 1: return for element in elements[1:-1]: end_index = element.find('') if end_index != -1: element = element[:end_index + 8] - self.add_html_result(element + '
') + self.add_html_result(element + '
', limit) class GoogleBooksExternalCollectionResultsParser(GoogleExternalCollectionResultsParser): """Parser for Google Books.""" num_results_regex = re.compile(r' with ([0-9]+?) pages on ') def __init__(self, host = "books.google.com", path=""): super(GoogleBooksExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" elements = self.buffer.split('[ ]*[ ]*
') if len(elements) <= 1: return for element in elements[1:-1]: - self.add_html_result(element) + self.add_html_result(element, limit) class SPIRESExternalCollectionResultsParser(ExternalCollectionResultsParser): """Parser for SPIRES.""" num_results_regex = re.compile(r'Paper [0-9]+ to [0-9]+ of ([0-9]+)') def __init__(self, host="www.slac.stanford.edu", path="spires/find/hep/"): super(SPIRESExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" elements = self.buffer.split('

') if len(elements) <= 2: return for element in elements[1:-1]: - self.add_html_result(element) + self.add_html_result(element, limit) class SCIRUSExternalCollectionResultsParser(ExternalCollectionResultsParser): """Parser for SCIRUS.""" num_results_regex = re.compile(r'([0-9,]+) total ') result_separator = re.compile(r'

[ ]*(.*?)
') result_decode = re.compile('[ ]*(.*?)[ ]*.*?
[ ]*(.*?)[ ]*
[ ]*(.*?)[ ]*.*?
[ ]*(.*)[ ]*') cleaning = re.compile('(||||)') def __init__(self, host='www.scirus.com', path='srsapp/'): super(SCIRUSExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" data = self.buffer.replace('\n', ' ') for element in self.result_separator.finditer(data): data = element.group(1) parsed_line = self.result_decode.match(data) if parsed_line is not None: link = parsed_line.group(1) date = parsed_line.group(2) comments = parsed_line.group(3) similar = parsed_line.group(4) html = "%(link)s - %(date)s
%(comments)s
%(similar)s
" % {'link' : link, 'date' : date, 'comments' : comments, 'similar' : similar} else: html = self.cleaning.sub("", data) + '
' - self.add_html_result(html) + self.add_html_result(html, limit) class CiteSeerExternalCollectionResultsParser(ExternalCollectionResultsParser): """Parser for CiteSeer.""" num_results_regex = re.compile(r'
(?:More than |)([0-9]+)(?: documents found.| results)') result_separator = re.compile(r'.*?', re.DOTALL) def __init__(self, host='', path=''): super(CiteSeerExternalCollectionResultsParser, self).__init__(host, path) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" for element in self.result_separator.finditer(self.buffer): - self.add_html_result(element.group() + '
') + self.add_html_result(element.group() + '
', limit) class CDSInvenioHTMLExternalCollectionResultsParser(ExternalCollectionResultsParser): """HTML brief (hb) Parser for Invenio""" def __init__(self, params): self.buffer = "" self.results = [] self.clean() for (name, value) in params.iteritems(): setattr(self, name, value) self.num_results_regex = re.compile(self.num_results_regex_str) self.nbrecs_regex = re.compile(self.nbrecs_regex_str, re.IGNORECASE) - def parse(self, of=None, req=None): + def parse(self, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records.""" # the patterns : # level_a : select only the results level_a_pat = re.compile(r']*basket[^>]*?>.*?(.*?)
.*?', re.DOTALL + re.MULTILINE + re.IGNORECASE) # level_b : purge html from the basket input fields level_b_pat = re.compile(r']*?/>', re.DOTALL + re.MULTILINE + re.IGNORECASE) # level_c : separate the results from one another level_c_pat = re.compile(r'(.*?)', re.DOTALL + re.MULTILINE + re.IGNORECASE) # the long way : #level_a_res = level_a_pat.search(self.buffer) #level_ab_res = level_a_res.group(1) #level_b_res = level_b_pat.sub('', level_ab_res) #level_c_res = level_c_pat.finditer(level_b_res) # the short way : try: results = level_c_pat.finditer(level_b_pat.sub('', level_a_pat.search(self.buffer).group(1))) for result in results: # each result is placed in each own table since it already has its rows and cells defined - self.add_html_result('' + result.group(1) + '
') + self.add_html_result('' + result.group(1) + '
', limit) except AttributeError: # in case there were no results found an Attribute error is raised pass class CDSInvenioXMLExternalCollectionResultsParser(ExternalCollectionResultsParser): """XML (xm) parser for Invenio""" def __init__(self, params): self.buffer = "" self.results = [] self.clean() for (name, value) in params.iteritems(): setattr(self, name, value) self.num_results_regex = re.compile(self.num_results_regex_str) self.nbrecs_regex = re.compile(self.nbrecs_regex_str, re.IGNORECASE) - def parse(self, of='hb', req=None): + def parse(self, of='hb', req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Parse buffer to extract records. Format the records using the selected output format.""" (recids, records) = self.parse_and_extract_records(of, req) if req and cgi.parse_qs(req.args).has_key('jrec'): counter = int(cgi.parse_qs(req.args)['jrec'][0]) - 1 else: counter = 0 for recid in recids: counter += 1 if of == 'hb': html = """ %(counter)s. %(record)s """ % {'recid': recid, 'counter': counter, 'record': records[recid]} elif of == 'hd': # HTML detailed (hd) is not supported yet # TODO: either disable the hd output format or print it out correctly html = """""" elif of == 'xm': html = records[recid] - self.add_html_result(html) + else: + html = None + if html: + self.add_html_result(html, limit) def parse_and_extract_records(self, of='hb', req=None): """Parse the buffer and return a list of the recids and a dictionary with key:value pairs like the following recid:formated record with the selected output format""" # the patterns : # separate the records from one another record_pat = re.compile(r'(.*?)', re.DOTALL + re.MULTILINE + re.IGNORECASE) # extract the recid recid_pat = re.compile(r'([0-9]+?)', re.DOTALL + re.MULTILINE + re.IGNORECASE) if not of: of='hb' try: results = record_pat.finditer(self.buffer) records = {} recids = [] for result in results: xml_record = result.group(1) recid = recid_pat.search(xml_record).group(1) recids.append(recid) if of != 'xm': records[recid] = format_record(None, of, xml_record=xml_record) elif of == 'xm': records[recid] = xml_record return (recids,records) except AttributeError: # in case there were no results found an Attribute error is raised return ([], {}) diff --git a/modules/websearch/lib/websearch_external_collections_searcher.py b/modules/websearch/lib/websearch_external_collections_searcher.py index 30e6df527..87d93f07d 100644 --- a/modules/websearch/lib/websearch_external_collections_searcher.py +++ b/modules/websearch/lib/websearch_external_collections_searcher.py @@ -1,604 +1,620 @@ # -*- coding: utf-8 -*- ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
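A recurring shape in the parser hunks above: every parse() and add_html_result() now threads through a caller-supplied limit (defaulting to CFG_EXTERNAL_COLLECTION_MAXRESULTS) instead of hard-coding the module constant, which is what lets the alert engine request up to CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS hits. A stripped-down sketch of the pattern (the MinimalResultsParser class is hypothetical, not part of the sources):

class MinimalResultsParser(object):
    """Hypothetical parser skeleton showing the per-call result limit."""
    def __init__(self):
        self.results = []

    def add_html_result(self, html, limit):
        # honour the caller-supplied cap rather than a module-wide constant
        if html and len(self.results) < limit:
            self.results.append(html)

    def parse_and_get_results(self, data, limit=25):
        # pretend every non-empty line of data is one formatted hit
        self.results = []
        for line in data.splitlines():
            if line.strip():
                self.add_html_result(line, limit)
        return self.results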
"""CDS Invenio external search Engines.""" __revision__ = "$Id$" import sys import urllib import cgi import types from invenio.config import CFG_SITE_LANG from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTIONS, CFG_EXTERNAL_COLLECTION_MAXRESULTS from invenio.websearch_external_collections_parser import CDSIndicoCollectionResutsParser, \ GoogleExternalCollectionResultsParser, \ KISSExternalCollectionResultsParser, GoogleScholarExternalCollectionResultsParser, \ GoogleBooksExternalCollectionResultsParser, KISSBooksExternalCollectionResultsParser, \ SPIRESExternalCollectionResultsParser, SCIRUSExternalCollectionResultsParser, \ CiteSeerExternalCollectionResultsParser def format_basic(basic): """Format a basic query""" if basic[3] == "w": return basic[1] else: return '"' + basic[1] + '"' def only_field(basic_search_units, fieldname): """Check if in the basic search units, there is only on field representated.""" for search_unit in basic_search_units: if search_unit[2] != fieldname: return False return True class ExternalSearchEngine(object): """Global class for interfaces to external search engines.""" lang_translator = None def __init__(self, configuration): self.search_url = "" self.combiner = " " self.name = None self.parser_params = None self.parser = None self.fetch_format = "" self.selected_by_default = False for (name, value) in configuration.iteritems(): setattr(self, name, value) if self.parser_params: setattr(self, 'parser', self.parser_params['parser'](self.parser_params)) if 'fetch_format' in self.parser_params.keys(): self.fetch_format = self.parser_params['fetch_format'] def build_units(self, basic_search_units): """ Build the research units for basic_search_units provided""" units = [] for search_unit in basic_search_units: unit = self.build_search_unit_unit(search_unit) if unit is not None: units.append(unit) return units def build_search_unit_unit(self, basic): """Build a search string from a search unit. This is the base version that just keep keywords with "+". """ if basic[0] == "+": return basic[1] return None - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build a URL for a specific set of search_units.""" units = self.build_units(basic_search_units) if len(units) == 0: return None request = self.combine_units(units) url_request = urllib.quote(request) return self.search_url + url_request def combine_units(self, units): """Combine the units to make a boolean AND query.""" return self.combiner.join(units) def __repr__(self): return 'ec:' + self.name class SortedFieldsSearchEngine(ExternalSearchEngine): """Class for search engines that used separate query box for fields.""" def __init__(self, configuration): self.fields = [] self.fields_content = {} self.search_url = "" self.converter = {} super(SortedFieldsSearchEngine, self).__init__(configuration) - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build a search URL. 
Reuse the search pattern found in req only with Invenio-based search engines""" self.clear_fields() self.fill_fields(basic_search_units) def clear_fields(self): """Clear fields to be able to build a new URL.""" self.fields_content = {} for field_name in self.fields: self.fields_content[field_name] = [] self.fields_content["default"] = [] def fill_fields(self, basic_search_units): """Fill fields with the apropriate research terms.""" for search_unit in basic_search_units: self.add_search_unit(search_unit) def add_search_unit(self, search_unit): """Add a search unit to fields to search.""" if search_unit[0] == "-": return search = format_basic(search_unit) field_name = search_unit[2] if field_name in self.fields: self.fields_content[field_name].append(search) else: self.fields_content["default"].append(search) # CERN class CDSIndicoSearchEngine(ExternalSearchEngine): """Global class for CDS Search Engines.""" index_translator = {'title' : 'title', 'author': 'speaker', 'fulltext': 'fulltext'} lang_translator = { 'ca': 'ca', 'cs': 'cs', 'de': 'de', 'el': 'el', 'en': 'en', 'es': 'es', 'fr': 'fr', 'it': 'it', 'ja': 'ja', 'no': 'no', 'pl': 'pl', 'pt': 'pt', 'ru': 'ru', 'sk': 'sk', 'sv': 'sv', 'uk': 'uk', 'default' : 'en'} def __init__(self, configuration): super(CDSIndicoSearchEngine, self).__init__(configuration) self.base_url = 'http://indicosearch.cern.ch/' self.search_url = 'http://indicosearchpublic.cern.ch/search?cc=INDICOPUBLIC&p=' self.parser = CDSIndicoCollectionResutsParser() def build_search_unit_unit(self, basic): """Build a search string from a search unit. This will also translate index name using the index_translator dictionary.""" operator = basic[0] pattern = basic[1] index = basic[2] search_type = basic[3] if self.index_translator.has_key(index): index = self.index_translator[index] else: index = None if index: return operator + index + ':"' + pattern + '"' else: if search_type == 'w': return operator + pattern else: return operator + '"' + pattern + '"' - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build an URL for a specific set of search_units.""" url = super(CDSIndicoSearchEngine, self).build_search_url(basic_search_units, None, lang) if not url: return None if self.lang_translator.has_key(lang): dest_lang = self.lang_translator[lang] else: dest_lang = self.lang_translator['default'] return url + '&ln=' + dest_lang + '&rg=' + str(CFG_EXTERNAL_COLLECTION_MAXRESULTS) class CERNEDMSSearchEngine(SortedFieldsSearchEngine): """CERN EDMS""" def __init__(self, configuration): super(CERNEDMSSearchEngine, self).__init__(configuration) self.base_url = "http://edms.cern.ch/cedar/plsql/fullsearch.doc_search" self.search_url = "http://edms.cern.ch/cedar/plsql/fullsearch.doc_search?p_search_type=ADVANCED&" self.search_url_simple = "http://edms.cern.ch/cedar/plsql/fullsearch.doc_search?p_search_type=BASE&p_free_text=" self.fields = ["author", "keyword", "abstract", "title", "reportnumber"] - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build an URL for CERN EDMS.""" super(CERNEDMSSearchEngine, self).build_search_url(basic_search_units) if len(self.fields_content["default"]) > 0: free_text = self.bind_fields(["author", "keyword", "abstract", "title", 
"reportnumber", "default"]) return self.search_url_simple + free_text else: authors = self.bind_fields(["author"]) title = self.bind_fields(["title", "abstract", "keyword"]) reportnumber = self.bind_fields(["reportnumber"]) url_parts = [] if authors != '': url_parts.append('p_author=' + authors) if title != "": url_parts.append('p_title=' + title) if reportnumber != "": url_parts.append('p_document_id=' + reportnumber) if len(url_parts) == 0: return None return self.search_url + "&".join(url_parts) def bind_fields(self, fieldname_list): """Combine some fields together.""" result = [] for key in fieldname_list: content = self.fields_content[key] if len(content) > 0: result.append(" ".join(content)) return urllib.quote(" ".join(result)) class CERNAgendaSearchEngine(ExternalSearchEngine): """CERN Agenda""" def __init__(self, configuration): super(CERNAgendaSearchEngine, self).__init__(configuration) self.base_url = "http://agenda.cern.ch" self.search_url_author = "http://agenda.cern.ch/search.php?field=speaker&search=Search&keywords=" self.search_url_title = "http://agenda.cern.ch/search.php?field=title&search=Search&keywords=" - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build an url for searching on CERN Agenda. This will only work if there is only author or title tags.""" if only_field(basic_search_units, "author"): self.search_url = self.search_url_author elif only_field(basic_search_units, "title"): self.search_url = self.search_url_title else: return None return super(CERNAgendaSearchEngine, self).build_search_url(basic_search_units) # Google class GoogleSearchEngine(ExternalSearchEngine): """Search engine class for Google """ def __init__(self, configuration): super(GoogleSearchEngine, self).__init__(configuration) self.base_url = "http://www.google.com" self.search_url = "http://www.google.com/search?q=" self.parser = GoogleExternalCollectionResultsParser() def build_search_unit_unit(self, search_unit): """Build a part of the google query.""" return self.build_search_unit_unit_google(search_unit, "") def build_search_unit_unit_google(self, search_unit, author_tag): """Parse a unit and return it in a google query form.""" sign = search_unit[0].replace("+", "") if search_unit[2] == "author": if search_unit[1].find(",") >= 0 and search_unit[3] != "p": (lastname, firstname) = search_unit[1].split(",", 1) return sign + author_tag + '"%s %s" OR ' % (lastname, firstname) + \ sign + author_tag + '"%s %s"' % (firstname, lastname) else: return sign + author_tag + search_unit[1] if search_unit[3] == "w": return sign + search_unit[1] else: return sign + '"' + search_unit[1] + '"' class GoogleBooksSearchEngine(GoogleSearchEngine): """Interface for searching on Google Books.""" def __init__(self, configuration): super(GoogleBooksSearchEngine, self).__init__(configuration) self.base_url = "http://books.google.com" self.search_url = "http://books.google.com/books?q=" self.parser = GoogleBooksExternalCollectionResultsParser() class GoogleScholarSearchEngine(GoogleSearchEngine): """Interface for searching on Google Scholar.""" def __init__(self, configuration): super(GoogleScholarSearchEngine, self).__init__(configuration) self.base_url = 'http://scholar.google.com' self.search_url = 'http://scholar.google.com/scholar?q=' self.parser = GoogleScholarExternalCollectionResultsParser() def build_search_unit_unit(self, search_unit): 
"""Build a unit for Google Scholar. Is different from Google one's because there is an author tag for authors.""" return self.build_search_unit_unit_google(search_unit, "author:") # Kiss class KissSearchEngine(SortedFieldsSearchEngine): """Search interface for KEK Information Service System. Not to be used directly but with Kiss*SearchEngine. """ def __init__(self, configuration): super(KissSearchEngine, self).__init__(configuration) self.converter = { "author": "AU=", "year": "YR=", "title": "TI=", "reportnumber": "RP=" } self.fields = self.converter.keys() self.parser = KISSExternalCollectionResultsParser() - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build an URL for a search.""" super(KissSearchEngine, self).build_search_url(basic_search_units) url_parts = [] for key in self.fields: if len(self.fields_content[key]) > 0: field_request = " and ".join(self.fields_content[key]) url_part = self.converter[key] + urllib.quote(field_request) url_parts.append(url_part) if len(url_parts) == 0: return None return self.search_url + "&".join(url_parts) def add_search_unit(self, search_unit): """Add a search unit to fields to search.""" if search_unit[0] == "+": search = search_unit[1] field_name = search_unit[2] if field_name == "author": self.add_author(search, search_unit[3]) elif field_name == "year" or field_name == "reportnumber": self.fields_content[field_name].append(search) else: self.fields_content["title"].append("'" + search + "'") def add_author(self, author_name, unit_type): """Handle an author unit. """ if author_name.find(",") >= 0 and unit_type != "p": (lastname, firstname) = author_name.split(",", 1) if firstname: self.fields_content["author"].append("'%s, %c'" % (lastname, firstname[0])) else: self.fields_content["author"].append("'%s'" % (lastname)) else: self.fields_content["author"].append("'" + author_name + "'") class KissForPreprintsSearchEngine(KissSearchEngine): """Interface for seaching on Kiss for Preprints""" def __init__(self, configuration): super(KissForPreprintsSearchEngine, self).__init__(configuration) self.base_url = "http://www-lib.kek.jp/KISS/kiss_prepri.html" self.search_url = "http://www-lib.kek.jp/cgi-bin/kiss_prepri.v8?" 
class KissForBooksAndJournalsSearchEngine(KissSearchEngine): """Interface for searching on KISS for Books and Journals""" def __init__(self, configuration): super(KissForBooksAndJournalsSearchEngine, self).__init__(configuration) self.base_url = "http://www-lib.kek.jp/KISS/kiss_book.html" self.search_url = "http://www-lib.kek.jp/cgi-bin/kiss_book.v8?DSP=1&" self.parser = KISSBooksExternalCollectionResultsParser() # Scirus class ScirusSearchEngine(ExternalSearchEngine): """Interface for the Scirus search engine.""" def __init__(self, configuration): super(ScirusSearchEngine, self).__init__(configuration) self.base_url = "http://www.scirus.com/srsapp/" self.search_url = "http://www.scirus.com/srsapp/search?q=" self.parser = SCIRUSExternalCollectionResultsParser() def build_search_unit_unit(self, search_unit): """Build a Scirus query fragment from a search unit""" sign = search_unit[0].replace("+", "") search = self.normalize_unit(search_unit) if search_unit[2] == "author": return sign + "au:" + search if search_unit[2] == "title": return sign + "ti:" + search if search_unit[2] == "keyword": return sign + "keyword:" + search if search_unit[3] == "w": return sign + search def normalize_unit(self, search_unit): """ Add double quotes if needed. """ if search_unit[3] == "a": return '"' + search_unit[1] + '"' else: return search_unit[1] # Spires class SPIRESSearchEngine(ExternalSearchEngine): """Interface for the SPIRES search engine.""" def __init__(self, configuration): super(SPIRESSearchEngine, self).__init__(configuration) self.base_url = "http://www.slac.stanford.edu/spires/hep/" self.search_url = "http://www.slac.stanford.edu/spires/find/hep/?rawcmd=find+" self.combiner = " and " self.parser = SPIRESExternalCollectionResultsParser() def build_search_unit_unit(self, basic): """Build a SPIRES search string from a search unit, mapping fields to the "a"/"t"/"k"/"r" prefixes and negating with "not".
""" word = format_basic(basic) if basic[0] == "-": sign = "not " else: sign = "" if basic[2] == "author": return sign + "a " + word if basic[2] == "title": return sign + "t " + word if basic[2] == "keyword": return sign + "k " + word if basic[2] == "reportnumber": return sign + "r " + word if basic[0] == "+": return "a " + word + " or t " + word + " or k " + word else: return "not a " + word + " and not t " + word + " and not k " + word class SPIRESBooksSearchEngine(SPIRESSearchEngine): """SPIRES Books""" def __init__(self, configuration): super(SPIRESBooksSearchEngine, self).__init__(configuration) self.base_url = "http://www.slac.stanford.edu/library/catalog/" self.search_url = "http://www.slac.stanford.edu/spires/find/books/?rawcmd=find+" self.parser = None class SPIRESJournalsSearchEngine(SPIRESSearchEngine): """SPIRES Journals""" def __init__(self, configuration): super(SPIRESJournalsSearchEngine, self).__init__(configuration) self.base_url = "http://www.slac.stanford.edu/spires/find/tserials/" self.search_url = "http://www.slac.stanford.edu/spires/find/tserials/?rawcmd=find+" # Misc class AmazonSearchEngine(ExternalSearchEngine): """Interface for searching books on Amazon.""" def __init__(self, configuration): super(AmazonSearchEngine, self).__init__(configuration) self.base_url = "http://www.amazon.com" self.search_url_general = "http://www.amazon.com/exec/obidos/external-search/?tag=cern&keyword=" self.search_url_author = "http://www.amazon.com/exec/obidos/external-search/?tag=cern&field-author=" - def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG): + def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Build an URL for Amazon""" if only_field(basic_search_units, "author"): self.search_url = self.search_url_author else: self.search_url = self.search_url_general return super(AmazonSearchEngine, self).build_search_url(basic_search_units) class CiteseerSearchEngine(ExternalSearchEngine): """Interface for searching on CiteSeer.""" def __init__(self, configuration): super(CiteseerSearchEngine, self).__init__(configuration) self.base_url = "http://citeseer.ist.psu.edu" self.search_url = "http://citeseer.ist.psu.edu/cs?q=" self.parser = CiteSeerExternalCollectionResultsParser() class INSPECSearchEngine(ExternalSearchEngine): """INSPEC""" def __init__(self, configuration): super(INSPECSearchEngine, self).__init__(configuration) self.base_url = "http://www.datastarweb.com/cern/" self.search_url = "http://www.datastarweb.com/cern/?dblabel=inzz&query=" self.combiner = " AND " def build_search_unit_unit(self, basic): """Build a search string from a search unit. This is the base version that just keep keywords with "+". """ word = format_basic(basic) if basic[0] == "-": return None if basic[2] == "author": return word + ".au." if basic[2] == "title": return word + ".ti." if basic[2] == "abstract": return word + ".ab." if basic[2] == "year": return word + ".yr." return word + ".ti. OR " + word + ".ab." 
class NEBISSearchEngine(ExternalSearchEngine):
    """NEBIS"""

    def __init__(self, configuration):
        super(NEBISSearchEngine, self).__init__(configuration)
        self.base_url = "http://opac.nebis.ch"
        self.search_url_general = "http://opac.nebis.ch/F/?func=find-b&find_code=WRD&REQUEST="
        self.search_url_author = "http://opac.nebis.ch/F/?func=find-b&find_code=WAU&REQUEST="
        self.search_url_title = "http://opac.nebis.ch/F/?func=find-b&find_code=WTI&REQUEST="

-    def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG):
+    def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS):
        """Build a URL for NEBIS"""
        if only_field(basic_search_units, "author"):
            self.search_url = self.search_url_author
        elif only_field(basic_search_units, "title"):
            self.search_url = self.search_url_title
        else:
            self.search_url = self.search_url_general
        return super(NEBISSearchEngine, self).build_search_url(basic_search_units)

# Invenio based

class CDSInvenioSearchEngine(ExternalSearchEngine):
    """Generic search engine class for Invenio based sites"""

    def __init__(self, configuration):
        super(CDSInvenioSearchEngine, self).__init__(configuration)

-    def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG):
+    def build_search_url(self, basic_search_units, req_args=None, lang=CFG_SITE_LANG, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS):
        """Build a URL for an Invenio based site"""
        if req_args:
            search_url_params = ""
            if type(req_args) is list:
                # req_args is by definition a string.  It is a list
                # only when we manually pass it as a list of recids.
                conjunction = " or "
                search_url_recids = conjunction.join(['recid:%s'] * len(req_args))
                params = tuple(req_args)
                search_url_recids %= params
                req_args = "p=" + search_url_recids
            req_args_dict = cgi.parse_qs(req_args)
            if req_args_dict.has_key('p'):
                search_url_params += urllib.quote(req_args_dict['p'][0])
            if req_args_dict.has_key('jrec'):
                search_url_params += '&jrec=' + req_args_dict['jrec'][0]
            if req_args_dict.has_key('rg'):
                search_url_params += '&rg=' + req_args_dict['rg'][0]
+            else:
+                search_url_params += '&rg=' + str(limit)
+            if req_args_dict.has_key('d1d'):
+                search_url_params += '&d1d=' + req_args_dict['d1d'][0]
+            if req_args_dict.has_key('d1m'):
+                search_url_params += '&d1m=' + req_args_dict['d1m'][0]
+            if req_args_dict.has_key('d1y'):
+                search_url_params += '&d1y=' + req_args_dict['d1y'][0]
+            if req_args_dict.has_key('d2d'):
+                search_url_params += '&d2d=' + req_args_dict['d2d'][0]
+            if req_args_dict.has_key('d2m'):
+                search_url_params += '&d2m=' + req_args_dict['d2m'][0]
+            if req_args_dict.has_key('d2y'):
+                search_url_params += '&d2y=' + req_args_dict['d2y'][0]
+            if req_args_dict.has_key('ap'):
+                search_url_params += '&ap=' + req_args_dict['ap'][0]
            search_url_params += '&of=' + self.fetch_format
            return self.search_url + search_url_params
        else:
            units = self.build_units(basic_search_units)
            if len(units) == 0:
                return None
            request = self.combine_units(units)
            url_request = urllib.quote(request)
-            return self.search_url + url_request + '&of=' + self.fetch_format
+            return self.search_url + url_request + '&rg=' + str(limit) + '&of=' + self.fetch_format
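    # Illustrative note, not part of the original class: with a hypothetical
    # configuration where search_url is "http://demo.example.org/search?p="
    # and fetch_format is "xm", passing req_args=[12, 34] takes the list
    # branch above and yields
    #
    #     http://demo.example.org/search?p=recid%3A12%20or%20recid%3A34&rg=<limit>&of=xm
    #
    # i.e. the recids are joined with " or ", URL-quoted, and the result set
    # is capped at 'limit' unless the original request already carried rg=.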
    def build_search_unit_unit(self, basic):
        """Build a search string from a search unit.
        Reconstructs original user query"""
        # TODO: correct & improve the printout
        # adding the colon in case a specific field is chosen
        if basic[2] != "":
            basic[2] = basic[2] + ":"
        # adding the single quotes in case a multi-word value is searched for
        if basic[3] == "a":
            basic[1] = "'" + basic[1] + "'"
        return basic[0] + " " + basic[2] + basic[1]

    def build_record_urls(self, recids):
        """Given a list of records this function returns a tuple of
        (recid, external_url) pairs"""
        if type(recids) is not list:
            recids = [recids]
        recids_urls = []
        for recid in recids:
            recids_urls.append((recid, self.record_url + recid))
        return tuple(recids_urls)

external_collections_dictionary = {}

def build_external_collections_dictionary():
    """Build the dictionary of the external collections."""
    for (name, configuration) in CFG_EXTERNAL_COLLECTIONS.iteritems():
        engine_name = configuration.pop('engine', 'External') + 'SearchEngine'
        configuration['name'] = name
        if globals().has_key(engine_name):
            external_collections_dictionary[name] = globals()[engine_name](configuration)
        else:
            sys.stderr.write("Error: not found " + engine_name + "\n")

build_external_collections_dictionary()

diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py
index aef98b310..2ece98057 100644
--- a/modules/websearch/lib/websearch_templates.py
+++ b/modules/websearch/lib/websearch_templates.py
@@ -1,4048 +1,4049 @@
# -*- coding: utf-8 -*-
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable-msg=C0301 __revision__ = "$Id$" import time import cgi import gettext import string import re import locale from urllib import quote, urlencode from xml.sax.saxutils import escape as xml_escape from invenio.config import \ CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_SPLIT_BY_COLLECTION, \ CFG_BIBRANK_SHOW_READING_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_BIBRANK_SHOW_CITATION_LINKS, \ CFG_BIBRANK_SHOW_CITATION_STATS, \ CFG_BIBRANK_SHOW_CITATION_GRAPHS, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_VERSION, \ CFG_SITE_URL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_ADMIN_EMAIL, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_WEBCOMMENT_ALLOW_COMMENTS, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_SHOW_COMMENT_COUNT, \ CFG_WEBSEARCH_SHOW_REVIEW_COUNT from invenio.dbquery import run_sql from invenio.messages import gettext_set_language #from invenio.search_engine_config import CFG_EXPERIMENTAL_FEATURES from invenio.urlutils import make_canonical_urlargd, drop_default_urlargd, create_html_link, create_url from invenio.htmlutils import nmtoken_from_string from invenio.webinterface_handler import wash_urlargd from invenio.bibrank_citation_searcher import get_cited_by_count from invenio.intbitset import intbitset from invenio.websearch_external_collections import external_collection_get_state, get_external_collection_engine from invenio.websearch_external_collections_utils import get_collection_id +from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS _RE_PUNCTUATION = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION) _RE_SPACES = re.compile(r"\s+") def get_fieldvalues(recID, tag): """Return list of field values for field TAG inside record RECID. 
FIXME: should be imported commonly for search_engine too.""" out = [] if tag == "001___": # we have asked for recID that is not stored in bibXXx tables out.append(str(recID)) else: # we are going to look inside bibXXx tables digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" \ "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out class Template: # This dictionary maps CDS Invenio language code to locale codes (ISO 639) tmpl_localemap = { 'bg': 'bg_BG', 'ca': 'ca_ES', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', 'pt': 'pt_BR', 'fr': 'fr_FR', 'it': 'it_IT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'rw': 'rw_RW', 'sk': 'sk_SK', 'cs': 'cs_CZ', 'no': 'no_NO', 'sv': 'sv_SE', 'uk': 'uk_UA', 'ja': 'ja_JA', 'pl': 'pl_PL', 'hr': 'hr_HR', 'zh_CN': 'zh_CN', 'zh_TW': 'zh_TW', 'hu': 'hu_HU', 'af': 'af_ZA', 'gl': 'gl_ES' } tmpl_default_locale = "en_US" # which locale to use by default, useful in case of failure # Type of the allowed parameters for the web interface for search results search_results_default_urlargd = { 'cc': (str, CFG_SITE_NAME), 'c': (list, []), 'p': (str, ""), 'f': (str, ""), 'rg': (int, 10), 'sf': (str, ""), 'so': (str, "d"), 'sp': (str, ""), 'rm': (str, ""), 'of': (str, "hb"), 'ot': (list, []), 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'p1': (str, ""), 'f1': (str, ""), 'm1': (str, ""), 'op1':(str, ""), 'p2': (str, ""), 'f2': (str, ""), 'm2': (str, ""), 'op2':(str, ""), 'p3': (str, ""), 'f3': (str, ""), 'm3': (str, ""), 'sc': (int, 0), 'jrec': (int, 0), 'recid': (int, -1), 'recidb': (int, -1), 'sysno': (str, ""), 'id': (int, -1), 'idb': (int, -1), 'sysnb': (str, ""), 'action': (str, "search"), 'action_search': (str, ""), 'action_browse': (str, ""), 'd1': (str, ""), 'd1y': (int, 0), 'd1m': (int, 0), 'd1d': (int, 0), 'd2': (str, ""), 'd2y': (int, 0), 'd2m': (int, 0), 'd2d': (int, 0), 'dt': (str, ""), 'ap': (int, 1), 'verbose': (int, 0), 'ec': (list, []), } # ...and for search interfaces search_interface_default_urlargd = { 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0)} # ...and for RSS feeds rss_default_urlargd = {'c' : (list, []), 'cc' : (str, ""), 'p' : (str, ""), 'f' : (str, ""), 'p1' : (str, ""), 'f1' : (str, ""), 'm1' : (str, ""), 'op1': (str, ""), 'p2' : (str, ""), 'f2' : (str, ""), 'm2' : (str, ""), 'op2': (str, ""), 'p3' : (str, ""), 'f3' : (str, ""), 'm3' : (str, "")} tmpl_openurl_accepted_args = { 'id' : (list, []), 'genre' : (str, ''), 'aulast' : (str, ''), 'aufirst' : (str, ''), 'auinit' : (str, ''), 'auinit1' : (str, ''), 'auinitm' : (str, ''), 'issn' : (str, ''), 'eissn' : (str, ''), 'coden' : (str, ''), 'isbn' : (str, ''), 'sici' : (str, ''), 'bici' : (str, ''), 'title' : (str, ''), 'stitle' : (str, ''), 'atitle' : (str, ''), 'volume' : (str, ''), 'part' : (str, ''), 'issue' : (str, ''), 'spage' : (str, ''), 'epage' : (str, ''), 'pages' : (str, ''), 'artnum' : (str, ''), 'date' : (str, ''), 'ssn' : (str, ''), 'quarter' : (str, ''), 'url_ver' : (str, ''), 'ctx_ver' : (str, ''), 'rft_val_fmt' : (str, ''), 'rft_id' : (list, []), 'rft.atitle' : (str, ''), 'rft.title' : (str, ''), 'rft.jtitle' : (str, ''), 'rft.stitle' : (str, ''), 'rft.date' : (str, ''), 'rft.volume' : (str, ''), 'rft.issue' : (str, ''), 
        'rft.spage' : (str, ''),
        'rft.epage' : (str, ''),
        'rft.pages' : (str, ''),
        'rft.artnumber' : (str, ''),
        'rft.issn' : (str, ''),
        'rft.eissn' : (str, ''),
        'rft.aulast' : (str, ''),
        'rft.aufirst' : (str, ''),
        'rft.auinit' : (str, ''),
        'rft.auinit1' : (str, ''),
        'rft.auinitm' : (str, ''),
        'rft.ausuffix' : (str, ''),
        'rft.au' : (list, []),
        'rft.aucorp' : (str, ''),
        'rft.isbn' : (str, ''),
        'rft.coden' : (str, ''),
        'rft.sici' : (str, ''),
        'rft.genre' : (str, 'unknown'),
        'rft.chron' : (str, ''),
        'rft.ssn' : (str, ''),
        'rft.quarter' : (int, ''),
        'rft.part' : (str, ''),
        'rft.btitle' : (str, ''),
        'rft.place' : (str, ''),
        'rft.pub' : (str, ''),
        'rft.edition' : (str, ''),
        'rft.tpages' : (str, ''),
        'rft.series' : (str, ''),
    }

    tmpl_opensearch_rss_url_syntax = "%(CFG_SITE_URL)s/rss?p={searchTerms}&jrec={startIndex}&rg={count}&ln={language}&startIndex" % {'CFG_SITE_URL': CFG_SITE_URL}
    tmpl_opensearch_html_url_syntax = "%(CFG_SITE_URL)s/search?p={searchTerms}&jrec={startIndex}&rg={count}&ln={language}&startIndex" % {'CFG_SITE_URL': CFG_SITE_URL}

    def tmpl_openurl2invenio(self, openurl_data):
        """
        Return an Invenio url corresponding to a search with the data
        included in the openurl form map.
        """
        def isbn_to_isbn13_isbn10(isbn):
            isbn = isbn.replace(' ', '').replace('-', '')
            if len(isbn) == 10 and isbn.isdigit():
                ## We already have isbn10
                return ('', isbn)
            if len(isbn) != 13 and isbn.isdigit():
                return ('', '')
            isbn13, isbn10 = isbn, isbn[3:-1]
            checksum = 0
            weight = 10
            for char in isbn10:
                checksum += int(char) * weight
                weight -= 1
            checksum = 11 - (checksum % 11)
            if checksum == 10:
                isbn10 += 'X'
            elif checksum == 11:
                isbn10 += '0'
            else:
                isbn10 += str(checksum)
            return (isbn13, isbn10)
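        ## Worked example for the helper above (illustrative, not part of
        ## the original code): for '978-0-306-40615-7' the hyphens are
        ## stripped, the EAN prefix '978' and the old check digit are cut
        ## away leaving '030640615', and the ISBN-10 check digit is computed
        ## with weights 10..2:
        ##     0*10 + 3*9 + 0*8 + 6*7 + 4*6 + 0*5 + 6*4 + 1*3 + 5*2 = 130
        ##     11 - (130 % 11) = 2
        ## so isbn_to_isbn13_isbn10 returns ('9780306406157', '0306406152').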
        from invenio.search_engine import perform_request_search
        doi = ''
        pmid = ''
        bibcode = ''
        oai = ''
        issn = ''
        isbn = ''
        for elem in openurl_data['id']:
            if elem.startswith('doi:'):
                doi = elem[len('doi:'):]
            elif elem.startswith('pmid:'):
                pmid = elem[len('pmid:'):]
            elif elem.startswith('bibcode:'):
                bibcode = elem[len('bibcode:'):]
            elif elem.startswith('oai:'):
                oai = elem[len('oai:'):]
        for elem in openurl_data['rft_id']:
            if elem.startswith('info:doi/'):
                doi = elem[len('info:doi/'):]
            elif elem.startswith('info:pmid/'):
                pmid = elem[len('info:pmid/'):]
            elif elem.startswith('info:bibcode/'):
                bibcode = elem[len('info:bibcode/'):]
            elif elem.startswith('info:oai/'):
                oai = elem[len('info:oai/'):]
            elif elem.startswith('urn:ISBN:'):
                isbn = elem[len('urn:ISBN:'):]
            elif elem.startswith('urn:ISSN:'):
                issn = elem[len('urn:ISSN:'):]

        ## Building author query
        aulast = openurl_data['rft.aulast'] or openurl_data['aulast']
        aufirst = openurl_data['rft.aufirst'] or openurl_data['aufirst']
        auinit = openurl_data['rft.auinit'] or \
                 openurl_data['auinit'] or \
                 openurl_data['rft.auinit1'] + ' ' + openurl_data['rft.auinitm'] or \
                 openurl_data['auinit1'] + ' ' + openurl_data['auinitm'] or aufirst[:1]
        auinit = auinit.upper()
        if aulast and aufirst:
            author_query = 'author:"%s, %s" or author:"%s, %s"' % (aulast, aufirst, aulast, auinit)
        elif aulast and auinit:
            author_query = 'author:"%s, %s"' % (aulast, auinit)
        else:
            author_query = ''

        ## Building title query
        title = openurl_data['rft.atitle'] or \
                openurl_data['atitle'] or \
                openurl_data['rft.btitle'] or \
                openurl_data['rft.title'] or \
                openurl_data['title']
        if title:
            title_query = 'title:"%s"' % title
            title_query_cleaned = 'title:"%s"' % _RE_SPACES.sub(' ', _RE_PUNCTUATION.sub(' ', title))
        else:
            title_query = ''

        ## Building journal query
        jtitle = openurl_data['rft.stitle'] or \
                 openurl_data['stitle'] or \
                 openurl_data['rft.jtitle'] or \
                 openurl_data['title']
        if jtitle:
            journal_query = 'journal:"%s"' % jtitle
        else:
            journal_query = ''

        ## Building isbn query
        isbn = isbn or openurl_data['rft.isbn'] or \
               openurl_data['isbn']
        isbn13, isbn10 = isbn_to_isbn13_isbn10(isbn)
        if isbn13:
            isbn_query = 'isbn:"%s" or isbn:"%s"' % (isbn13, isbn10)
        elif isbn10:
            isbn_query = 'isbn:"%s"' % isbn10
        else:
            isbn_query = ''

        ## Building issn query
        issn = issn or openurl_data['rft.eissn'] or \
               openurl_data['eissn'] or \
               openurl_data['rft.issn'] or \
               openurl_data['issn']
        if issn:
            issn_query = 'issn:"%s"' % issn
        else:
            issn_query = ''

        ## Building coden query
        coden = openurl_data['rft.coden'] or openurl_data['coden']
        if coden:
            coden_query = 'coden:"%s"' % coden
        else:
            coden_query = ''

        ## Building doi query
        if False: #doi: #FIXME Temporarily disabled until doi field is properly set up
            doi_query = 'doi:"%s"' % doi
        else:
            doi_query = ''

        ## Trying possible searches
        if doi_query:
            if perform_request_search(p=doi_query):
                return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                    'p' : doi_query,
                    'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                    'of' : 'hd'}))
        if isbn_query:
            if perform_request_search(p=isbn_query):
                return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                    'p' : isbn_query,
                    'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                    'of' : 'hd'}))
        if coden_query:
            if perform_request_search(p=coden_query):
                return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                    'p' : coden_query,
                    'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                    'of' : 'hd'}))
        if author_query and title_query:
            if perform_request_search(p='%s and %s' % (title_query, author_query)):
                return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                    'p' : '%s and %s' % (title_query, author_query),
                    'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                    'of' : 'hd'}))
        if title_query:
            result = len(perform_request_search(p=title_query))
            if result == 1:
                return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                    'p' : title_query,
                    'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                    'of' : 'hd'}))
            elif result > 1:
                return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                    'p' : title_query,
                    'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                    'of' : 'hb'}))

        ## Nothing worked, let's return a search that the user can improve
        if author_query and title_query:
            return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                'p' : '%s and %s' % (title_query_cleaned, author_query),
                'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                'of' : 'hb'}, {}))
        elif title_query:
            return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                'p' : title_query_cleaned,
                'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                'of' : 'hb'}, {}))
        else:
            ## Too little information was provided.
            return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                'p' : 'recid:-1',
                'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                'of' : 'hb'}, {}))

    def tmpl_opensearch_description(self, ln):
        """ Returns the OpenSearch description file of this site.
""" _ = gettext_set_language(ln) return """ %(short_name)s %(long_name)s %(description)s UTF-8 UTF-8 * %(CFG_SITE_ADMIN_EMAIL)s Powered by CDS Invenio %(CFG_SITE_URL)s """ % \ {'CFG_SITE_URL': CFG_SITE_URL, 'short_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)[:16], 'long_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'description': (_("Search on %(x_CFG_SITE_NAME_INTL)s") % \ {'x_CFG_SITE_NAME_INTL': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)})[:1024], 'CFG_SITE_ADMIN_EMAIL': CFG_SITE_ADMIN_EMAIL, 'rss_search_syntax': self.tmpl_opensearch_rss_url_syntax, 'html_search_syntax': self.tmpl_opensearch_html_url_syntax } def build_search_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search url. 'known_parameters' is the list of query parameters you inherit from your current query. You can then pass keyword arguments to modify this query. build_search_url(known_parameters, of="xm") The generated URL is absolute. """ parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Treat `as' argument specially: if parameters.has_key('aas'): parameters['as'] = parameters['aas'] del parameters['aas'] # Asking for a recid? Return a /record/ URL if 'recid' in parameters: target = "%s/record/%s" % (CFG_SITE_URL, parameters['recid']) del parameters['recid'] target += make_canonical_urlargd(parameters, self.search_results_default_urlargd) return target return "%s/search%s" % (CFG_SITE_URL, make_canonical_urlargd(parameters, self.search_results_default_urlargd)) def build_search_interface_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search interface URL.""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) c = parameters['c'] del parameters['c'] # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Treat `as' argument specially: if parameters.has_key('aas'): parameters['as'] = parameters['aas'] del parameters['aas'] if c and c != CFG_SITE_NAME: base = CFG_SITE_URL + '/collection/' + quote(c) else: base = CFG_SITE_URL return create_url(base, parameters) def build_rss_url(self, known_parameters, **kargs): """Helper for generating a canonical RSS URL""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Keep only interesting parameters argd = wash_urlargd(parameters, self.rss_default_urlargd) if argd: # Handle 'c' differently since it is a list c = argd.get('c', []) del argd['c'] # Create query, and drop empty params args = make_canonical_urlargd(argd, self.rss_default_urlargd) if c != []: # Add collections c = [quote(coll) for coll in c] if args == '': args += '?' 
            else:
                args += '&'
            args += 'c=' + '&c='.join(c)

        return CFG_SITE_URL + '/rss' + args

    def tmpl_record_page_header_content(self, req, recid, ln):
        """ Provide extra information in the header of /record pages """
        _ = gettext_set_language(ln)
        title = get_fieldvalues(recid, "245__a")

        if title:
            title = cgi.escape(title[0])
        else:
            title = _("Record") + ' #%d' % recid

        keywords = ', '.join(get_fieldvalues(recid, "6531_a"))
        description = ' '.join(get_fieldvalues(recid, "520__a"))
        description += "\n"
        description += '; '.join(get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a"))

        return [cgi.escape(x, True) for x in (title, description, keywords)]

    def tmpl_navtrail_links(self, aas, ln, dads):
        """
        Creates the navigation bar at top of each search page (*Home > Root collection > subcollection > ...*)

        Parameters:

          - 'aas' *int* - Should we display an advanced search box?

          - 'ln' *string* - The language to display

          - 'dads' *list* - A list of parent links, each one being a dictionary of ('name', 'longname')
        """
        out = []
        for url, name in dads:
            args = {'c': url, 'as': aas, 'ln': ln}
            out.append(create_html_link(self.build_search_interface_url(**args), {}, cgi.escape(name), {'class': 'navtrail'}))

        return ' > '.join(out)

    def tmpl_webcoll_body(self, ln, collection, te_portalbox,
                          searchfor, np_portalbox, narrowsearch,
                          focuson, instantbrowse, ne_portalbox):
        """
        Creates the body of the main search page.

        Parameters:

          - 'ln' *string* - language of the page being generated

          - 'collection' - collection id of the page being generated

          - 'te_portalbox' *string* - The HTML code for the portalbox on top of search

          - 'searchfor' *string* - The HTML code for the search for box

          - 'np_portalbox' *string* - The HTML code for the portalbox on bottom of search

          - 'narrowsearch' *string* - The HTML code for the search categories (left bottom of page)

          - 'focuson' *string* - The HTML code for the "focuson" categories (right bottom of page)

          - 'ne_portalbox' *string* - The HTML code for the bottom of the page
        """
        if not narrowsearch:
            narrowsearch = instantbrowse

        body = '''

%(searchfor)s %(np_portalbox)s ''' % { 'siteurl' : CFG_SITE_URL, 'searchfor' : searchfor, 'np_portalbox' : np_portalbox, 'narrowsearch' : narrowsearch, } if focuson: body += """""" body += """
%(narrowsearch)s""" + focuson + """
%(ne_portalbox)s """ % {'ne_portalbox' : ne_portalbox} return body def tmpl_portalbox(self, title, body): """Creates portalboxes based on the parameters Parameters: - 'title' *string* - The title of the box - 'body' *string* - The HTML code for the body of the box """ out = """
%(title)s
%(body)s
""" % {'title' : cgi.escape(title), 'body' : body} return out def tmpl_searchfor_light(self, ln, collection_id, collection_name, record_count, example_search_queries): # EXPERIMENTAL """Produces light *Search for* box for the current collection. Parameters: - 'ln' *string* - *str* The language to display - 'collection_id' - *str* The collection id - 'collection_name' - *str* The collection name in current language - 'example_search_queries' - *list* List of search queries given as example for this collection """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # Build example of queries for this collection example_search_queries_links = [create_html_link(self.build_search_url(p=example_query, ln=ln, aas=-1, c=collection_id), {}, cgi.escape(example_query), {'class': 'examplequery'}) \ for example_query in example_search_queries] example_query_html = '' if len(example_search_queries) > 0: example_query_link = example_search_queries_links[0] # offers more examples if possible more = '' if len(example_search_queries_links) > 1: more = ''' ''' % {'more_example_queries': '
'.join(example_search_queries_links[1:]), 'show_less':_("less"), 'show_more':_("more")} example_query_html += '''

%(example)s%(more)s

''' % {'example': _("Example: %(x_sample_search_query)s") % \ {'x_sample_search_query': example_query_link}, 'more': more} # display options to search in current collection or everywhere search_in = '' if collection_name != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in += ''' ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': collection_name}, 'collection_id': collection_id, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} # print commentary start: out += ''' %(search_in)s ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips'), 'search_in': search_in, 'example_query_html': example_query_html} return out def tmpl_searchfor_simple(self, ln, collection_id, collection_name, record_count, middle_option): """Produces simple *Search for* box for the current collection. Parameters: - 'ln' *string* - *str* The language to display - 'collection_id' - *str* The collection id - 'collection_name' - *str* The collection name in current language - 'record_count' - *str* Number of records in this collection - 'middle_option' *string* - HTML code for the options (any field, specific fields ...) """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # print commentary start: out += ''' ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'middle_option' : middle_option, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips')} return out def tmpl_searchfor_advanced(self, ln, # current language collection_id, collection_name, record_count, middle_option_1, middle_option_2, middle_option_3, searchoptions, sortoptions, rankoptions, displayoptions, formatoptions ): """ Produces advanced *Search for* box for the current collection. Parameters: - 'ln' *string* - The language to display - 'middle_option_1' *string* - HTML code for the first row of options (any field, specific fields ...) - 'middle_option_2' *string* - HTML code for the second row of options (any field, specific fields ...) - 'middle_option_3' *string* - HTML code for the third row of options (any field, specific fields ...) 
- 'searchoptions' *string* - HTML code for the search options - 'sortoptions' *string* - HTML code for the sort options - 'rankoptions' *string* - HTML code for the rank options - 'displayoptions' *string* - HTML code for the display options - 'formatoptions' *string* - HTML code for the format options """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'aas': 1, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for") % \ self.tmpl_nbrecs_info(record_count, "", "") header += ':' ssearchurl = self.build_search_interface_url(c=collection_id, aas=min(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) out += ''' ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'ssearch' : create_html_link(ssearchurl, {}, _("Simple Search")), 'header' : header, 'matchbox_m1' : self.tmpl_matchtype_box('m1', ln=ln), 'middle_option_1' : middle_option_1, 'andornot_op1' : self.tmpl_andornot_box('op1', ln=ln), 'matchbox_m2' : self.tmpl_matchtype_box('m2', ln=ln), 'middle_option_2' : middle_option_2, 'andornot_op2' : self.tmpl_andornot_box('op2', ln=ln), 'matchbox_m3' : self.tmpl_matchtype_box('m3', ln=ln), 'middle_option_3' : middle_option_3, 'msg_search' : _("Search"), 'msg_browse' : _("Browse"), 'msg_search_tips' : _("Search Tips")} if (searchoptions): out += """""" % { 'searchheader' : _("Search options:"), 'searchoptions' : searchoptions } out += """ """ % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(ln=ln), 'date_added' : self.tmpl_inputdate("d1", ln=ln), 'date_until' : self.tmpl_inputdate("d2", ln=ln), 'msg_sort' : _("Sort by:"), 'msg_display' : _("Display results:"), 'msg_format' : _("Output format:"), 'sortoptions' : sortoptions, 'rankoptions' : rankoptions, 'displayoptions' : displayoptions, 'formatoptions' : formatoptions } return out def tmpl_matchtype_box(self, name='m', value='', ln='en'): """Returns HTML code for the 'match type' selection box. Parameters: - 'name' *string* - The name of the produced select - 'value' *string* - The selected value (if any value is already selected) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) out = """ """ % {'name' : name, 'sela' : self.tmpl_is_selected('a', value), 'opta' : _("All of the words:"), 'selo' : self.tmpl_is_selected('o', value), 'opto' : _("Any of the words:"), 'sele' : self.tmpl_is_selected('e', value), 'opte' : _("Exact phrase:"), 'selp' : self.tmpl_is_selected('p', value), 'optp' : _("Partial phrase:"), 'selr' : self.tmpl_is_selected('r', value), 'optr' : _("Regular expression:") } return out def tmpl_is_selected(self, var, fld): """ Checks if *var* and *fld* are equal, and if yes, returns ' selected="selected"'. Useful for select boxes. Parameters: - 'var' *string* - First value to compare - 'fld' *string* - Second value to compare """ if var == fld: return ' selected="selected"' else: return "" def tmpl_andornot_box(self, name='op', value='', ln='en'): """ Returns HTML code for the AND/OR/NOT selection box. 
Parameters: - 'name' *string* - The name of the produced select - 'value' *string* - The selected value (if any value is already selected) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) out = """ """ % {'name' : name, 'sela' : self.tmpl_is_selected('a', value), 'opta' : _("AND"), 'selo' : self.tmpl_is_selected('o', value), 'opto' : _("OR"), 'seln' : self.tmpl_is_selected('n', value), 'optn' : _("AND NOT") } return out def tmpl_inputdate(self, name, ln, sy = 0, sm = 0, sd = 0): """ Produces *From Date*, *Until Date* kind of selection box. Suitable for search options. Parameters: - 'name' *string* - The base name of the produced selects - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) box = """ """ # month box += """ """ # year box += """ """ return box def tmpl_inputdatetype(self, dt='', ln=CFG_SITE_LANG): """ Produces input date type selection box to choose added-or-modified date search option. Parameters: - 'dt' *string - date type (c=created, m=modified) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) box = """ """ % { 'added': _("Added since:"), 'modified': _("Modified since:"), 'sel': self.tmpl_is_selected(dt, 'm'), } return box def tmpl_narrowsearch(self, aas, ln, type, father, has_grandchildren, sons, display_grandsons, grandsons): """ Creates list of collection descendants of type *type* under title *title*. If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes. Parameters: - 'aas' *bool* - Should we display an advanced search box? - 'ln' *string* - The language to display - 'type' *string* - The type of the produced box (virtual collections or normal collections) - 'father' *collection* - The current collection - 'has_grandchildren' *bool* - If the current collection has grand children - 'sons' *list* - The list of the sub-collections (first level) - 'display_grandsons' *bool* - If the grand children collections should be displayed (2 level deep display) - 'grandsons' *list* - The list of sub-collections (second level) """ # load the right message language _ = gettext_set_language(ln) title = {'r': _("Narrow by collection:"), 'v': _("Focus on:")}[type] if has_grandchildren: style_prolog = "" style_epilog = "" else: style_prolog = "" style_epilog = "" out = """""" % {'title' : title, 'narrowsearchbox': {'r': 'narrowsearchbox', 'v': 'focusonsearchbox'}[type]} # iterate through sons: i = 0 for son in sons: out += """""" % {'name' : cgi.escape(son.name) } # hosted collections are checked by default only when configured so elif str(son.dbquery).startswith("hostedcollection:"): external_collection_engine = get_external_collection_engine(str(son.name)) if external_collection_engine and external_collection_engine.selected_by_default: out += """""" % {'name' : cgi.escape(son.name) } elif external_collection_engine and not external_collection_engine.selected_by_default: out += """""" % {'name' : cgi.escape(son.name) } else: # strangely, the external collection engine was never found. In that case, # why was the hosted collection here in the first place? out += """""" % {'name' : cgi.escape(son.name) } else: out += """""" % {'name' : cgi.escape(son.name) } else: out += '' out += """""" i += 1 out += "
%(title)s
""" % \ { 'narrowsearchbox': {'r': 'narrowsearchbox', 'v': 'focusonsearchbox'}[type]} if type == 'r': if son.restricted_p() and son.restricted_p() != father.restricted_p(): out += """%(link)s%(recs)s """ % { 'link': create_html_link(self.build_search_interface_url(c=son.name, ln=ln, aas=aas), {}, style_prolog + cgi.escape(son.get_name(ln)) + style_epilog), 'recs' : self.tmpl_nbrecs_info(son.nbrecs, ln=ln)} # the following prints the "external collection" arrow just after the name and # number of records of the hosted collection # 1) we might want to make the arrow work as an anchor to the hosted collection as well. # That would probably require a new separate function under invenio.urlutils # 2) we might want to place the arrow between the name and the number of records of the hosted collection # That would require to edit/separate the above out += ... if type == 'r': if str(son.dbquery).startswith("hostedcollection:"): out += """%(name)s""" % \ { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(son.name), } if son.restricted_p(): out += """ [%(msg)s] """ % { 'msg' : _("restricted") } if display_grandsons and len(grandsons[i]): # iterate trough grandsons: out += """
""" for grandson in grandsons[i]: out += """ %(link)s%(nbrec)s """ % { 'link': create_html_link(self.build_search_interface_url(c=grandson.name, ln=ln, aas=aas), {}, cgi.escape(grandson.get_name(ln))), 'nbrec' : self.tmpl_nbrecs_info(grandson.nbrecs, ln=ln)} # the following prints the "external collection" arrow just after the name and # number of records of the hosted collection # Some relatives comments have been made just above if type == 'r': if str(grandson.dbquery).startswith("hostedcollection:"): out += """%(name)s""" % \ { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(grandson.name), } out += """
" return out def tmpl_searchalso(self, ln, engines_list, collection_id): _ = gettext_set_language(ln) box_name = _("Search also:") html = """
""" % locals() for engine in engines_list: internal_name = engine.name name = _(internal_name) base_url = engine.base_url if external_collection_get_state(engine, collection_id) == 3: checked = ' checked="checked"' else: checked = '' html += """""" % \ { 'checked': checked, 'base_url': base_url, 'internal_name': internal_name, 'name': cgi.escape(name), 'id': "extSearch" + nmtoken_from_string(name), 'siteurl': CFG_SITE_URL,} html += """
%(box_name)s
%(name)s
""" return html def tmpl_nbrecs_info(self, number, prolog=None, epilog=None, ln=CFG_SITE_LANG): """ Return information on the number of records. Parameters: - 'number' *string* - The number of records - 'prolog' *string* (optional) - An HTML code to prefix the number (if **None**, will be '(') - 'epilog' *string* (optional) - An HTML code to append to the number (if **None**, will be ')') """ if number is None: number = 0 if prolog is None: prolog = ''' (''' if epilog is None: epilog = ''')''' return prolog + self.tmpl_nice_number(number, ln) + epilog def tmpl_box_restricted_content(self, ln): """ Displays a box containing a *restricted content* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This collection is restricted. If you are authorized to access it, please click on the Search button.") def tmpl_box_hosted_collection(self, ln): """ Displays a box containing a *hosted collection* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This is a hosted external collection. Please click on the Search button to see its latest additions.") def tmpl_box_no_records(self, ln): """ Displays a box containing a *no content* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This collection does not contain any document yet.") def tmpl_instant_browse(self, aas, ln, recids, more_link = None): """ Formats a list of records (given in the recids list) from the database. Parameters: - 'aas' *int* - Advanced Search interface or not (0 or 1) - 'ln' *string* - The language to display - 'recids' *list* - the list of records from the database - 'more_link' *string* - the "More..." link for the record. If not given, will not be displayed """ # load the right message language _ = gettext_set_language(ln) body = '''''' for recid in recids: body += ''' ''' % { 'recid': recid['id'], 'date': recid['date'], 'body': recid['body'] } body += "
%(date)s %(body)s
" if more_link: body += '
' + \ create_html_link(more_link, {}, '[>> %s]' % _("more")) + \ '
' return '''
%(header)s
%(body)s
''' % {'header' : _("Latest additions:"), 'body' : body, } def tmpl_searchwithin_select(self, ln, fieldname, selected, values): """ Produces 'search within' selection box for the current collection. Parameters: - 'ln' *string* - The language to display - 'fieldname' *string* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ out = '""" return out def tmpl_select(self, fieldname, values, selected=None, css_class=''): """ Produces a generic select box Parameters: - 'css_class' *string* - optional, a css class to display this select with - 'fieldname' *list* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ if css_class != '': class_field = ' class="%s"' % css_class else: class_field = '' out = '""" return out def tmpl_record_links(self, recid, ln): """ Displays the *More info* and *Find similar* links for a record Parameters: - 'ln' *string* - The language to display - 'recid' *string* - the id of the displayed record """ # load the right message language _ = gettext_set_language(ln) out = '''
%(detailed)s - %(similar)s''' % { 'detailed': create_html_link(self.build_search_url(recid=recid, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}), 'similar': create_html_link(self.build_search_url(p="recid:%d" % recid, rm='wrd', ln=ln), {}, _("Similar records"), {'class': "moreinfo"})} if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recid) if num_timescited: out += ''' - %s ''' % \ create_html_link(self.build_search_url(p='recid:%d' % recid, rm='citation', ln=ln), {}, _("Cited by %i records") % num_timescited, {'class': "moreinfo"}) return out def tmpl_record_body(self, titles, authors, dates, rns, abstracts, urls_u, urls_z, ln): """ Displays the "HTML basic" format of a record Parameters: - 'authors' *list* - the authors (as strings) - 'dates' *list* - the dates of publication - 'rns' *list* - the quicknotes for the record - 'abstracts' *list* - the abstracts for the record - 'urls_u' *list* - URLs to the original versions of the record - 'urls_z' *list* - Not used """ out = "" for title in titles: out += "%(title)s " % { 'title' : cgi.escape(title) } if authors: out += " / " for author in authors[:CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD]: out += '%s; ' % \ create_html_link(self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author)) if len(authors) > CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD: out += "et al" for date in dates: out += " %s." % cgi.escape(date) for rn in rns: out += """ [%(rn)s]""" % {'rn' : cgi.escape(rn)} for abstract in abstracts: out += "
%(abstract)s [...]" % {'abstract' : cgi.escape(abstract[:1+string.find(abstract, '.')]) } for idx in range(0, len(urls_u)): out += """
%(name)s""" % { 'url' : urls_u[idx], 'name' : urls_u[idx] } return out def tmpl_search_in_bibwords(self, p, f, ln, nearest_box): """ Displays the *Words like current ones* links for a search Parameters: - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'nearest_box' *string* - the HTML code for the "nearest_terms" box - most probably from a create_nearest_terms_box call """ # load the right message language _ = gettext_set_language(ln) out = '

' if f: out += _("Words nearest to %(x_word)s inside %(x_field)s in any collection are:") % {'x_word': '' + cgi.escape(p) + '', 'x_field': '' + cgi.escape(f) + ''} else: out += _("Words nearest to %(x_word)s in any collection are:") % {'x_word': '' + cgi.escape(p) + ''} out += '
' + nearest_box + '

' return out def tmpl_nearest_term_box(self, p, ln, f, terminfo, intro): """ Displays the *Nearest search terms* box Parameters: - 'p' *string* - Current search words - 'f' *string* - a collection description (if the search has been completed in a collection) - 'ln' *string* - The language to display - 'terminfo': tuple (term, hits, argd) for each near term - 'intro' *string* - the intro HTML to prefix the box with """ out = '''''' for term, hits, argd in terminfo: if hits: hitsinfo = str(hits) else: hitsinfo = '-' term = cgi.escape(term) if term == p: # print search word for orientation: nearesttermsboxbody_class = "nearesttermsboxbodyselected" if hits > 0: term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearesttermsselected"}) else: nearesttermsboxbody_class = "nearesttermsboxbody" term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearestterms"}) out += '''\ ''' % {'hits': hitsinfo, 'nearesttermsboxbody_class': nearesttermsboxbody_class, 'term': term} out += "
%(hits)s   %(term)s
" return intro + "
" + out + "
" def tmpl_browse_pattern(self, f, fn, ln, browsed_phrases_in_colls, colls, rg): """ Displays the *Nearest search terms* box Parameters: - 'f' *string* - field (*not* i18nized) - 'fn' *string* - field name (i18nized) - 'ln' *string* - The language to display - 'browsed_phrases_in_colls' *array* - the phrases to display - 'colls' *array* - the list of collection parameters of the search (c's) - 'rg' *int* - the number of records """ # load the right message language _ = gettext_set_language(ln) out = """""" % { 'hits' : _("Hits"), 'fn' : cgi.escape(fn) } if len(browsed_phrases_in_colls) == 1: # one hit only found: phrase, nbhits = browsed_phrases_in_colls[0][0], browsed_phrases_in_colls[0][1] query = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """""" % {'nbhits': nbhits, 'link': create_html_link(self.build_search_url(query), {}, cgi.escape(phrase))} elif len(browsed_phrases_in_colls) > 1: # first display what was found but the last one: for phrase, nbhits in browsed_phrases_in_colls[:-1]: query = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """""" % {'nbhits' : nbhits, 'link': create_html_link(self.build_search_url(query), {}, cgi.escape(phrase))} # now display last hit as "previous term": phrase, nbhits = browsed_phrases_in_colls[0] query_previous = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} # now display last hit as "next term": phrase, nbhits = browsed_phrases_in_colls[-1] query_next = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """""" % {'link_previous': create_html_link(self.build_search_url(query_previous, action='browse'), {}, _("Previous")), 'link_next': create_html_link(self.build_search_url(query_next, action='browse'), {}, _("next")), 'siteurl' : CFG_SITE_URL} out += """
%(hits)s   %(fn)s
%(nbhits)s   %(link)s
                  """ % {'nbhits': nbhits,
                         'link': create_html_link(self.build_search_url(query),
                                                  {}, cgi.escape(phrase))}
        elif len(browsed_phrases_in_colls) > 1:
            # first display all the hits but the last one:
            for phrase, nbhits in browsed_phrases_in_colls[:-1]:
                query = {'c': colls,
                         'ln': ln,
                         'p': '"%s"' % phrase.replace('"', '\\"'),
                         'f': f,
                         'rg': rg}
                out += """
%(nbhits)s   %(link)s
                      """ % {'nbhits': nbhits,
                             'link': create_html_link(self.build_search_url(query),
                                                      {}, cgi.escape(phrase))}
            # now display the first hit as "previous term":
            phrase, nbhits = browsed_phrases_in_colls[0]
            query_previous = {'c': colls,
                              'ln': ln,
                              'p': '"%s"' % phrase.replace('"', '\\"'),
                              'f': f,
                              'rg': rg}
            # and the last hit as "next term":
            phrase, nbhits = browsed_phrases_in_colls[-1]
            query_next = {'c': colls,
                          'ln': ln,
                          'p': '"%s"' % phrase.replace('"', '\\"'),
                          'f': f,
                          'rg': rg}
            out += """
  %(link_previous)s %(link_next)s
""" return out def tmpl_search_box(self, ln, aas, cc, cc_intl, ot, sp, action, fieldslist, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, rm, p, f, coll_selects, d1y, d2y, d1m, d2m, d1d, d2d, dt, sort_fields, sf, so, ranks, sc, rg, formats, of, pl, jrec, ec, show_colls=True, show_title=True): """ Displays the *Nearest search terms* box Parameters: - 'ln' *string* - The language to display - 'aas' *bool* - Should we display an advanced search box? -1 -> 1, from simpler to more advanced - 'cc_intl' *string* - the i18nized current collection name, used for display - 'cc' *string* - the internal current collection name - 'ot', 'sp' *string* - hidden values - 'action' *string* - the action demanded by the user - 'fieldslist' *list* - the list of all fields available, for use in select within boxes in advanced search - 'p, f, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, op3, rm' *strings* - the search parameters - 'coll_selects' *array* - a list of lists, each containing the collections selects to display - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* - the dates' types (creation dates, modification dates) - 'sort_fields' *array* - the select information for the sort fields - 'sf' *string* - the currently selected sort field - 'so' *string* - the currently selected sort order ("a" or "d") - 'ranks' *array* - ranking methods - 'rm' *string* - selected ranking method - 'sc' *string* - split by collection or not - 'rg' *string* - selected results/page - 'formats' *array* - available output formats - 'of' *string* - the selected output format - 'pl' *string* - `limit to' search pattern - show_colls *bool* - propose coll selection box? - show_title *bool* show cc_intl in page title? """ # load the right message language _ = gettext_set_language(ln) # These are hidden fields the user does not manipulate # directly if aas == -1: argd = drop_default_urlargd({ 'ln': ln, 'aas': aas, 'ot': ot, 'sp': sp, 'ec': ec, }, self.search_results_default_urlargd) else: argd = drop_default_urlargd({ 'cc': cc, 'ln': ln, 'aas': aas, 'ot': ot, 'sp': sp, 'ec': ec, }, self.search_results_default_urlargd) out = "" if show_title: # display cc name if asked for out += '''

%(ccname)s

''' % {'ccname' : cgi.escape(cc_intl),} out += '''
''' % {'siteurl' : CFG_SITE_URL} # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) leadingtext = _("Search") if action == 'browse': leadingtext = _("Browse") if aas == 1: # print Advanced Search form: # define search box elements: out += ''' ''' % { 'simple_search': create_html_link(self.build_search_url(p=p1, f=f1, rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg), {}, _("Simple Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'matchbox1' : self.tmpl_matchtype_box('m1', m1, ln=ln), 'p1' : cgi.escape(p1,1), 'searchwithin1' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f1', selected = f1, values = self._add_mark_to_field(value=f1, fields=fieldslist, ln=ln) ), 'andornot1' : self.tmpl_andornot_box( name = 'op1', value = op1, ln = ln ), 'matchbox2' : self.tmpl_matchtype_box('m2', m2, ln=ln), 'p2' : cgi.escape(p2,1), 'searchwithin2' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f2', selected = f2, values = self._add_mark_to_field(value=f2, fields=fieldslist, ln=ln) ), 'andornot2' : self.tmpl_andornot_box( name = 'op2', value = op2, ln = ln ), 'matchbox3' : self.tmpl_matchtype_box('m3', m3, ln=ln), 'p3' : cgi.escape(p3,1), 'searchwithin3' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f3', selected = f3, values = self._add_mark_to_field(value=f3, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } elif aas == 0: # print Simple Search form: out += ''' ''' % { 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f', selected = f, values = self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } else: # EXPERIMENTAL # print light search form: search_in = '' if cc_intl != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in = ''' ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': cgi.escape(cc_intl)}, 'collection_id': cc, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} out += ''' %(search_in)s ''' % { 'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f', selected = f, values = self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips"), 'search_in': search_in } ## secondly, print Collection(s) box: if show_colls and aas > -1: # display collections only if there is more than one selects = '' for sel in coll_selects: selects += self.tmpl_select(fieldname='c', values=sel) out 
+= """ """ % { 'leading' : leadingtext, 'msg_coll' : _("collections"), 'colls' : selects, } ## thirdly, print search limits, if applicable: if action != _("Browse") and pl: out += """""" % { 'limitto' : _("Limit to:"), 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'pl' : cgi.escape(pl, 1), } ## fourthly, print from/until date boxen, if applicable: if action == _("Browse") or (d1y==0 and d1m==0 and d1d==0 and d2y==0 and d2m==0 and d2d==0): pass # do not need it else: cell_6_a = self.tmpl_inputdatetype(dt, ln) + self.tmpl_inputdate("d1", ln, d1y, d1m, d1d) cell_6_b = self.tmpl_inputdate("d2", ln, d2y, d2m, d2d) out += """""" % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(dt, ln), 'date1' : self.tmpl_inputdate("d1", ln, d1y, d1m, d1d), 'date2' : self.tmpl_inputdate("d2", ln, d2y, d2m, d2d), } ## fifthly, print Display results box, including sort/rank, formats, etc: if action != _("Browse") and aas > -1: rgs = [] for i in [10, 25, 50, 100, 250, 500]: if i <= CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS: rgs.append({ 'value' : i, 'text' : "%d %s" % (i, _("results"))}) # enrich sort fields list if we are sorting by some MARC tag: sort_fields = self._add_mark_to_field(value=sf, fields=sort_fields, ln=ln) # create sort by HTML box: out += """""" % { 'sort_by' : _("Sort by:"), 'display_res' : _("Display results:"), 'out_format' : _("Output format:"), 'select_sf' : self.tmpl_select(fieldname = 'sf', values = sort_fields, selected = sf, css_class = 'address'), 'select_so' : self.tmpl_select(fieldname = 'so', values = [{ 'value' : 'a', 'text' : _("asc.") }, { 'value' : 'd', 'text' : _("desc.") }], selected = so, css_class = 'address'), 'select_rm' : self.tmpl_select(fieldname = 'rm', values = ranks, selected = rm, css_class = 'address'), 'select_rg' : self.tmpl_select(fieldname = 'rg', values = rgs, selected = rg, css_class = 'address'), 'select_sc' : self.tmpl_select(fieldname = 'sc', values = [{ 'value' : 0, 'text' : _("single list") }, { 'value' : 1, 'text' : _("split by collection") }], selected = sc, css_class = 'address'), 'select_of' : self.tmpl_select( fieldname = 'of', selected = of, values = self._add_mark_to_field(value=of, fields=formats, chars=3, ln=ln), css_class = 'address'), } ## last but not least, print end of search box: out += """
""" return out def tmpl_input_hidden(self, name, value): "Produces the HTML code for a hidden field " if isinstance(value, list): list_input = [self.tmpl_input_hidden(name, val) for val in value] return "\n".join(list_input) # # Treat `as', `aas' arguments specially: if name == 'aas': name = 'as' return """""" % { 'name' : cgi.escape(str(name), 1), 'value' : cgi.escape(str(value), 1), } def _add_mark_to_field(self, value, fields, ln, chars=1): """Adds the current value as a MARC tag in the fields array Useful for advanced search""" # load the right message language _ = gettext_set_language(ln) out = fields if value and str(value[0:chars]).isdigit(): out.append({'value' : value, 'text' : str(value) + " " + _("MARC tag") }) return out def tmpl_search_pagestart(self, ln) : "page start for search page. Will display after the page header" return """
""" def tmpl_search_pageend(self, ln) : "page end for search page. Will display just before the page footer" return """
""" def tmpl_print_warning(self, msg, type, prologue, epilogue): """Prints warning message and flushes output. Parameters: - 'msg' *string* - The message string - 'type' *string* - the warning type - 'prologue' *string* - HTML code to display before the warning - 'epilogue' *string* - HTML code to display after the warning """ out = '\n%s' % (prologue) if type: out += '%s: ' % type out += '%s%s' % (msg, epilogue) return out def tmpl_print_search_info(self, ln, middle_only, collection, collection_name, collection_id, aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'aas' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += '''
        # left table cells: print collection name
        if not middle_only:
            out += '''
''' % {
              'collection_id': collection_id,
              'siteurl' : CFG_SITE_URL,
              'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln),
                                                  {}, cgi.escape(collection_name))
            }
        else:
            out += """
""" % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: out += """
" else: out += "" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "
%(collection_link)s %(recs_found)s  """ % { 'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') } else: out += "" if nb_found > rg: out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') + "   " if nb_found > rg: # navig.arrows are needed, since we have many hits query = {'p': p, 'f': f, 'cc': collection, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'ot': ot, 'aas': aas, 'ln': ln, 'p1': p1, 'p2': p2, 'p3': p3, 'f1': f1, 'f2': f2, 'f3': f3, 'm1': m1, 'm2': m2, 'm3': m3, 'op1': op1, 'op2': op2, 'sc': 0, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, } # @todo here def img(gif, txt): return '%(txt)s' % { 'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL} if jrec-rg > 1: out += create_html_link(self.build_search_url(query, jrec=1, rg=rg), {}, img('sb', _("begin")), {'class': 'img'}) if jrec > 1: out += create_html_link(self.build_search_url(query, jrec=max(jrec-rg, 1), rg=rg), {}, img('sp', _("previous")), {'class': 'img'}) if jrec+rg-1 < nb_found: out += "%d - %d" % (jrec, jrec+rg-1) else: out += "%d - %d" % (jrec, nb_found) if nb_found >= jrec+rg: out += create_html_link(self.build_search_url(query, jrec=jrec+rg, rg=rg), {}, img('sn', _("next")), {'class':'img'}) if nb_found >= jrec+rg+rg: out += create_html_link(self.build_search_url(query, jrec=nb_found-rg+1, rg=rg), {}, img('se', _("end")), {'class': 'img'}) # still in the navigation part cc = collection sc = 0 for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']: out += self.tmpl_input_hidden(name = var, value = vars()[var]) for var in ['ot', 'sp', 'rm']: if vars()[var]: out += self.tmpl_input_hidden(name = var, value = vars()[var]) if pl_in_url: fieldargs = cgi.parse_qs(pl_in_url) for fieldcode in all_fieldcodes: # get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: out += self.tmpl_input_hidden(name = fieldcode, value = val) out += """  %(jump)s """ % { 'jump' : _("jump to record:"), 'jrec' : jrec, } if not middle_only: out += "%(time)s 
" else: out += "
" out += "" return out def tmpl_print_hosted_search_info(self, ln, middle_only, collection, collection_name, collection_id, aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'aas' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += '''
''' % { 'collection_id': collection_id, 'siteurl' : CFG_SITE_URL, 'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln), {}, cgi.escape(collection_name)) } else: out += """
""" % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: # in case we have a hosted collection that timed out do not print its number of records, as it is yet unknown if nb_found != -963: out += """
" else: out += "" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "
%(collection_link)s %(recs_found)s  """ % {
                  'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '')
                }
            #elif nb_found == -963:
            #    out += """
            #      %(recs_found)s  """ % {
            #        'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '')
            #      }
        else:
            out += ""
            # we do not care about timed-out hosted collections here, because the number
            # of records found will never be bigger than rg anyway, since it is negative
            if nb_found > rg:
                out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') + "   "

        if nb_found > rg: # navig.arrows are needed, since we have many hits
            query = {'p': p, 'f': f,
                     'cc': collection,
                     'sf': sf, 'so': so,
                     'sp': sp, 'rm': rm,
                     'of': of, 'ot': ot,
                     'aas': aas, 'ln': ln,
                     'p1': p1, 'p2': p2, 'p3': p3,
                     'f1': f1, 'f2': f2, 'f3': f3,
                     'm1': m1, 'm2': m2, 'm3': m3,
                     'op1': op1, 'op2': op2,
                     'sc': 0,
                     'd1y': d1y, 'd1m': d1m, 'd1d': d1d,
                     'd2y': d2y, 'd2m': d2m, 'd2d': d2d,
                     'dt': dt,
                    }
            # @todo here
            def img(gif, txt):
                return '%(txt)s' % {
                           'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL}

            if jrec-rg > 1:
                out += create_html_link(self.build_search_url(query, jrec=1, rg=rg),
                                        {}, img('sb', _("begin")),
                                        {'class': 'img'})
            if jrec > 1:
                out += create_html_link(self.build_search_url(query, jrec=max(jrec-rg, 1), rg=rg),
                                        {}, img('sp', _("previous")),
                                        {'class': 'img'})
            if jrec+rg-1 < nb_found:
                out += "%d - %d" % (jrec, jrec+rg-1)
            else:
                out += "%d - %d" % (jrec, nb_found)
            if nb_found >= jrec+rg:
                out += create_html_link(self.build_search_url(query, jrec=jrec+rg, rg=rg),
                                        {}, img('sn', _("next")),
                                        {'class':'img'})
            if nb_found >= jrec+rg+rg:
                out += create_html_link(self.build_search_url(query, jrec=nb_found-rg+1, rg=rg),
                                        {}, img('se', _("end")),
                                        {'class': 'img'})

            # still in the navigation part
            cc = collection
            sc = 0
            for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']:
                out += self.tmpl_input_hidden(name = var, value = vars()[var])
            for var in ['ot', 'sp', 'rm']:
                if vars()[var]:
                    out += self.tmpl_input_hidden(name = var, value = vars()[var])
            if pl_in_url:
                fieldargs = cgi.parse_qs(pl_in_url)
                for fieldcode in all_fieldcodes:
                    # get_fieldcodes():
                    if fieldargs.has_key(fieldcode):
                        for val in fieldargs[fieldcode]:
                            out += self.tmpl_input_hidden(name = fieldcode, value = val)
            out += """  %(jump)s """ % {
                       'jump' : _("jump to record:"),
                       'jrec' : jrec, }

        if not middle_only:
            out += "%(time)s 
" else: out += "" out += "
" return out def tmpl_nice_number(self, number, ln=CFG_SITE_LANG, thousands_separator=',', max_ndigits_after_dot=None): """ Return nicely printed number NUMBER in language LN using given THOUSANDS_SEPARATOR character. If max_ndigits_after_dot is specified and the number is float, the number is rounded by taking in consideration up to max_ndigits_after_dot digit after the dot. This version does not pay attention to locale. See tmpl_nice_number_via_locale(). """ if type(number) is float: if max_ndigits_after_dot is not None: number = round(number, max_ndigits_after_dot) int_part, frac_part = str(number).split('.') return '%s.%s' % (self.tmpl_nice_number(int(int_part), ln, thousands_separator), frac_part) else: chars_in = list(str(number)) number = len(chars_in) chars_out = [] for i in range(0, number): if i % 3 == 0 and i != 0: chars_out.append(thousands_separator) chars_out.append(chars_in[number-i-1]) chars_out.reverse() return ''.join(chars_out) def tmpl_nice_number_via_locale(self, number, ln=CFG_SITE_LANG): """ Return nicely printed number NUM in language LN using the locale. See also version tmpl_nice_number(). """ if number is None: return None # Temporarily switch the numeric locale to the requested one, and format the number # In case the system has no locale definition, use the vanilla form ol = locale.getlocale(locale.LC_NUMERIC) try: locale.setlocale(locale.LC_NUMERIC, self.tmpl_localemap.get(ln, self.tmpl_default_locale)) except locale.Error: return str(number) try: number = locale.format('%d', number, True) except TypeError: return str(number) locale.setlocale(locale.LC_NUMERIC, ol) return number def tmpl_record_format_htmlbrief_header(self, ln): """Returns the header of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_footer(..), tmpl_record_format_htmlbrief_body(..) Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = """
""" % { 'siteurl' : CFG_SITE_URL, } return out def tmpl_record_format_htmlbrief_footer(self, ln): """Returns the footer of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_body(..) Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = """

""" % { 'basket' : _("ADD TO BASKET") } return out def tmpl_record_format_htmlbrief_body(self, ln, recid, row_number, relevance, record, relevances_prologue, relevances_epilogue): """Returns the html brief format of one record. Used in the search results list for each record. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_footer(..) Parameters: - 'ln' *string* - The language to display - 'row_number' *int* - The position of this record in the list - 'recid' *int* - The recID - 'relevance' *string* - The relevance of the record - 'record' *string* - The formatted record - 'relevances_prologue' *string* - HTML code to prepend the relevance indicator - 'relevances_epilogue' *string* - HTML code to append to the relevance indicator (used mostly for formatting) """ # load the right message language _ = gettext_set_language(ln) out = """ %(number)s. """ % {'recid': recid, 'number': row_number} if relevance: out += """
%(prologue)s%(relevance)s%(epilogue)s
""" % { 'prologue' : relevances_prologue, 'epilogue' : relevances_epilogue, 'relevance' : relevance } out += """%s""" % record return out def tmpl_print_results_overview(self, ln, results_final_nb_total, cpu_time, results_final_nb, colls, ec, hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below. Parameters: - 'ln' *string* - The language to display - 'results_final_nb_total' *int* - The total number of hits for the query - 'colls' *array* - The collections with hits, in the format: - 'coll[code]' *string* - The code of the collection (canonical name) - 'coll[name]' *string* - The display name of the collection - 'results_final_nb' *array* - The number of hits, indexed by the collection codes: - 'cpu_time' *string* - The time the query took - 'url_args' *string* - The rest of the search query - 'ec' *array* - selected external collections - 'hosted_colls_potential_results_p' *boolean* - check if there are any hosted collections searches that timed out during the pre-search """ if len(colls) == 1 and not ec: # if one collection only and no external collections, print nothing: return "" # load the right message language _ = gettext_set_language(ln) # first find total number of hits: # if there were no hosted collections that timed out during the pre-search print out the exact number of records found if not hosted_colls_potential_results_p: out = """''' % { 'more': create_html_link( self.build_search_url(p='recid:%d' % \ recID, #XXXX rm='citation', ln=ln), {}, _("more")), 'similar': similar} return out def tmpl_detailed_record_citations_citation_history(self, recID, ln, citationhistory): """Returns the citations history graph of this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citationhistory *string* - citationhistory box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and citationhistory is not None: out = '' % citationhistory else: out = "" else: out+= "no citationhistory -->" return out def tmpl_detailed_record_citations_co_citing(self, recID, ln, cociting): """Returns the list of cocited records Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - cociting *string* - cociting box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and cociting is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("Co-cited with: %s records") % len (cociting), cociting, ln) out = ''' ''' % { 'more': create_html_link(self.build_search_url(p='cocitedwith:%d' % recID, ln=ln), {}, _("more")), 'similar': similar } return out def tmpl_detailed_record_citations_self_cited(self, recID, ln, selfcited, citinglist): """Returns the list of self-citations for this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - selfcited list - a list of self-citations for recID """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and selfcited is not None: sc_scorelist = [] #a score list for print.. for s in selfcited: #copy weight from citations weight = 0 for c in citinglist: (crec, score) = c if crec == s: weight = score tmp = [s, weight] sc_scorelist.append(tmp) scite = self.tmpl_print_record_list_for_similarity_boxen ( _(".. 
of which self-citations: %s records") % len (selfcited), sc_scorelist, ln) out = '' return out def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict, citedbylist, kwtuples, authors, vtuples, ln): """Prints stuff about the author given as authorname. 1. Author name + his/her institutes. Each institute I has a link to papers where the auhtor has I as institute. 2. Publications, number: link to search by author. 3. Keywords 4. Author collabs 5. Publication venues like journals The parameters are data structures needed to produce 1-6, as follows: req - request pubs - list of recids, probably the records that have the author as an author authorname - evident num_downloads - evident aff_pubdict - a dictionary where keys are inst names and values lists of recordids citedbylist - list of recs that cite pubs kwtuples - keyword tuples like ('HIGGS BOSON',[3,4]) where 3 and 4 are recids authors - a list of authors that have collaborated with authorname """ from invenio.search_engine import perform_request_search _ = gettext_set_language(ln) #make a authoraff string that looks like CERN (1), Caltech (2) etc authoraff = "" for a in aff_pubdict.keys(): recids = "+or+".join(map(str, aff_pubdict[a])) print_a = a if (print_a == ' '): print_a = _("unknown") if authoraff: authoraff += '
' authoraff += ""+print_a+' ('+str(len(aff_pubdict[a]))+")" #print a "general" banner about the author req.write("

" + authorname + "

") #print affiliations line1 = "" + _("Affiliations:") + "" line2 = authoraff req.write(self.tmpl_print_searchresultbox(line1, line2)) # print frequent keywords: keywstr = "" if (kwtuples): for (kw, freq) in kwtuples: if keywstr: keywstr += '
' #create a link in author=x, keyword=y searchstr = create_html_link(self.build_search_url( p='author:"' + authorname + '" ' + 'keyword:"' + kw + '"'), {}, kw+" ("+str(freq)+")",) keywstr = keywstr+" "+searchstr banner = self.tmpl_print_searchresultbox("" + _("Frequent keywords:") + "", keywstr) req.write(banner) # print frequent co-authors: collabstr = "" if (authors): for c in authors: c = c.strip() if collabstr: collabstr += '
' #do not add this person him/herself in the list if not c == authorname: commpubs = intbitset(pubs) & intbitset(perform_request_search(p="author:\"%s\" author:\"%s\"" % (authorname, c))) collabstr = collabstr + " "+c+" ("+str(len(commpubs))+")" banner = self.tmpl_print_searchresultbox("" + _("Frequent co-authors:") + "", collabstr) req.write(banner) # print frequently publishes in journals: if (vtuples): pubinfo = "" for t in vtuples: (journal, num) = t pubinfo += create_html_link(self.build_search_url(p='author:"' + authorname + '" ' + \ 'journal:"' + journal + '"'), {}, journal + " ("+str(num)+")
") banner = self.tmpl_print_searchresultbox("" + _("Frequently publishes in:") + "", pubinfo) req.write(banner) # print papers: searchstr = create_html_link(self.build_search_url(p=authorname, f='author'), {}, "All papers ("+str(len(pubs))+")",) line1 = "" + _("Papers:") + "" line2 = searchstr if num_downloads: line2 + " ("+_("downloaded")+" " line2 += str(num_downloads)+" "+_("times")+")" from invenio.search_engine import perform_request_search if CFG_INSPIRE_SITE: CFG_COLLS = ['Book', 'Conference', 'Introductory', 'Lectures', 'Preprint', 'Published', 'Report', 'Review', 'Thesis'] else: CFG_COLLS = ['Article', 'Book', 'Preprint',] for coll in CFG_COLLS: collsearch = intbitset(pubs) & intbitset(perform_request_search(p="collection:"+coll)) if len(collsearch) > 0: num = len(collsearch) line2 += "
" + create_html_link(self.build_search_url(p='author:"' + authorname + '" ' + \ 'collection:' + coll), {}, coll + " ("+str(num)+")",) banner = self.tmpl_print_searchresultbox(line1, line2) req.write(banner) # print citations: if len(citedbylist): line1 = "" + _("Citations:") + "" line2 = "" req.write(self.tmpl_print_searchresultbox(line1, line2)) # they will be printed after that def tmpl_detailed_record_references(self, recID, ln, content): """Returns the discussion page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'content' *string* - The main content of the page """ # load the right message language _ = gettext_set_language(ln) out = '' if content is not None: out += content return out def tmpl_citesummary_prologue(self, d_total_recs, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, prologue. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """

%(founds)s
""" % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found %(x_nb_records)s records in %(x_nb_seconds)s seconds.") %\ {'x_fmt_open': '', 'x_fmt_close': '', 'x_nb_records': '' + self.tmpl_nice_number(results_final_nb_total, ln) + '', 'x_nb_seconds': '%.2f' % cpu_time} } # if there were (only) hosted_collections that timed out during the pre-search print out a fuzzier message else: if results_final_nb_total == 0: out = """
%(founds)s
""" % { 'founds' : _("%(x_fmt_open)sResults overview%(x_fmt_close)s") %\ {'x_fmt_open': '', 'x_fmt_close': ''} } elif results_final_nb_total > 0: out = """
%(founds)s
""" % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found at least %(x_nb_records)s records in %(x_nb_seconds)s seconds.") %\ {'x_fmt_open': '', 'x_fmt_close': '', 'x_nb_records': '' + self.tmpl_nice_number(results_final_nb_total, ln) + '', 'x_nb_seconds': '%.2f' % cpu_time} } # then print hits per collection: for coll in colls: if results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] > 0: out += '''%(coll_name)s, %(number)s
''' % { 'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % ('' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '') } # the following is used for hosted collections that have timed out, # i.e. for which we don't know the exact number of results yet. elif results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] == -963: out += '''%(coll_name)s''' % { 'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % ('' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '') } out += "
" return out - def tmpl_print_hosted_results(self, ln, infos, of=None, req=None): + def tmpl_print_hosted_results(self, url_and_engine, ln, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Print results of a given search engine. """ _ = gettext_set_language(ln) - url = infos[0] - engine = infos[1] - name = _(engine.name) + #url = url_and_engine[0] + engine = url_and_engine[1] + #name = _(engine.name) db_id = get_collection_id(engine.name) - base_url = engine.base_url + #base_url = engine.base_url out = "" - results = engine.parser.parse_and_get_results(None, of=of, req=req, parseonly=True) + results = engine.parser.parse_and_get_results(None, of=of, req=req, limit=limit, parseonly=True) if len(results) != 0: if of == 'hb': out += """
""" % { 'siteurl' : CFG_SITE_URL, 'col_db_id' : db_id, } else: if of == 'hb': out += """
""" for result in results: out += result.html if len(results) != 0: if of == 'hb': out += """

""" % { 'basket' : _("ADD TO BASKET") } else: if of == 'hb': out += """
""" # we have already checked if there are results or no, maybe the following if should be removed? if not results: if of.startswith("h"): out = _('No results found...') + '
' return out def tmpl_print_searchresultbox(self, header, body): """print a nicely formatted box for search results """ #_ = gettext_set_language(ln) # first find total number of hits: out = '
'+header+'
'+body+'
' return out def tmpl_search_no_boolean_hits(self, ln, nearestterms): """No hits found, proposes alternative boolean queries Parameters: - 'ln' *string* - The language to display - 'nearestterms' *array* - Parts of the interface to display, in the format: - 'nearestterms[nbhits]' *int* - The resulting number of hits - 'nearestterms[url_args]' *string* - The search parameters - 'nearestterms[p]' *string* - The search terms """ # load the right message language _ = gettext_set_language(ln) out = _("Boolean query returned no hits. Please combine your search terms differently.") out += '''
''' for term, hits, argd in nearestterms: out += '''\ ''' % {'hits' : hits, 'link': create_html_link(self.build_search_url(argd), {}, cgi.escape(term), {'class': "nearestterms"})} out += """
%(hits)s   %(link)s
""" return out def tmpl_similar_author_names(self, authors, ln): """No hits found, proposes alternative boolean queries Parameters: - 'authors': a list of (name, hits) tuples - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' % { 'similar' : _("See also: similar author names") } for author, hits in authors: out += '''\ ''' % {'link': create_html_link( self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author), {'class':"google"}), 'nb' : hits} out += """
%(similar)s
%(nb)d %(link)s
""" return out def tmpl_print_record_detailed(self, recID, ln): """Displays a detailed on-the-fly record Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ # okay, need to construct a simple "Detailed record" format of our own: out = "

 " # secondly, title: titles = get_fieldvalues(recID, "245__a") for title in titles: out += "

%s

" % cgi.escape(title) # thirdly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += "

" for author in authors: out += '%s; ' % create_html_link(self.build_search_url( ln=ln, p=author, f='author'), {}, cgi.escape(author)) out += "

" # fourthly, date of creation: dates = get_fieldvalues(recID, "260__c") for date in dates: out += "

%s

" % date # fifthly, abstract: abstracts = get_fieldvalues(recID, "520__a") for abstract in abstracts: out += """

Abstract: %s

""" % abstract # fifthly bis, keywords: keywords = get_fieldvalues(recID, "6531_a") if len(keywords): out += """

Keyword(s):""" for keyword in keywords: out += '%s; ' % create_html_link( self.build_search_url(ln=ln, p=keyword, f='keyword'), {}, cgi.escape(keyword)) out += '

' # fifthly bis bis, published in: prs_p = get_fieldvalues(recID, "909C4p") prs_v = get_fieldvalues(recID, "909C4v") prs_y = get_fieldvalues(recID, "909C4y") prs_n = get_fieldvalues(recID, "909C4n") prs_c = get_fieldvalues(recID, "909C4c") for idx in range(0, len(prs_p)): out += """

Publ. in: %s""" % prs_p[idx] if prs_v and prs_v[idx]: out += """%s""" % prs_v[idx] if prs_y and prs_y[idx]: out += """(%s)""" % prs_y[idx] if prs_n and prs_n[idx]: out += """, no.%s""" % prs_n[idx] if prs_c and prs_c[idx]: out += """, p.%s""" % prs_c[idx] out += """.

""" # sixthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") for idx in range(0, len(urls_u)): link_text = "URL" try: if urls_z[idx]: link_text = urls_z[idx] except IndexError: pass out += """

%s: %s

""" % (link_text, urls_u[idx], urls_u[idx]) # print some white space at the end: out += "

" return out def tmpl_print_record_list_for_similarity_boxen(self, title, recID_score_list, ln=CFG_SITE_LANG): """Print list of records in the "hs" (HTML Similarity) format for similarity boxes. RECID_SCORE_LIST is a list of (recID1, score1), (recID2, score2), etc. """ from invenio.search_engine import print_record, record_public_p recID_score_list_to_be_printed = [] # firstly find 5 first public records to print: nb_records_to_be_printed = 0 nb_records_seen = 0 while nb_records_to_be_printed < 5 and nb_records_seen < len(recID_score_list) and nb_records_seen < 50: # looking through first 50 records only, picking first 5 public ones (recID, score) = recID_score_list[nb_records_seen] nb_records_seen += 1 if record_public_p(recID): nb_records_to_be_printed += 1 recID_score_list_to_be_printed.append([recID, score]) # secondly print them: out = '''
%(title)s
''' % { 'title': cgi.escape(title) } for recid, score in recID_score_list_to_be_printed: out += ''' ''' % { 'score': score, 'info' : print_record(recid, format="hs", ln=ln), } out += """
(%(score)s)  %(info)s
""" return out def tmpl_print_record_brief(self, ln, recID): """Displays a brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ out = "" # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_fieldvalues(recID, "245__a") # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") # thirdly, date of creation: dates = get_fieldvalues(recID, "260__c") # thirdly bis, report numbers: rns = get_fieldvalues(recID, "037__a") rns = get_fieldvalues(recID, "088__a") # fourthly, beginning of abstract: abstracts = get_fieldvalues(recID, "520__a") # fifthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") ## unAPI identifier out = '\n' % recID out += self.tmpl_record_body( titles = titles, authors = authors, dates = dates, rns = rns, abstracts = abstracts, urls_u = urls_u, urls_z = urls_z, ln=ln) return out def tmpl_print_record_brief_links(self, ln, recID): """Displays links for brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ from invenio.webcommentadminlib import get_nb_reviews, get_nb_comments # load the right message language _ = gettext_set_language(ln) out = '
' if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: alephsysnos = get_fieldvalues(recID, "970__a") if len(alephsysnos)>0: alephsysno = alephsysnos[0] out += '%s' % \ create_html_link(self.build_search_url(recid=alephsysno, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '%s' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '%s' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) out += ' - %s' % \ create_html_link(self.build_search_url(p="recid:%d" % recID, rm="wrd", ln=ln), {}, _("Similar records"), {'class': "moreinfo"}) if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recID) if num_timescited: out += ' - %s' % \ create_html_link(self.build_search_url(p="recid:%d" % recID, rm="citation", ln=ln), {}, num_timescited > 1 and _("Cited by %i records") % num_timescited or _("Cited by 1 record"), {'class': "moreinfo"}) else: out+="" if CFG_WEBCOMMENT_ALLOW_COMMENTS and CFG_WEBSEARCH_SHOW_COMMENT_COUNT: num_comments = get_nb_comments(recID) if num_comments: out += ' - %s' % \ create_html_link(CFG_SITE_URL + '/record/' + str(recID) + '/comments?ln=%s' % ln, {}, num_comments > 1 and _("%i comments") % (num_comments) or _("1 comment"), {'class': "moreinfo"}) else: out+="" if CFG_WEBCOMMENT_ALLOW_REVIEWS and CFG_WEBSEARCH_SHOW_REVIEW_COUNT: num_reviews = get_nb_reviews(recID) if num_reviews: out += ' - %s' % \ create_html_link(CFG_SITE_URL + '/record/' + str(recID) + '/reviews?ln=%s' % ln, {}, num_reviews > 1 and _("%i reviews") % (num_reviews) or _("1 review"), {'class': "moreinfo"}) else: out+="" out+='
' return out def tmpl_xml_rss_prologue(self, current_url=None, previous_url=None, next_url=None, first_url=None, last_url=None, nb_found=None, jrec=None, rg=None): """Creates XML RSS 2.0 prologue.""" out = """ %(sitename)s %(siteurl)s %(sitename)s latest documents %(sitelang)s %(timestamp)s CDS Invenio %(version)s %(sitesupportemail)s %(timetolive)s%(previous_link)s%(next_link)s%(current_link)s%(total_results)s%(start_index)s%(total_results)s %(siteurl)s/img/cds.png %(sitename)s %(siteurl)s \n""" return out def tmpl_xml_nlm_prologue(self): """Creates XML NLM prologue.""" out = """\n""" return out def tmpl_xml_nlm_epilogue(self): """Creates XML NLM epilogue.""" out = """\n""" return out def tmpl_xml_refworks_prologue(self): """Creates XML RefWorks prologue.""" out = """\n""" return out def tmpl_xml_refworks_epilogue(self): """Creates XML RefWorks epilogue.""" out = """\n""" return out def tmpl_xml_endnote_prologue(self): """Creates XML EndNote prologue.""" out = """\n""" return out def tmpl_xml_endnote_epilogue(self): """Creates XML EndNote epilogue.""" out = """\n""" return out def tmpl_xml_marc_prologue(self): """Creates XML MARC prologue.""" out = """\n""" return out def tmpl_xml_marc_epilogue(self): """Creates XML MARC epilogue.""" out = """\n""" return out def tmpl_xml_mods_prologue(self): """Creates XML MODS prologue.""" out = """\n""" return out def tmpl_xml_mods_epilogue(self): """Creates XML MODS epilogue.""" out = """\n""" return out def tmpl_xml_default_prologue(self): """Creates XML default format prologue. (Sanity calls only.)""" out = """\n""" return out def tmpl_xml_default_epilogue(self): """Creates XML default format epilogue. (Sanity calls only.)""" out = """\n""" return out def tmpl_collection_not_found_page_title(self, colname, ln=CFG_SITE_LANG): """ Create page title for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = _("Collection %s Not Found") % cgi.escape(colname) return out def tmpl_collection_not_found_page_body(self, colname, ln=CFG_SITE_LANG): """ Create page body for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = """

%(title)s

%(sorry)s

%(you_may_want)s

""" % { 'title': self.tmpl_collection_not_found_page_title(colname, ln), 'sorry': _("Sorry, collection %s does not seem to exist.") % \ ('' + cgi.escape(colname) + ''), 'you_may_want': _("You may want to start browsing from %s.") % \ ('' + \ cgi.escape(CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)) + '')} return out def tmpl_alert_rss_teaser_box_for_query(self, id_query, ln): """Propose teaser for setting up this query as alert or RSS feed. Parameters: - 'id_query' *int* - ID of the query we make teaser for - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) # get query arguments: res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,)) argd = {} if res: argd = cgi.parse_qs(res[0][0]) rssurl = self.build_rss_url(argd) alerturl = CFG_SITE_URL + '/youralerts/input?ln=%s&idq=%s' % (ln, id_query) out = '''
        out = '''
%(similar)s
%(msg_alert)s
''' % {
          'similar' : _("Interested in being notified about new results for this query?"),
          'msg_alert': _("""Set up a personal %(x_url1_open)semail alert%(x_url1_close)s
                                  or subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \
                        {'x_url1_open': ' ' % (alerturl, CFG_SITE_URL) + ' ' % (alerturl),
                         'x_url1_close': '',
                         'x_url2_open': ' ' % (rssurl, CFG_SITE_URL) + ' ' % rssurl,
                         'x_url2_close': '',
                        }}
        return out

    def tmpl_detailed_record_metadata(self, recID, ln, format, content, creationdate=None, modificationdate=None):
        """Returns the main detailed page of a record

        Parameters:

          - 'recID' *int* - The ID of the printed record

          - 'ln' *string* - The language to display

          - 'format' *string* - The format used to print the record

          - 'content' *string* - The main content of the page

          - 'creationdate' *string* - The creation date of the printed record

          - 'modificationdate' *string* - The last modification date of the printed record
        """
        _ = gettext_set_language(ln)

        ## unAPI identifier
        out = '<abbr class="unapi-id" title="%s"></abbr>\n' % recID
        out += content
        return out

    def tmpl_detailed_record_statistics(self, recID, ln, downloadsimilarity, downloadhistory, viewsimilarity):
        """Returns the statistics page of a record

        Parameters:

          - 'recID' *int* - The ID of the printed record

          - 'ln' *string* - The language to display

          - downloadsimilarity *string* - downloadsimilarity box

          - downloadhistory *string* - downloadhistory box

          - viewsimilarity *string* - viewsimilarity box
        """
        # load the right message language
        _ = gettext_set_language(ln)

        out = ''

        if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and downloadsimilarity is not None:
            similar = self.tmpl_print_record_list_for_similarity_boxen (
                _("People who downloaded this document also downloaded:"), downloadsimilarity, ln)

            out = ''
            out += '''
''' % {
              'siteurl': CFG_SITE_URL,
              'recid': recID,
              'ln': ln,
              'similar': similar,
              'more': _("more"),
              'graph': downloadsimilarity
            }
            out += '
%(graph)s
%(similar)s
' out += '
' if CFG_BIBRANK_SHOW_READING_STATS and viewsimilarity is not None: out += self.tmpl_print_record_list_for_similarity_boxen ( _("People who viewed this page also viewed:"), viewsimilarity, ln) if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS and downloadhistory is not None: out += downloadhistory + '
' return out def tmpl_detailed_record_citations_prologue(self, recID, ln): """Returns the prologue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '' def tmpl_detailed_record_citations_epilogue(self, recID, ln): """Returns the epilogue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '
' def tmpl_detailed_record_citations_citing_list(self, recID, ln, citinglist): """Returns the list of record citing this one Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citinglist *list* - a list of tuples [(x1,y1),(x2,y2),..] where x is doc id and y is number of citations """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and citinglist is not None: similar = self.tmpl_print_record_list_for_similarity_boxen( _("Cited by: %s records") % len (citinglist), citinglist, ln) out += '''
%(similar)s %(more)s

%s
%(similar)s %(more)s
'+scite+'
""" % \ {'msg_title': _("Citation summary results"),} for coll, colldef in l_colls: out += '' % coll out += '' out += """""" % \ {'msg_recs': _("Total number of citable papers analyzed:"),} for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) if colldef: link_url += ' ' + quote(colldef) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_total_recs[coll], ln) out += '' % (link_url, link_text) out += '' return out def tmpl_citesummary_overview(self, d_total_cites, d_avg_cites, l_colls, ln=CFG_SITE_LANG): """HTML citesummary format, overview. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """""" % \ {'msg_cites': _("Total number of citations:"),} for coll, colldef in l_colls: out += '' % self.tmpl_nice_number(d_total_cites[coll], ln) out += '' out += """""" % \ {'msg_avgcit': _("Average citations per paper:"),} for coll, colldef in l_colls: out += '' % d_avg_cites[coll] out += '' out += """""" % \ {'msg_breakdown': _("Breakdown of papers by citations:"),} return out def tmpl_citesummary_breakdown_by_fame(self, d_cites, low, high, fame, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, breakdown by fame. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """""" % \ {'fame': fame,} for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) + ' ' if colldef: link_url += quote(colldef) + ' ' if low == 0 and high == 0: link_url += quote('cited:0') else: link_url += quote('cited:%i->%i' % (low, high)) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_cites[coll], ln) out += '' % (link_url, link_text) out += '' return out def tmpl_citesummary_epilogue(self, ln=CFG_SITE_LANG): """HTML citesummary format, epilogue. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """
    def tmpl_citesummary_epilogue(self, ln=CFG_SITE_LANG):
        """HTML citesummary format, epilogue. A part of HCS format suite."""
        _ = gettext_set_language(ln)
        out = """
%(msg_title)s%s
%(msg_recs)s%s
%(msg_cites)s%s
%(msg_avgcit)s%.1f
%(msg_breakdown)s
%(fame)s%s
""" return out def tmpl_unapi(self, formats, identifier=None): """ Provide a list of object format available from the unAPI service for the object identified by IDENTIFIER """ out = '\n' if identifier: out += '\n' % (identifier) else: out += "\n" for format_name, format_type in formats.iteritems(): docs = '' if format_name == 'xn': docs = 'http://www.nlm.nih.gov/databases/dtd/' format_type = 'application/xml' format_name = 'nlm' elif format_name == 'xm': docs = 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd' format_type = 'application/xml' format_name = 'marcxml' elif format_name == 'xr': format_type = 'application/rss+xml' docs = 'http://www.rssboard.org/rss-2-0/' elif format_name == 'xw': format_type = 'application/xml' docs = 'http://www.refworks.com/RefWorks/help/RefWorks_Tagged_Format.htm' elif format_name == 'xoaidc': format_type = 'application/xml' docs = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd' elif format_name == 'xe': format_type = 'application/xml' docs= 'http://www.endnote.com/support/' format_name = 'endnote' elif format_name == 'xd': format_type = 'application/xml' docs = 'http://dublincore.org/schemas/' format_name = 'dc' elif format_name == 'xo': format_type = 'application/xml' docs = 'http://www.loc.gov/standards/mods/v3/mods-3-3.xsd' format_name = 'mods' if docs: out += '\n' % (xml_escape(format_name), xml_escape(format_type), xml_escape(docs)) else: out += '\n' % (xml_escape(format_name), xml_escape(format_type)) out += "" return out