diff --git a/modules/bibformat/etc/format_templates/Default_HTML_citation_log.bft b/modules/bibformat/etc/format_templates/Default_HTML_citation_log.bft
new file mode 100644
index 000000000..6316646fd
--- /dev/null
+++ b/modules/bibformat/etc/format_templates/Default_HTML_citation_log.bft
@@ -0,0 +1,6 @@
+<name>Default HTML citation log</name>
+<description>Small HTML record printed in the citation log</description>
+<strong><a href="/<BFE_SERVER_INFO var='CFG_SITE_RECORD'/>/<BFE_RECORD_ID/><BFE_CLIENT_INFO var="ln" prefix="?ln=" />"><BFE_TITLE_BRIEF /></a></strong>
+<BFE_AUTHORS limit="1" prefix=" - " extension=" <em>et al</em>"/>
+<BFE_PUBLI_INFO prefix=" - "/>
+<BFE_PRIMARY_REPORT_NUMBER prefix=" - "/>
diff --git a/modules/bibformat/etc/format_templates/Makefile.am b/modules/bibformat/etc/format_templates/Makefile.am
index 64dad0aee..7cde9c56e 100644
--- a/modules/bibformat/etc/format_templates/Makefile.am
+++ b/modules/bibformat/etc/format_templates/Makefile.am
@@ -1,51 +1,72 @@
 ## This file is part of Invenio.
 ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 etcdir = $(sysconfdir)/bibformat/format_templates
 
-etc_DATA = Default_HTML_captions.bft Picture_HTML_brief.bft \
-           Default_HTML_detailed.bft Default_HTML_portfolio.bft \
-           Picture_HTML_detailed.bft Default_HTML_brief.bft \
-           BibTeX.bft MARCXML.bft Excel.bft \
-           Default_HTML_similarity.bft NLM.xsl \
-           OAI_DC.xsl OAI_MARC.bft DC.xsl EndNote.xsl \
-           RSS.xsl RefWorks.xsl MODS.xsl \
-           Default_HTML_references.bft Default_HTML_files.bft \
-           Default_HTML_actions.bft Journal_HTML_detailed.bft \
+etc_DATA = Default_HTML_captions.bft \
+           Picture_HTML_brief.bft \
+           Default_HTML_detailed.bft \
+           Default_HTML_portfolio.bft \
+           Picture_HTML_detailed.bft \
+           Default_HTML_brief.bft \
+           BibTeX.bft \
+           MARCXML.bft \
+           Excel.bft \
+           Default_HTML_similarity.bft \
+           NLM.xsl \
+           OAI_DC.xsl \
+           OAI_MARC.bft \
+           DC.xsl \
+           EndNote.xsl \
+           RSS.xsl \
+           RefWorks.xsl \
+           MODS.xsl \
+           Default_HTML_references.bft \
+           Default_HTML_files.bft \
+           Default_HTML_actions.bft \
+           Journal_HTML_detailed.bft \
            Journal_HTML_brief.bft \
-           Poetry_HTML_brief.bft Poetry_HTML_detailed.bft \
-           AID_HTML_very_brief.bft Podcast.xsl \
-           Video_HTML_brief.bft Video_HTML_detailed.bft \
-           Basket_Search_Result.bft Default_HTML_meta.bft \
-           WebAuthorProfile_affiliations_helper.bft DataCite.xsl \
-           Default_Mobile_brief.bft Default_Mobile_detailed.bft \
-           Authority_HTML_brief.bft Authority_HTML_detailed.bft \
-           Detailed_HEPDATA_dataset.bft
+           Poetry_HTML_brief.bft \
+           Poetry_HTML_detailed.bft \
+           AID_HTML_very_brief.bft \
+           Podcast.xsl \
+           Video_HTML_brief.bft \
+           Video_HTML_detailed.bft \
+           Basket_Search_Result.bft \
+           Default_HTML_meta.bft \
+           WebAuthorProfile_affiliations_helper.bft \
+           DataCite.xsl \
+           Default_Mobile_brief.bft \
+           Default_Mobile_detailed.bft \
+           Authority_HTML_brief.bft \
+           Authority_HTML_detailed.bft \
+           Detailed_HEPDATA_dataset.bft \
+           Default_HTML_citation_log.bft
 
 tmpdir = $(prefix)/var/tmp
 
 tmp_DATA = Test1.bft \
            Test3.bft \
            Test_2.bft \
            Test6.bft \
            Test7.bft \
            Test8.bft \
            Test_no_template.test
 
 EXTRA_DIST = $(etc_DATA) $(tmp_DATA)
 
 CLEANFILES = *.tmp
diff --git a/modules/bibformat/etc/output_formats/HS2.bfo b/modules/bibformat/etc/output_formats/HS2.bfo
new file mode 100644
index 000000000..afc6fb91d
--- /dev/null
+++ b/modules/bibformat/etc/output_formats/HS2.bfo
@@ -0,0 +1 @@
+default: Default_HTML_citation_log.bft
diff --git a/modules/bibformat/etc/output_formats/Makefile.am b/modules/bibformat/etc/output_formats/Makefile.am
index 2b823ff36..9f1dcc9ad 100644
--- a/modules/bibformat/etc/output_formats/Makefile.am
+++ b/modules/bibformat/etc/output_formats/Makefile.am
@@ -1,33 +1,33 @@
 ## This file is part of Invenio.
 ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 etcdir = $(sysconfdir)/bibformat/output_formats
 
 etc_DATA = HB.bfo HC.bfo HD.bfo HP.bfo HX.bfo XM.bfo EXCEL.bfo \
-	   XD.bfo HS.bfo HA.bfo \
-	   XE.bfo XE8X.bfo XN.bfo XR.bfo XW.bfo \
-	   XOAIDC.bfo XO.bfo XOAIMARC.bfo \
-	   HDREF.bfo HDFILE.bfo HDACT.bfo XP.bfo BSR.bfo WAPAFF.bfo \
-	   HDM.bfo DCITE.bfo MOBB.bfo MOBD.bfo
+	       XD.bfo HS.bfo HA.bfo \
+	       XE.bfo XE8X.bfo XN.bfo XR.bfo XW.bfo \
+	       XOAIDC.bfo XO.bfo XOAIMARC.bfo \
+	       HDREF.bfo HDFILE.bfo HDACT.bfo XP.bfo BSR.bfo WAPAFF.bfo \
+	       HDM.bfo DCITE.bfo MOBB.bfo MOBD.bfo HS2.bfo
 
 tmpdir = $(prefix)/var/tmp
 
 tmp_DATA = TEST1.bfo TEST2.bfo TEST3.bfo TEST6.bfo TEST7.bfo TEST8.bfo
 
 EXTRA_DIST = $(etc_DATA) $(tmp_DATA)
 
 CLEANFILES = *.tmp
diff --git a/modules/bibformat/lib/elements/bfe_report_numbers.py b/modules/bibformat/lib/elements/bfe_report_numbers.py
index 89e2cd7a8..3db874567 100644
--- a/modules/bibformat/lib/elements/bfe_report_numbers.py
+++ b/modules/bibformat/lib/elements/bfe_report_numbers.py
@@ -1,66 +1,71 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 """BibFormat element - Prints report numbers
 """
 
 __revision__ = ""
 
 import cgi
 from invenio.urlutils import create_html_link
 
-def format_element(bfo, limit, separator=" ", extension=" etc.", link='yes'):
+def format_element(bfo, limit, separator=" ", extension=" etc.", link='yes', just_one='no'):
     """
     Prints the report numbers of the record (037__a and 088__a)
 
     @param separator: the separator between report numbers.
     @param limit: the max number of report numbers to print
     @param extension: a prefix printed when limit param is reached
     @param link: if 'yes', display report number with corresponding link when possible
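+    @param just_one: if 'yes', print only the first report number found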
     """
     numbers = bfo.fields("037__a")
     numbers.extend(bfo.fields("088__a"))
 
+    # Only display the first one
+    if just_one == 'yes':
+        numbers = numbers[:1]
+
     if limit.isdigit():
         limit_as_int = int(limit)
         if limit_as_int <= len(numbers):
             return separator.join(numbers[:limit_as_int]) + extension
 
     return separator.join([build_report_number_link(report_number, \
                                                     link == 'yes') \
                            for report_number in numbers])
 
 def build_report_number_link(report_number, link_p=True):
     """
     Build HTML link out of given report number when it make sense (or
     is possible) and/or escape report number.
     @param report_number: the report number to consider
     @param link_p: if True, build link, otherwise just escape
     """
     if link_p and report_number.lower().startswith('arxiv:'):
         return create_html_link('http://arxiv.org/abs/' + report_number,
                                 urlargd={}, link_label=report_number)
     else:
         return cgi.escape(report_number)
 
 def escape_values(bfo):
     """
     Called by BibFormat in order to check if output of this element
     should be escaped.
     """
     return 0
diff --git a/modules/bibrank/lib/bibrank_citation_grapher.py b/modules/bibrank/lib/bibrank_citation_grapher.py
index dd20ea9ff..a9ccca354 100644
--- a/modules/bibrank/lib/bibrank_citation_grapher.py
+++ b/modules/bibrank/lib/bibrank_citation_grapher.py
@@ -1,151 +1,204 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import os
 import time
 
-from invenio.config import CFG_SITE_URL, CFG_SITE_LANG, CFG_WEBDIR, CFG_BIBRANK_SHOW_CITATION_GRAPHS
-from invenio.dbquery import run_sql
+from invenio.config import (CFG_SITE_URL,
+                            CFG_SITE_LANG,
+                            CFG_WEBDIR,
+                            CFG_BIBRANK_SHOW_CITATION_GRAPHS)
 from invenio.messages import gettext_set_language
-from invenio.bibrank_grapher import create_temporary_image, write_coordinates_in_tmp_file, remove_old_img
-from invenio.bibrank_citation_searcher import calculate_cited_by_list
-
-cfg_bibrank_print_citation_history = 1
-color_line_list = ['9', '19', '10', '15', '21', '18']
-cfg_bibrank_citation_history_min_x_points = 3 # do not generate graphs that have less than three points
-
-def get_field_values(recID, tag):
-    """Return list of field values for field tag inside record RECID."""
-    out = []
-    if tag == "001___":
-        out.append(str(recID))
-    else:
-        digit = tag[0:2]
-        bx = "bib%sx" % digit
-        bibx = "bibrec_bib%sx" % digit
-        query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag)
-        res = run_sql(query)
-        for row in res:
-            out.append(row[0])
-    return out
+from invenio.bibrank_grapher import (create_temporary_image,
+                                     write_coordinates_in_tmp_file)
+from invenio.bibrank_citation_searcher import get_cited_by
+from invenio.search_engine_utils import get_fieldvalues
+from invenio.dateutils import strptime
+
+
+CFG_BIBRANK_PRINT_CITATION_HISTORY = 1
+# Do not generate graphs that have fewer than two points
+CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS = 2
+
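+# Citation graphs are cached on disk under CFG_WEBDIR so they can be
+# served as static files and regenerated only once they become stale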
+REL_PATH = 'img/citation-graphs'
+BASE_DIR = os.path.join(CFG_WEBDIR, os.path.join(*REL_PATH.split('/')))
 
-def calculate_citation_history_coordinates(recid):
-    """Return a list of citation graph coordinates for RECID, sorted by year."""
-    result = []
-    dbg = ""
-    initial_result= get_initial_result(calculate_citation_graphe_x_coordinates(recid))
-    citlist = calculate_cited_by_list(recid)
-    for rec_id, _ in citlist:
-        cit_year = get_field_values(rec_id, '269__c')
-        if not cit_year:
-            cit_year = get_field_values(rec_id, '773__y')
-        if not cit_year:
-            cit_year = get_field_values(rec_id, '260__c')
-        #some records simlpy do not have these fields
-        if cit_year:
-            #maybe cit_year[0][0:4] has a typo and cannot
-            #be converted to an int
-            numeric=1
-            try:
-                tmpval = int(cit_year[0][0:4])
-            except ValueError:
-                numeric=0
-            if numeric and initial_result.has_key(int(cit_year[0][0:4])):
-                initial_result[int(cit_year[0][0:4])] += 1
-    for key, value in initial_result.items():
-        result.append((key, value))
-    result.sort()
-    if len(result) < cfg_bibrank_citation_history_min_x_points:
-        # do not generate graphs that have less than X points
-        return []
-    return result
 
 def calculate_citation_graphe_x_coordinates(recid):
     """Return a range of year from the publication year of record RECID
        until the current year."""
-    rec_years = []
-    recordyear = get_field_values(recid, '773__y')
-    if not recordyear:
-        recordyear = get_field_values(recid, '260__c')
-        if not recordyear:
-            recordyear = get_field_values(recid, '269__c')
+    record_date = get_fieldvalues(recid, '269__c')
+    if not record_date:
+        record_date = get_fieldvalues(recid, '773__y')
+        if not record_date:
+            record_date = get_fieldvalues(recid, '260__c')
+
     currentyear = time.localtime()[0]
-    if recordyear == []:
+
+    recordyear = None
+    if record_date:
+        recordyear = find_year(record_date[0])
+    if not recordyear:
         recordyear = currentyear
-    else:
-        recordyear = find_year(recordyear[0])
-    interval = range(int(recordyear), currentyear+1)
-    return interval
+
+    return range(recordyear, currentyear+1)
+
+
+def calculate_citation_history_coordinates(recid):
+    """Return a list of citation graph coordinates for RECID, sorted by year."""
+    result = {}
+    for year in calculate_citation_graphe_x_coordinates(recid):
+        result[year] = 0
+
+    if len(result) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
+        # do not generate graphs that have less than X points
+        return []
+
+    for citer in get_cited_by(recid):
+        rec_date = get_fieldvalues(citer, '269__c')
+        if not rec_date:
+            rec_date = get_fieldvalues(citer, '773__y')
+            if not rec_date:
+                rec_date = get_fieldvalues(citer, '260__c')
+        # Some records simply do not have these fields
+        if rec_date:
+            # The date string may be malformed and
+            # fail to parse as a date
+            try:
+                d = strptime(rec_date[0], '%Y-%m')
+            except ValueError:
+                pass
+            else:
+                if d.year in result:
+                    result[d.year] += 1
+
+    return sorted(result.iteritems())
+
 
 def find_year(recordyear):
-    """find the year in the string as a suite of 4 int"""
-    s = ""
+    """Find the year in the string as a sequence of 4 digits"""
+    year = None
     for i in range(len(recordyear)-3):
         s = recordyear[i:i+4]
-        if s.isalnum():
+        if s.isdigit():
+            year = int(s)
             break
-    return s
+    return year
 
-def get_initial_result(rec_years):
-    """return an initial dictionary with year of record publication as key
-       and zero as value
-    """
-    result = {}
-    for year in rec_years :
-        result[year] = 0
-    return result
 
-def html_command(file):
+def html_command(filename):
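+    """Return the HTML snippet used to display the graph FILENAME."""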
     t = ''
     if CFG_BIBRANK_SHOW_CITATION_GRAPHS == 1:
-        t = """<img src='%s/img/%s' align="center" alt="">""" % (CFG_SITE_URL, file)
+        t = '<img src="%s/%s/%s" align="center" alt="Citation Graph">' \
+                                           % (CFG_SITE_URL, REL_PATH, filename)
     elif CFG_BIBRANK_SHOW_CITATION_GRAPHS == 2:
-        t = open(CFG_WEBDIR + "/img/" + file).read()
-    #t += "</table></td></tr></table>"
+        t = open(os.path.join(BASE_DIR, filename)).read()
     return t
 
+
+def remove_old_graph_if_needed(filename):
+    """Delete graph if it is older than x seconds"""
+    if not os.path.isfile(filename):
+        return True
+
+    time_diff = time.time() - os.stat(filename).st_mtime
+    if time_diff > 3600*24:
+        try:
+            os.unlink(filename)
+        except OSError, e:
+            # A missing file is fine; ignore ENOENT (errno 2)
+            if e.errno != 2:
+                raise
+        return True
+    return False
+
+
+def safe_create_citation_graph(recid, dest):
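+    """Create the citation graph for RECID and move it into place at DEST."""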
+    # Create destination dir
+    dest_dir = os.path.dirname(dest)
+    try:
+        os.makedirs(dest_dir)
+    except OSError, e:
+        # If the directory already exists (errno 17, EEXIST), ignore the error
+        if e.errno != 17:
+            raise
+
+    graph_source_file = create_citation_graph(recid, dest_dir)
+
+    if graph_source_file:
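+        # If the graph cannot be moved into place,
+        # clean up the temporary file instead of leaving it behind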
+        try:
+            os.rename(graph_source_file, dest)
+        except OSError:
+            os.unlink(graph_source_file)
+
+
+def create_citation_graph(recid, dest_dir):
+    coordinates = calculate_citation_history_coordinates(recid)
+    if coordinates:
+        years = calculate_citation_graphe_x_coordinates(recid)
+
+        coordinates_file, max_y = write_coordinates_in_tmp_file([coordinates])
+        try:
+            graph_file = create_temporary_image(recid,
+                    'citation', coordinates_file, 'Year', 'Times cited',
+                    [0, 0], max_y, [], ' ', years, dest_dir=dest_dir)
+        finally:
+            # Always delete the coordinates file
+            if coordinates_file:
+                os.unlink(coordinates_file)
+
+        if graph_file and os.path.exists(graph_file):
+            return graph_file
+
+
 def create_citation_history_graph_and_box(recid, ln=CFG_SITE_LANG):
     """Create graph with citation history for record RECID (into a
        temporary file) and return HTML box refering to that image.
        Called by Detailed record pages.
     """
 
     _ = gettext_set_language(ln)
 
     html_result = ""
-    if cfg_bibrank_print_citation_history:
-        coordinates = calculate_citation_history_coordinates(recid)
-        if coordinates:
-            html_head = """<br /><table><tr><td class="blocknote">%s</td></tr></table>"""% _("Citation history:")
-            graphe_file_name = 'citation_%s_stats.png' % str(recid)
-            remove_old_img(graphe_file_name)
-            years = calculate_citation_graphe_x_coordinates(recid)
-            years.sort()
-            datas_info = write_coordinates_in_tmp_file([coordinates])
-            graphe = create_temporary_image(recid, 'citation', datas_info[0], 'Year', 'Times cited', [0,0], datas_info[1], [], ' ', years)
-            graphe_image = graphe[0]
-            graphe_source_file = graphe[1]
-            if graphe_image and graphe_source_file:
-                if os.path.exists(graphe_source_file):
-                    os.unlink(datas_info[0])
-                    html_graphe_code = """<p>%s"""% html_command(graphe_image)
-                html_result = html_head + html_graphe_code
+
+    if CFG_BIBRANK_PRINT_CITATION_HISTORY:
+        graph_file_name = 'citation_%s_stats.png' % recid
+        # We need to store graphs in subdirectories because
+        # of max files per directory limit on AFS
+        sub_dir = str(recid / 10000)
+        graph_file = os.path.join(BASE_DIR, sub_dir, graph_file_name)
+
+        if remove_old_graph_if_needed(graph_file):
+            safe_create_citation_graph(recid, graph_file)
+
+        if os.path.exists(graph_file):
+            html_head = '<br /><table><tr><td class="blocknote">%s</td></tr></table>' % _("Citation history:")
+            html_graph_code = """<p>%s</p>""" % html_command('%s/%s' % (sub_dir, graph_file_name))
+            html_result = html_head + html_graph_code
+
     return html_result
diff --git a/modules/bibrank/lib/bibrank_citation_indexer.py b/modules/bibrank/lib/bibrank_citation_indexer.py
index 7ff9e429d..879b7b728 100644
--- a/modules/bibrank/lib/bibrank_citation_indexer.py
+++ b/modules/bibrank/lib/bibrank_citation_indexer.py
@@ -1,1038 +1,1044 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import re
 import time
 import os
 import sys
 import ConfigParser
 from itertools import islice
 from datetime import datetime
 
 from invenio.intbitset import intbitset
 from invenio.dbquery import run_sql, \
                             deserialize_via_marshal
 from invenio.bibindex_tokenizers.BibIndexJournalTokenizer import \
     CFG_JOURNAL_PUBINFO_STANDARD_FORM, \
     CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK
 from invenio.search_engine import search_pattern, \
                                   search_unit, \
                                   get_collection_reclist
 from invenio.search_engine_utils import get_fieldvalues
 from invenio.bibformat_utils import parse_tag
 from invenio.bibknowledge import get_kb_mappings
 from invenio.bibtask import write_message, task_get_option, \
                      task_update_progress, task_sleep_now_if_required, \
                      task_get_task_param
 from invenio.bibindex_engine_utils import get_field_tags
 from invenio.docextract_record import get_record
 
 re_CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK \
                    = re.compile(CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK)
 
 
 def compute_weights():
     sql = "SELECT citee, COUNT(citer) FROM rnkCITATIONDICT GROUP BY citee"
     weights = {}
     for citee, c in run_sql(sql):
         weights[citee] = c
     return weights
 
 
 def recids_cache(collections, cache={}):
     if 'valid_recids' not in cache:
         cache['valid_recids'] = intbitset()
         for coll in collections.split(','):
             cache['valid_recids'] += get_collection_reclist(coll)
     return cache['valid_recids']
 
 
 def deleted_recids_cache(cache={}):
     if 'deleted_records' not in cache:
         cache['deleted_records'] = search_unit(p='DELETED', f='980', m='a')
     return cache['deleted_records']
 
 
 def get_recids_matching_query(p, f, config, m='e'):
     """Return set of recIDs matching query for pattern p in field f."""
     function = config.get("rank_method", "function")
     collections = config.get(function, 'collections')
     if collections:
         ret = search_pattern(p=p, f=f, m=m) & recids_cache(collections)
     else:
         ret = search_pattern(p=p, f=f, m=m) - deleted_recids_cache()
     return ret
 
 
 def get_citation_weight(rank_method_code, config, chunk_size=25000):
     """return a dictionary which is used by bibrank daemon for generating
     the index of sorted research results by citation information
     """
     quick = task_get_option("quick") != "no"
 
     # id option forces re-indexing a certain range
     # even if there are no new recs
     if task_get_option("id"):
         # construct a range of records to index
         updated_recids = []
         for first, last in task_get_option("id"):
             updated_recids += range(first, last+1)
         if len(updated_recids) > 10000:
             str_updated_recids = str(updated_recids[:10]) + ' ... ' + str(updated_recids[-10:])
         else:
             str_updated_recids = str(updated_recids)
         write_message('Records to process: %s' % str_updated_recids)
         index_update_time = None
     else:
         bibrank_update_time = get_bibrankmethod_lastupdate(rank_method_code)
         if not quick:
             bibrank_update_time = "0000-00-00 00:00:00"
         write_message("bibrank: %s" % bibrank_update_time)
         index_update_time = get_bibindex_update_time()
         write_message("bibindex: %s" % index_update_time)
         if index_update_time > datetime.now().strftime("%Y-%m-%d %H:%M:%S"):
             index_update_time = "0000-00-00 00:00:00"
         updated_recids = get_modified_recs(bibrank_update_time,
                                            index_update_time)
         if len(updated_recids) > 10000:
             str_updated_recids = str(updated_recids[:10]) + ' ... ' + str(updated_recids[-10:])
         else:
             str_updated_recids = str(updated_recids)
         write_message("%s records to update" % str_updated_recids)
 
     if updated_recids:
         begin_time = time.time()
         try:
             function = config.get("rank_method", "function")
             config.get(function, 'collections')
         except ConfigParser.NoOptionError:
             config.set(function, 'collections', None)
         # Process fully the updated records
         weights = process_and_store(updated_recids, config, chunk_size)
         end_time = time.time()
         write_message("Total time of get_citation_weight(): %.2f sec" %
                                                       (end_time - begin_time))
         task_update_progress("citation analysis done")
     else:
         weights = None
         write_message("No new records added since last time this "
                       "rank method was executed")
 
     return weights, index_update_time
 
 
 def process_and_store(recids, config, chunk_size):
     # Limit of # of citation we can loose in one chunk
     function = config.get("rank_method", "function")
     citation_loss_limit = int(config.get(function, "citation_loss_limit"))
     # If we have nothing to process
     # Do not update the weights dictionary
     modified = False
     # Process recent records first
     # The older records were most likely added by the above steps
     # to be reprocessed so they only have minor changes
     recids_iter = iter(sorted(recids, reverse=True))
 
     # Split records to process into chunks so that we do not
     # fill up too much memory
     while True:
         task_sleep_now_if_required()
 
         chunk = list(islice(recids_iter, chunk_size))
         if not chunk:
             break
 
         write_message("Processing chunk #%s to #%s" % (chunk[0], chunk[-1]))
         # The core work
         cites, refs = process_chunk(chunk, config)
         # Check that we haven't lost too many citations
         cites_diff = compute_dicts_diff(chunk, refs, cites)
         write_message("Citations balance %s" % cites_diff)
         if citation_loss_limit and cites_diff <= -citation_loss_limit:
             raise Exception('Lost too many references, aborting')
 
         # Store processed citations/references
         store_dicts(chunk, refs, cites)
         modified = True
 
     # Compute new weights dictionary
     if modified:
         weights = compute_weights()
     else:
         weights = None
 
     return weights
 
 
 def process_chunk(recids, config):
     tags = get_tags_config(config)
 
     # call the procedure that does the hard work by reading fields of
     # citations and references in the updated_recid's (but nothing else)!
     write_message("Entering get_citation_informations", verbose=9)
     citation_informations = get_citation_informations(recids, tags, config)
 
     write_message("Entering ref_analyzer", verbose=9)
     # call the analyser that uses the citation_informations to really
     # search x-cites-y in the coll..
     return ref_analyzer(citation_informations,
                         recids,
                         tags,
                         config)
 
 
 def get_bibrankmethod_lastupdate(rank_method_code):
     """Return the last excution date of bibrank method
     """
     query = """SELECT DATE_FORMAT(last_updated, '%%Y-%%m-%%d %%H:%%i:%%s')
                FROM rnkMETHOD WHERE name =%s"""
     last_update_time = run_sql(query, [rank_method_code])
     try:
         r = last_update_time[0][0]
     except IndexError:
         r = "0000-00-00 00:00:00"
 
     return r
 
 
 def get_bibindex_update_time():
     """Return the last indexing date of the journals and report number indexes
     """
     try:
         # check indexing times of `journal' and `reportnumber`
         # indexes, and only fetch records which have been indexed
         sql = "SELECT DATE_FORMAT(MIN(last_updated), " \
               "'%%Y-%%m-%%d %%H:%%i:%%s') FROM idxINDEX WHERE name IN (%s,%s)"
         index_update_time = run_sql(sql, ('journal', 'reportnumber'), 1)[0][0]
     except IndexError:
         write_message("Not running citation indexer since journal/reportnumber"
                       " indexes are not created yet.")
         index_update_time = "0000-00-00 00:00:00"
 
     return index_update_time
 
 
 def get_modified_recs(bibrank_method_lastupdate, indexes_lastupdate):
     """Get records to be updated by bibrank indexing
 
     Return the list of records which have been modified between the last
     execution of bibrank method and the latest journal/report index updates.
     The result is expected to have ascending id order.
     """
     query = """SELECT id FROM bibrec
                WHERE modification_date >= %s
                AND modification_date < %s
                ORDER BY id ASC"""
     records = run_sql(query, (bibrank_method_lastupdate, indexes_lastupdate))
     return [r[0] for r in records]
 
 
 def format_journal(format_string, mappings):
     """format the publ infostring according to the format"""
 
     def replace(char, data):
         return data.get(char, char)
 
     return ''.join(replace(c, mappings) for c in format_string)
 
 
 def get_tags_config(config):
     """Fetch needs config from our config file"""
     # Probably "citation" unless this file gets renamed
     function = config.get("rank_method", "function")
     write_message("config function %s" % function, verbose=9)
 
     tags = {}
 
     # 037a: contains (often) the "hep-ph/0501084" tag of THIS record
     try:
         tag = config.get(function, "primary_report_number")
     except ConfigParser.NoOptionError:
         tags['record_pri_number'] = None
     else:
         tags['record_pri_number'] = tagify(parse_tag(tag))
 
     # 088a: additional short identifier for the record
     try:
         tag = config.get(function, "additional_report_number")
     except ConfigParser.NoOptionError:
         tags['record_add_number'] = None
     else:
         tags['record_add_number'] = tagify(parse_tag(tag))
 
     # 999C5r. this is in the reference list, refers to other records.
     # Looks like: hep-ph/0408002
     try:
         tag = config.get(function, "reference_via_report_number")
     except ConfigParser.NoOptionError:
         tags['refs_report_number'] = None
     else:
         tags['refs_report_number'] = tagify(parse_tag(tag))
     # 999C5s. this is in the reference list, refers to other records.
     # Looks like: Phys.Rev.,A21,78
     try:
         tag = config.get(function, "reference_via_pubinfo")
     except ConfigParser.NoOptionError:
         tags['refs_journal'] = None
     else:
         tags['refs_journal'] = tagify(parse_tag(tag))
     # 999C5a. this is in the reference list, refers to other records.
     # Looks like: 10.1007/BF03170733
     try:
         tag = config.get(function, "reference_via_doi")
     except ConfigParser.NoOptionError:
         tags['refs_doi'] = None
     else:
         tags['refs_doi'] = tagify(parse_tag(tag))
 
     # Fields needed to construct the journals for this record
     try:
         tag = {
             'pages': config.get(function, "pubinfo_journal_page"),
             'year': config.get(function, "pubinfo_journal_year"),
             'journal': config.get(function, "pubinfo_journal_title"),
             'volume': config.get(function, "pubinfo_journal_volume"),
         }
     except ConfigParser.NoOptionError:
         tags['publication'] = None
     else:
         tags['publication'] = {
             'pages': tagify(parse_tag(tag['pages'])),
             'year': tagify(parse_tag(tag['year'])),
             'journal': tagify(parse_tag(tag['journal'])),
             'volume': tagify(parse_tag(tag['volume'])),
         }
 
     # Fields needed to lookup the DOIs
     tags['doi'] = get_field_tags('doi')
 
     # 999C5s. A standardized way of writing a reference in the reference list.
     # Like: Nucl. Phys. B 710 (2000) 371
     try:
         tags['publication_format'] = config.get(function,
                                                 "pubinfo_journal_format")
     except ConfigParser.NoOptionError:
         tags['publication_format'] = CFG_JOURNAL_PUBINFO_STANDARD_FORM
 
     # Print values of tags for debugging
     write_message("tag values: %r" % [tags], verbose=9)
 
     return tags
 
 
 def get_journal_info(recid, tags):
     """Fetch journal info for given recid"""
     record_info = []
 
     record = get_record(recid)
     journals_fields = record.find_fields(tags['publication']['journal'][:5])
     for field in journals_fields:
         # we store the tags and their values here
         # like c->444 y->1999 p->"journal of foo",
         # v->20
         tagsvalues = {}
         try:
             tmp = field.get_subfield_values(tags['publication']['journal'][5])[0]
         except IndexError:
             pass
         else:
             tagsvalues["p"] = tmp
 
         try:
             tmp = field.get_subfield_values(tags['publication']['volume'][5])[0]
         except IndexError:
             pass
         else:
             tagsvalues["v"] = tmp
 
         try:
             tmp = field.get_subfield_values(tags['publication']['year'][5])[0]
         except IndexError:
             pass
         else:
             tagsvalues["y"] = tmp
 
         try:
             tmp = field.get_subfield_values(tags['publication']['pages'][5])[0]
         except IndexError:
             pass
         else:
             # if the page numbers have "x-y" take just x
             tagsvalues["c"] = tmp.split('-', 1)[0]
 
         # check if we have the required data
         ok = True
         for c in tags['publication_format']:
             if c in ('p', 'v', 'y', 'c'):
                 if c not in tagsvalues:
                     ok = False
 
         if ok:
             publ = format_journal(tags['publication_format'], tagsvalues)
             record_info += [publ]
 
             alt_volume = get_alt_volume(tagsvalues['v'])
             if alt_volume:
                 tagsvalues2 = tagsvalues.copy()
                 tagsvalues2['v'] = alt_volume
                 publ = format_journal(tags['publication_format'], tagsvalues2)
                 record_info += [publ]
 
             # Add codens
             for coden in get_kb_mappings('CODENS',
                                          value=tagsvalues['p']):
                 tagsvalues2 = tagsvalues.copy()
                 tagsvalues2['p'] = coden['key']
                 publ = format_journal(tags['publication_format'], tagsvalues2)
                 record_info += [publ]
 
     return record_info
 
 
 def get_alt_volume(volume):
     """Get alternate volume form
 
     We handle the inversed volume letter bug
     Some metadata is wrong which leads to journals with the volume letter
     at the end.
     e.g.  Phys.Rev.,51B,1 instead of Phys.Rev.,B51,1
     """
     alt_volume = None
     if re.match(ur'[a-zA-Z]\d+', volume, re.U|re.I):
         alt_volume = volume[1:] + volume[0]
     elif re.match(ur'\d+[a-zA-Z]', volume, re.U|re.I):
         alt_volume = volume[-1] + volume[:-1]
     return alt_volume
 
 
 def get_citation_informations(recid_list, tags, config,
                               fetch_catchup_info=True):
     """Scans the collections searching references (999C5x -fields) and
        citations for items in the recid_list
        returns a 4 list of dictionaries that contains the citation information
        of cds records
        examples: [ {} {} {} {} ]
                  [ {5: 'SUT-DP-92-70-5'},
                    { 93: ['astro-ph/9812088']},
                    { 93: ['Phys. Rev. Lett. 96 (2006) 081301'] }, {} ]
         NB: stuff here is for analysing new or changed records.
         see "ref_analyzer" for more.
     """
     begin_time = os.times()[4]
 
     records_info = {
         'report-numbers': {},
         'journals': {},
         'doi': {},
     }
 
     references_info = {
         'report-numbers': {},
         'journals': {},
         'doi': {},
     }
 
     # perform quick check to see if there are some records with
     # reference tags, because otherwise get.cit.inf would be slow even
     # if there is nothing to index:
     done = 0  # for status reporting
     for recid in recid_list:
         if done % 10 == 0:
             task_sleep_now_if_required()
 
         if done % 50 == 0:
             mesg = "get cit.inf done %s of %s" % (done, len(recid_list))
             write_message(mesg)
             task_update_progress(mesg)
 
         done += 1
 
         function = config.get("rank_method", "function")
         if config.get(function, 'collections'):
             if recid not in recids_cache(config.get(function, 'collections')):
                 # do not treat this record since it is not in the collections
                 # we want to process
                 continue
         else:
             if recid in deleted_recids_cache():
                 # do not treat this record since it was deleted; we
                 # skip it like this in case it was only soft-deleted
                 # e.g. via bibedit (i.e. when collection tag 980 is
                 # DELETED but other tags like report number or journal
                 # publication info remained the same, so the calls to
                 # get_fieldvalues() below would return old values)
                 continue
 
         if tags['refs_report_number']:
             references_info['report-numbers'][recid] \
                     = get_fieldvalues(recid,
                                       tags['refs_report_number'],
                                       sort=False)
             msg = "references_info['report-numbers'][%s] = %r" \
                         % (recid, references_info['report-numbers'][recid])
             write_message(msg, verbose=9)
         if tags['refs_journal']:
             references_info['journals'][recid] = []
             for ref in get_fieldvalues(recid,
                                        tags['refs_journal'],
                                        sort=False):
                 try:
                     # Inspire specific parsing
                     journal, volume, page = ref.split(',')
                 except ValueError:
                     pass
                 else:
                     alt_volume = get_alt_volume(volume)
                     if alt_volume:
                         alt_ref = ','.join([journal, alt_volume, page])
                         references_info['journals'][recid] += [alt_ref]
                 references_info['journals'][recid] += [ref]
             msg = "references_info['journals'][%s] = %r" \
                               % (recid, references_info['journals'][recid])
             write_message(msg, verbose=9)
         if tags['refs_doi']:
             references_info['doi'][recid] \
                     = get_fieldvalues(recid, tags['refs_doi'], sort=False)
             msg = "references_info['doi'][%s] = %r" \
                                    % (recid, references_info['doi'][recid])
             write_message(msg, verbose=9)
 
         if not fetch_catchup_info:
             # We do not need the extra info
             continue
 
         if tags['record_pri_number'] or tags['record_add_number']:
             records_info['report-numbers'][recid] = []
 
             if tags['record_pri_number']:
                 records_info['report-numbers'][recid] \
                     += get_fieldvalues(recid,
                                        tags['record_pri_number'],
                                        sort=False)
             if tags['record_add_number']:
                 records_info['report-numbers'][recid] \
                     += get_fieldvalues(recid,
                                        tags['record_add_number'],
                                        sort=False)
 
             msg = "records_info[%s]['report-numbers'] = %r" \
                         % (recid, records_info['report-numbers'][recid])
             write_message(msg, verbose=9)
 
         if tags['doi']:
             records_info['doi'][recid] = []
             for tag in tags['doi']:
                 records_info['doi'][recid] += get_fieldvalues(recid,
                                                               tag,
                                                               sort=False)
             msg = "records_info[%s]['doi'] = %r" \
                                       % (recid, records_info['doi'][recid])
             write_message(msg, verbose=9)
 
         # get a combination of
         # journal vol (year) pages
         if tags['publication']:
             records_info['journals'][recid] = get_journal_info(recid, tags)
             msg = "records_info[%s]['journals'] = %r" \
                                  % (recid, records_info['journals'][recid])
             write_message(msg, verbose=9)
 
     mesg = "get cit.inf done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     end_time = os.times()[4]
     write_message("Execution time for generating citation info "
                   "from record: %.2f sec" % (end_time - begin_time))
 
     return records_info, references_info
 
 
 def standardize_report_number(report_number):
     """Format the report number to a standard form.
 
     Currently we:
     * remove category for arxiv papers
     """
     report_number = re.sub(ur'(?:arXiv:)?(\d{4}\.\d{4}) \[[a-zA-Z\.-]+\]',
                   ur'arXiv:\g<1>',
                   report_number,
                   re.I | re.U)
     return report_number
 
 
 def ref_analyzer(citation_informations, updated_recids, tags, config):
     """Analyze the citation informations and calculate the citation weight
        and cited by list dictionary.
     """
     citations = {}
     for recid in updated_recids:
         citations[recid] = set()
     references = {}
     for recid in updated_recids:
         references[recid] = set()
 
     def step(msg_prefix, recid, done, total):
         if done % 30 == 0:
             task_sleep_now_if_required()
 
         if done % 1000 == 0:
             mesg = "%s done %s of %s" % (msg_prefix, done, total)
             write_message(mesg)
             task_update_progress(mesg)
 
         write_message("Processing: %s" % recid, verbose=9)
 
     def add_to_cites(citer, citee):
         # Make sure we don't add ourselves
         # Workaround till we know why we are adding ourselves.
         if citer == citee:
             return
 
         citations[citee].add(citer)
         if citer in updated_recids:
             references[citer].add(citee)
 
     def add_to_refs(citer, citee):
         # Make sure we don't add ourselves
         # Workaround till we know why we are adding ourselves.
         if citer == citee:
             return
 
         if citee in updated_recids:
             citations[citee].add(citer)
         references[citer].add(citee)
 
     # dict of recid -> institute_give_publ_id
     records_info, references_info = citation_informations
 
     t1 = os.times()[4]
 
     # Try to find references based on 999C5r
     # e.g 8 -> ([astro-ph/9889],[hep-ph/768])
     # meaning: rec 8 contains these in bibliography
     write_message("Phase 1: Report numbers references")
     done = 0
     for thisrecid, refnumbers in references_info['report-numbers'].iteritems():
         step("Report numbers references", thisrecid, done,
                                         len(references_info['report-numbers']))
         done += 1
 
         for refnumber in (r for r in refnumbers if r):
             field = 'reportnumber'
             refnumber = standardize_report_number(refnumber)
             # Search for "hep-th/5644654 or such" in existing records
             recids = get_recids_matching_query(p=refnumber,
                                                f=field,
                                                config=config)
             write_message("These match searching %s in %s: %s" %
                                    (refnumber, field, list(recids)), verbose=9)
 
             if not recids:
                 insert_into_missing(thisrecid, refnumber)
             else:
                 remove_from_missing(refnumber)
 
             if len(recids) > 1:
                 store_citation_warning('multiple-matches', refnumber)
                 msg = "Whoops: record '%d' report number value '%s' " \
                       "matches many records; taking only the first one. %s" % \
                       (thisrecid, refnumber, repr(recids))
                 write_message(msg, stream=sys.stderr)
 
             for recid in list(recids)[:1]:  # take only the first one
                 add_to_refs(thisrecid, recid)
 
     mesg = "done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     t2 = os.times()[4]
 
     # Try to find references based on 999C5s
     # e.g. Phys.Rev.Lett. 53 (1986) 2285
     write_message("Phase 2: Journal references")
     done = 0
     for thisrecid, refs in references_info['journals'].iteritems():
         step("Journal references", thisrecid, done,
                                               len(references_info['journals']))
         done += 1
 
         for reference in (r for r in refs if r):
             p = reference
             field = 'journal'
 
             # check reference value to see whether it is well formed:
             if not re_CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK.match(p):
                 store_citation_warning('not-well-formed', p)
                 msg = "Whoops, record '%d' reference value '%s' " \
                       "is not well formed; skipping it." % (thisrecid, p)
                 write_message(msg, stream=sys.stderr)
                 continue  # skip this ill-formed value
 
             recids = get_recids_matching_query(p=p,
                                                f=field,
                                                config=config)
             write_message("These match searching %s in %s: %s"
                                  % (reference, field, list(recids)), verbose=9)
 
             if not recids:
                 insert_into_missing(thisrecid, p)
             else:
                 remove_from_missing(p)
 
             if len(recids) > 1:
                 store_citation_warning('multiple-matches', p)
                 msg = "Whoops: record '%d' reference value '%s' " \
                       "matches many records; taking only the first one. %s" % \
                       (thisrecid, p, repr(recids))
                 write_message(msg, stream=sys.stderr)
 
             for recid in list(recids)[:1]:  # take only the first one
                 add_to_refs(thisrecid, recid)
 
     mesg = "done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     t3 = os.times()[4]
 
     # Try to find references based on 999C5a
     # e.g. 10.1007/BF03170733
     write_message("Phase 3: DOI references")
     done = 0
     for thisrecid, refs in references_info['doi'].iteritems():
         step("DOI references", thisrecid, done, len(references_info['doi']))
         done += 1
 
         for reference in (r for r in refs if r):
             p = reference
             field = 'doi'
 
             recids = get_recids_matching_query(p=p,
                                                f=field,
                                                config=config)
             write_message("These match searching %s in %s: %s"
                                  % (reference, field, list(recids)), verbose=9)
 
             if not recids:
                 insert_into_missing(thisrecid, p)
             else:
                 remove_from_missing(p)
 
             if len(recids) > 1:
                 store_citation_warning('multiple-matches', p)
                 msg = "Whoops: record '%d' DOI value '%s' " \
                       "matches many records; taking only the first one. %s" % \
                       (thisrecid, p, repr(recids))
                 write_message(msg, stream=sys.stderr)
 
             for recid in list(recids)[:1]:  # take only the first one
                 add_to_refs(thisrecid, recid)
 
     mesg = "done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     t4 = os.times()[4]
 
     # Search for stuff like CERN-TH-4859/87 in list of refs
     write_message("Phase 4: report numbers catchup")
     done = 0
     for thisrecid, reportcodes in records_info['report-numbers'].iteritems():
         step("Report numbers catchup", thisrecid, done,
                                            len(records_info['report-numbers']))
         done += 1
 
         for reportcode in (r for r in reportcodes if r):
             if reportcode.startswith('arXiv'):
                 std_reportcode = standardize_report_number(reportcode)
                 report_pattern = r'^%s( *\[[a-zA-Z.-]*\])?' % \
                                                 re.escape(std_reportcode)
                 recids = get_recids_matching_query(p=report_pattern,
                                                    f=tags['refs_report_number'],
                                                    m='r',
                                                    config=config)
             else:
                 recids = get_recids_matching_query(p=reportcode,
                                                    f=tags['refs_report_number'],
                                                    config=config)
             for recid in recids:
                 add_to_cites(recid, thisrecid)
 
     mesg = "done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     # Find this record's pubinfo in other records' bibliography
     write_message("Phase 5: journals catchup")
     done = 0
     t5 = os.times()[4]
     for thisrecid, rec_journals in records_info['journals'].iteritems():
         step("Journals catchup", thisrecid, done,
                                                  len(records_info['journals']))
         done += 1
 
         for journal in rec_journals:
             journal = journal.replace("\"", "")
             # Search the publication string like
             # Phys. Lett., B 482 (2000) 417 in 999C5s
             recids = get_recids_matching_query(p=journal,
                                                f=tags['refs_journal'],
                                                config=config)
             write_message("These records match %s in %s: %s"
                     % (journal, tags['refs_journal'], list(recids)), verbose=9)
 
             for recid in recids:
                 add_to_cites(recid, thisrecid)
 
     mesg = "done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     write_message("Phase 6: DOI catchup")
     done = 0
     t6 = os.times()[4]
     for thisrecid, dois in records_info['doi'].iteritems():
         step("DOI catchup", thisrecid, done, len(records_info['doi']))
         done += 1
 
         for doi in dois:
             # Search the publication string like
             # Phys. Lett., B 482 (2000) 417 in 999C5a
             recids = get_recids_matching_query(p=doi,
                                                f=tags['refs_doi'],
                                                config=config)
             write_message("These records match %s in %s: %s"
                             % (doi, tags['refs_doi'], list(recids)), verbose=9)
 
             for recid in recids:
                 add_to_cites(recid, thisrecid)
 
     mesg = "done fully"
     write_message(mesg)
     task_update_progress(mesg)
 
     if task_get_task_param('verbose') >= 3:
         # Print only X first to prevent flood
         write_message("citation_list (x is cited by y):")
         write_message(dict(islice(citations.iteritems(), 10)))
         write_message("size: %s" % len(citations))
         write_message("reference_list (x cites y):")
         write_message(dict(islice(references.iteritems(), 10)))
         write_message("size: %s" % len(references))
 
     t7 = os.times()[4]
 
     write_message("Execution time for analyzing the citation information "
                   "generating the dictionary:")
     write_message("... checking ref report numbers: %.2f sec" % (t2-t1))
     write_message("... checking ref journals: %.2f sec" % (t3-t2))
     write_message("... checking ref DOI: %.2f sec" % (t4-t3))
     write_message("... checking rec report numbers: %.2f sec" % (t5-t4))
     write_message("... checking rec journals: %.2f sec" % (t6-t5))
     write_message("... checking rec DOI: %.2f sec" % (t7-t6))
     write_message("... total time of ref_analyze: %.2f sec" % (t7-t1))
 
     return citations, references
 
 
 def compute_refs_diff(recid, new_refs):
     """
     Given a set of references for a record, returns how many references were
     added to it. The value can be negative which means the record lost
     citations.
     """
     old_refs = set(row[0] for row in run_sql("""SELECT citee
                                                 FROM rnkCITATIONDICT
                                                 WHERE citer = %s""", [recid]))
 
     refs_to_add = new_refs - old_refs
     refs_to_delete = old_refs - new_refs
     return len(refs_to_add) - len(refs_to_delete)
 
 
 def compute_cites_diff(recid, new_cites):
     """
     This function does the same thing as compute_refs_diff but with citations.
     """
     old_cites = set(row[0] for row in run_sql("""SELECT citer
                                                  FROM rnkCITATIONDICT
                                                  WHERE citee = %s""", [recid]))
 
     cites_to_add = new_cites - old_cites
     cites_to_delete = old_cites - new_cites
     return len(cites_to_add) - len(cites_to_delete)
 
 
 def compute_dicts_diff(recids, refs, cites):
     """
     Given the new dictionaries for references and citations, computes how
     many references were added or removed by comparing them to the current
     stored in the database.
     """
     cites_diff = 0
     for recid in recids:
         cites_diff += compute_refs_diff(recid, refs[recid])
         cites_diff += compute_cites_diff(recid, cites[recid])
     return cites_diff
 
 
 def store_dicts(recids, refs, cites):
     """Insert the reference and citation list into the database"""
     for recid in recids:
         replace_refs(recid, refs[recid])
         replace_cites(recid, cites[recid])
 
 
 def replace_refs(recid, new_refs):
     """
     Given a set of references, replaces the references of given recid
     in the database.
     The changes are logged into rnkCITATIONLOG.
     """
     old_refs = set(row[0] for row in run_sql("""SELECT citee
                                                 FROM rnkCITATIONDICT
                                                 WHERE citer = %s""", [recid]))
 
     refs_to_add = new_refs - old_refs
     refs_to_delete = old_refs - new_refs
 
     for ref in refs_to_add:
         write_message('adding ref %s %s' % (recid, ref), verbose=1)
         now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         run_sql("""INSERT INTO rnkCITATIONDICT (citer, citee, last_updated)
                    VALUES (%s, %s, %s)""", (recid, ref, now))
         run_sql("""INSERT INTO rnkCITATIONLOG (citer, citee, type, action_date)
                    VALUES (%s, %s, %s, %s)""", (recid, ref, 'added', now))
 
     for ref in refs_to_delete:
         write_message('deleting ref %s %s' % (recid, ref), verbose=1)
         now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         run_sql("""DELETE FROM rnkCITATIONDICT
                    WHERE citer = %s and citee = %s""", (recid, ref))
         run_sql("""INSERT INTO rnkCITATIONLOG (citer, citee, type, action_date)
                    VALUES (%s, %s, %s, %s)""", (recid, ref, 'removed', now))
 
 
 def replace_cites(recid, new_cites):
     """
     Given a set of citations, replaces the citations of given recid
     in the database.
     The changes are logged into rnkCITATIONLOG.
 
     See @replace_refs
     """
     old_cites = set(row[0] for row in run_sql("""SELECT citer
                                                 FROM rnkCITATIONDICT
                                                 WHERE citee = %s""", [recid]))
 
     cites_to_add = new_cites - old_cites
     cites_to_delete = old_cites - new_cites
 
     for cite in cites_to_add:
         write_message('adding cite %s %s' % (recid, cite), verbose=1)
         now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         run_sql("""INSERT INTO rnkCITATIONDICT (citee, citer, last_updated)
                    VALUES (%s, %s, %s)""", (recid, cite, now))
+        run_sql("""INSERT INTO rnkCITATIONLOG (citee, citer, type, action_date)
+                   VALUES (%s, %s, %s, %s)""", (recid, cite, 'added', now))
 
     for cite in cites_to_delete:
         write_message('deleting cite %s %s' % (recid, cite), verbose=1)
+        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         run_sql("""DELETE FROM rnkCITATIONDICT
                    WHERE citee = %s and citer = %s""", (recid, cite))
+        run_sql("""INSERT INTO rnkCITATIONLOG (citee, citer, type, action_date)
+                   VALUES (%s, %s, %s, %s)""", (recid, cite, 'removed', now))
 
 
 def insert_into_missing(recid, report):
     """Mark reference string as missing.
 
        If a reference is a report number / journal / DOI but we do not have
        the corresponding record in the database, we mark that particular
        reference string as missing, by adding a row in rnkCITATIONDATAEXT.
        The recid represents the record containing the reference string.
     """
     if len(report) > 255:
         # Invalid report: it is too long and does not fit
         # in the database column (currently varchar 255)
         return
     wasalready = run_sql("""SELECT id_bibrec
                             FROM rnkCITATIONDATAEXT
                             WHERE id_bibrec = %s
                             AND extcitepubinfo = %s""",
                           (recid, report))
     if not wasalready:
         run_sql("""INSERT INTO rnkCITATIONDATAEXT(id_bibrec, extcitepubinfo)
                    VALUES (%s,%s)""", (recid, report))
 
 
 def remove_from_missing(report):
     """Remove the reference string from the missing table
 
        See @insert_into_missing"""
     run_sql("""DELETE FROM rnkCITATIONDATAEXT
                WHERE extcitepubinfo = %s""", (report,))
 
 
 def print_missing(num):
     """
     Print the contents of rnkCITATIONDATAEXT table containing external
     records that were cited by NUM or more internal records.
 
     NUM is by default taken from the -E command line option.
     """
     if not num:
         num = task_get_option("print-extcites")
 
     write_message("Listing external papers cited by %i or more \
                                                       internal records:" % num)
 
     res = run_sql("""SELECT COUNT(id_bibrec), extcitepubinfo
                      FROM rnkCITATIONDATAEXT
                      GROUP BY extcitepubinfo HAVING COUNT(id_bibrec) >= %s
                      ORDER BY COUNT(id_bibrec) DESC""", (num,))
     for cnt, brec in res:
         print str(cnt), "\t", brec
 
     write_message("Listing done.")
 
 
 def tagify(parsedtag):
     """aux auf to make '100__a' out of ['100','','','a']"""
     tag = ""
     for t in parsedtag:
         if t == '':
             t = '_'
         tag += t
     return tag
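
 # For example, tagify(['100', '', '', 'a']) returns '100__a': every empty
 # component is replaced by '_' before concatenation.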
 
 
 def store_citation_warning(warning_type, cit_info):
     """Store citation indexing warnings in the database
 
     If we encounter a problem during the citation indexing, such as multiple
     results for a report number, we store a warning in rnkCITATIONDATAERR
     """
     r = run_sql("""SELECT 1 FROM rnkCITATIONDATAERR
                    WHERE type = %s
                    AND citinfo = %s""", (warning_type, cit_info))
     if not r:
         run_sql("""INSERT INTO rnkCITATIONDATAERR (type, citinfo)
                    VALUES (%s, %s)""", (warning_type, cit_info))
diff --git a/modules/bibrank/lib/bibrank_citation_searcher.py b/modules/bibrank/lib/bibrank_citation_searcher.py
index 701c8bc74..4cdb5e702 100644
--- a/modules/bibrank/lib/bibrank_citation_searcher.py
+++ b/modules/bibrank/lib/bibrank_citation_searcher.py
@@ -1,322 +1,329 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import re
 
 from invenio.dbquery import run_sql
 from invenio.intbitset import intbitset
 from invenio.data_cacher import DataCacher
 from operator import itemgetter
 
 
 class CitationDictsDataCacher(DataCacher):
     """
     Cache holding all the precomputed citation structures (citations_weights,
     citations_keys, citations_counts, selfcites_weights, selfcites_counts).
     """
     def __init__(self):
         def initial_fill():
             alldicts = {}
             from invenio.bibrank_tag_based_indexer import fromDB
             weights = fromDB('citation')
 
             alldicts['citations_weights'] = weights
             # for cited:M->N queries, it is interesting to cache also
             # some preprocessed citationdict:
             alldicts['citations_keys'] = intbitset(weights.keys())
 
             # Citation counts
             alldicts['citations_counts'] = list(weights.iteritems())
             alldicts['citations_counts'].sort(key=itemgetter(1), reverse=True)
 
             # Self-cites
             selfcites = fromDB('selfcites')
             selfcites_weights = {}
             for recid, counts in alldicts['citations_counts']:
                 selfcites_weights[recid] = counts - selfcites.get(recid, 0)
             alldicts['selfcites_weights'] = selfcites_weights
             alldicts['selfcites_counts'] = [
                 (recid, selfcites_weights.get(recid, cites))
                 for recid, cites in alldicts['citations_counts']]
             alldicts['selfcites_counts'].sort(key=itemgetter(1), reverse=True)
 
             return alldicts
 
         def incremental_fill():
             self.cache = None
             return initial_fill()
 
         def cache_filler():
             if self.cache:
                 cache = incremental_fill()
             else:
                 cache = initial_fill()
             return cache
 
         from invenio.bibrank_tag_based_indexer import get_lastupdated
 
         def timestamp_verifier():
             citation_lastupdate = get_lastupdated('citation')
             if citation_lastupdate:
                 return citation_lastupdate.strftime("%Y-%m-%d %H:%M:%S")
             else:
                 return "0000-00-00 00:00:00"
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 CACHE_CITATION_DICTS = None
 
 
 def get_citation_dict(dictname):
     """
     Returns a cached value of a citation dictionary. Performs lazy
     loading, i.e. loads the dictionary the first time it is actually
     used.
 
     @param dictname: the name of the citation structure to return. Can
             be citations_weights, citations_keys, citations_counts,
             selfcites_weights or selfcites_counts.
     @type dictname: string
     @return: the requested citation structure, e.g. for citations_weights
             it is { recid -> number of citing records }.
     @rtype: dictionary
     """
     global CACHE_CITATION_DICTS
     if CACHE_CITATION_DICTS is None:
         CACHE_CITATION_DICTS = CitationDictsDataCacher()
     else:
         CACHE_CITATION_DICTS.recreate_cache_if_needed()
     return CACHE_CITATION_DICTS.cache[dictname]
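
 # Hedged usage sketch: the first call builds the whole cache, later calls
 # only revalidate its timestamp, so lookups stay cheap in request handlers:
 #
 #     weights = get_citation_dict("citations_weights")   # {recid: count}
 #     top5 = get_citation_dict("citations_counts")[:5]   # most cited first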
 
 
 def get_refers_to(recordid):
     """Return a list of records referenced by this record"""
     rows = run_sql("SELECT citee FROM rnkCITATIONDICT WHERE citer = %s",
                    [recordid])
     return set(r[0] for r in rows)
 
 
 def get_cited_by(recordid):
     """Return a list of records that cite recordid"""
     rows = run_sql("SELECT citer FROM rnkCITATIONDICT WHERE citee = %s",
                    [recordid])
     return set(r[0] for r in rows)
 
 
 def get_cited_by_count(recordid):
     """Return how many records cite given RECORDID."""
     rows = run_sql("SELECT 1 FROM rnkCITATIONDICT WHERE citee = %s",
                    [recordid])
     return len(rows)
 
 
 def get_records_with_num_cites(numstr, allrecs=intbitset([]),
                                exclude_selfcites=False):
     """Return an intbitset of record IDs that are cited X times,
        X defined in numstr.
        Warning: numstr is string and may not be numeric! It can
        be 10,0->100 etc
     """
     if exclude_selfcites:
         cache_cited_by_dictionary_counts = get_citation_dict("selfcites_counts")
         citations_keys = intbitset(get_citation_dict("selfcites_weights").keys())
     else:
         cache_cited_by_dictionary_counts = get_citation_dict("citations_counts")
         citations_keys = get_citation_dict("citations_keys")
 
     matches = intbitset()
     # once again, check that the parameter is a string
     if not isinstance(numstr, str):
         return matches
     numstr = numstr.replace(" ", '')
     numstr = numstr.replace('"', '')
 
     num = 0
     # first, check if numstr is just a number
     singlenum = re.findall(r"^\d+$", numstr)
     if singlenum:
         num = int(singlenum[0])
         if num == 0:
             #we return recids that are not in keys
             return allrecs - citations_keys
         else:
             return intbitset([recid for recid, cit_count
                         in cache_cited_by_dictionary_counts
                         if cit_count == num])
 
     # Try to get 1->10 or such
     firstsec = re.findall("(\d+)->(\d+)", numstr)
     if firstsec:
         first = int(firstsec[0][0])
         sec = int(firstsec[0][1])
         if first == 0:
             # Start with those that have no cites..
             matches = allrecs - citations_keys
         if first <= sec:
             matches += intbitset([recid for recid, cit_count
                              in cache_cited_by_dictionary_counts
                              if first <= cit_count <= sec])
         return matches
 
     # Try to get 10+
     firstsec = re.findall("(\d+)\+", numstr)
     if firstsec:
         first = int(firstsec[0])
         matches = intbitset([recid for recid, cit_count
                          in cache_cited_by_dictionary_counts \
                          if cit_count > first])
 
     return matches
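
 # The numstr mini-grammar accepted above, illustrated (allrecs is the
 # caller's full record set):
 #
 #     get_records_with_num_cites("0", allrecs)   # records never cited
 #     get_records_with_num_cites("10")           # cited exactly 10 times
 #     get_records_with_num_cites("1->10")        # cited between 1 and 10 times
 #     get_records_with_num_cites("10+")          # cited more than 10 times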
 
 
 def get_cited_by_list(recids):
     """Return a tuple of ([recid,list_of_citing_records],...) for all the
        records in recordlist.
     """
     if not recids:
         return []
 
     in_sql = ','.join('%s' for dummy in recids)
     rows = run_sql("""SELECT citer, citee FROM rnkCITATIONDICT
                        WHERE citee IN (%s)""" % in_sql, recids)
 
     cites = {}
     for citer, citee in rows:
         cites.setdefault(citee, set()).add(citer)
 
     return [(recid, cites.get(recid, set())) for recid in recids]
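
 # The IN clause above is built by repeating the '%s' placeholder once per
 # recid, so the values are still escaped by run_sql; for three recids the
 # query becomes ... WHERE citee IN (%s,%s,%s) with arguments (r1, r2, r3).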
 
 
 def get_refers_to_list(recids):
     """Return a tuple of ([recid,list_of_citing_records],...) for all the
        records in recordlist.
     """
     if not recids:
         return []
 
     in_sql = ','.join('%s' for dummy in recids)
     rows = run_sql("""SELECT citee, citer FROM rnkCITATIONDICT
                        WHERE citer IN (%s)""" % in_sql, recids)
 
     refs = {}
     for citee, citer in rows:
         refs.setdefault(citer, set()).add(citee)
 
     return [(recid, refs.get(recid, set())) for recid in recids]
 
 
 def get_refersto_hitset(ahitset):
     """
     Return a hitset of records that refer to (cite) some records from
     the given ahitset.  Useful for the search engine's
     refersto:author:ellis feature.
     """
     out = intbitset()
     if ahitset:
         try:
             iter(ahitset)
         except OverflowError:
             # ignore attempt to iterate over infinite ahitset
             pass
         else:
             in_sql = ','.join('%s' for dummy in ahitset)
             rows = run_sql("""SELECT citer FROM rnkCITATIONDICT
                               WHERE citee IN (%s)""" % in_sql, ahitset)
             out = intbitset(rows)
     return out
 

 def get_one_cited_by_weight(recID):
     """Return the number of citing records for one record."""
     weight = get_citation_dict("citations_weights")
 
     return weight.get(recID, 0)
 

 def get_cited_by_weight(recordlist):
     """Return a list of [recid, number_of_citing_records] pairs for all
        the records in recordlist.
     """
     weights = get_citation_dict("citations_weights")
 
     result = []
     for recid in recordlist:
         result.append([recid, weights.get(recid, 0)])
 
     return result
 
 
 def get_citedby_hitset(ahitset):
     """
     Return a hitset of records that are cited by records in the given
     ahitset.  Useful for search engine's citedby:author:ellis feature.
     """
     out = intbitset()
     if ahitset:
         try:
             iter(ahitset)
         except OverflowError:
             # ignore attempt to iterate over infinite ahitset
             pass
         else:
             in_sql = ','.join('%s' for dummy in ahitset)
             rows = run_sql("""SELECT citee FROM rnkCITATIONDICT
                               WHERE citer IN (%s)""" % in_sql, ahitset)
             out = intbitset(rows)
     return out
 
 
 def calculate_cited_by_list(record_id, sort_order="d"):
     """Return a tuple of ([recid,citation_weight],...) for all the
        record citing RECORD_ID.  The resulting recids is sorted by
        ascending/descending citation weights depending or SORT_ORDER.
     """
     result = []
 
     citation_list = get_cited_by(record_id)
 
     # Add weights i.e. records that cite each of the entries in citation_list
     weights = get_citation_dict("citations_weights")
     for c in citation_list:
         result.append([c, weights.get(c, 0)])
 
     # sort them
     reverse = sort_order == "d"
     result.sort(key=itemgetter(1), reverse=reverse)
     return result
 
 
 def calculate_co_cited_with_list(record_id, sort_order="d"):
     """Return a tuple of ([recid,co-cited weight],...) for records
        that are co-cited with RECORD_ID.  The resulting recids is sorted by
        ascending/descending citation weights depending or SORT_ORDER.
     """
     result = []
     result_intermediate = {}
 
     for cit_id in get_cited_by(record_id):
         for ref_id in get_refers_to(cit_id):
             result_intermediate[ref_id] = \
                 result_intermediate.get(ref_id, 0) + 1
     for key, value in result_intermediate.iteritems():
         if key != record_id:
             result.append([key, value])
     reverse = sort_order == "d"
     result.sort(key=itemgetter(1), reverse=reverse)
     return result
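
 # Co-citation counting in the loop above (hypothetical recids): if records
 # 5 and 6 both cite record_id and both also cite record 7, then ref_id 7 is
 # seen twice and ends up in the result with a co-citation weight of 2.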
+
+
+def get_citers_log(recid):
+    return run_sql("""SELECT citer, type, action_date
+                      FROM rnkCITATIONLOG
+                      WHERE citee = %s
+                      ORDER BY action_date DESC""", [recid])
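+
+# Hedged usage sketch: rows come back newest first, so rendering a record's
+# citation history is a straight iteration (recid 11 is hypothetical):
+#
+#     for citer, log_type, action_date in get_citers_log(11):
+#         print citer, log_type, action_date   # e.g. 42 added 2013-05-01 ...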
diff --git a/modules/bibrank/lib/bibrank_downloads_grapher.py b/modules/bibrank/lib/bibrank_downloads_grapher.py
index 3fba76fa6..962aa5f2b 100644
--- a/modules/bibrank/lib/bibrank_downloads_grapher.py
+++ b/modules/bibrank/lib/bibrank_downloads_grapher.py
@@ -1,303 +1,308 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import string
 import os
 import time
 import calendar
 
 from invenio.config import CFG_SITE_URL, CFG_SITE_LANG, CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION, CFG_WEBDIR
 from invenio.messages import gettext_set_language
 from invenio.intbitset import intbitset
 from invenio.dbquery import run_sql
 from invenio.bibrank_downloads_indexer import database_tuples_to_single_list
-from invenio.bibrank_grapher import *
+from invenio.bibrank_grapher import (write_coordinates_in_tmp_file,
+                                     create_temporary_image,
+                                     remove_old_img)
 
-color_line_list = ['9', '19', '10', '15', '21', '18']
-cfg_id_bibdoc_id_bibrec = 5
+CFG_ID_BIBDOC_ID_BIBREC = 5
 
 def create_download_history_graph_and_box(id_bibrec, ln=CFG_SITE_LANG):
     """Create graph with citation history for record ID_BIBREC (into a
        temporary file) and return HTML box refering to that image.
        Called by Detailed record pages.
        Notes:
         if id_bibdoc=0 : its an oustide-stored document and it has no id_bibdoc --> only one line
         if len(id_bibdocs) <= cfg_id_bibdoc_id_bibrec draw one line per id_bibdoc
         if len(id_bibdocs) > cfg_id_bibdoc_id_bibrec draw only one line which hold simultaneously the downloads for all id_bibdoc
         Each time this function is called, all the images older than 10 minutes are deleted.
     """
     _ = gettext_set_language(ln)
 
     out = ""
 
     # Prepare downloads history graph:
     if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
         html_content = ""
         # remove images older than 10 minutes
         remove_old_img("download")
         # download count graph
         id_bibdocs = intbitset(run_sql("select distinct id_bibdoc from rnkDOWNLOADS where id_bibrec=%s", (id_bibrec, )))
 
         id_existing_bibdocs = intbitset(run_sql("SELECT id_bibdoc FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'", (id_bibrec, )))
 
         ## FIXME: when bibdocs are deleted we lose the stats. What shall we do with them?
         id_bibdocs &= id_existing_bibdocs
 
         history_analysis_results = ()
         if not id_bibdocs:
             pass
-        elif len(id_bibdocs) <= cfg_id_bibdoc_id_bibrec and 0 not in id_bibdocs:
+        elif len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC and 0 not in id_bibdocs:
             history_analysis_results = draw_downloads_statistics(id_bibrec, list(id_bibdocs))
         else:
             history_analysis_results = draw_downloads_statistics(id_bibrec, [])
         if history_analysis_results and history_analysis_results[0]:
             if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 2:
                 # history_analysis_results[0] is now an absolute path
                 graph_file_history = history_analysis_results[0]
                 html_content += """<tr><td valign=center align=center>%s</td>""" % open(graph_file_history).read()
-            else:#gnuplot
+            else:  # gnuplot
                 graph_file_history = CFG_SITE_URL + "/img/" + os.path.basename(history_analysis_results[0])
                 html_content += """<tr><td valign=center align=center><img src='%s'/></td>""" % graph_file_history
             file_to_close_history = history_analysis_results[1]
             if file_to_close_history and os.path.exists(file_to_close_history):
                 os.unlink(file_to_close_history)
         if html_content != "":
             out += """<br/><br/><table><tr><td class="blocknote">
                       %s</td></tr><tr><td>
                       <table border="0" cellspacing="1" cellpadding="1">""" % _("Download history:")
             out += html_content + "</table></td></tr></table>"
 
     if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION:
         # do we show also user IP repartition?
         html_content = ""
         remove_old_img("download")
         #Users analysis graph
         ips = database_tuples_to_single_list(run_sql("select client_host from rnkDOWNLOADS where id_bibrec=%s;" % id_bibrec))
         if ips:
             users_analysis_results = create_users_analysis_graph(id_bibrec, ips)
             if users_analysis_results[0]:
                 file_to_close_users = users_analysis_results[1]
                 if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 1:
                     html_content += """<tr><td valign=center align=center><img src='%s/img/%s' align="center" alt=""></td>""" % (CFG_SITE_URL, os.path.basename(users_analysis_results[0]))
                 elif CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 2:
                     html_content += """<tr><td valign=center align=center>%s</td>""" % open(users_analysis_results[0]).read()
                 if file_to_close_users:
                     if os.path.exists(file_to_close_users):
                         os.unlink(file_to_close_users)
         if html_content != "":
             out += """<br/><br/><table><tr><td class="blocknote">
                       %s</td></tr><tr><td>
-                      <table border="0" cellspacing="1" cellpadding="1">""" %  _("Download user distribution:")
+                      <table border="0" cellspacing="1" cellpadding="1">""" % _("Download user distribution:")
             out += html_content
             out += "</table></td></tr></table>"
 
     # return html code used by get_file or search_engine
     return out
 
 def draw_downloads_statistics(id_bibrec, id_bibdoc_list):
     """Create a graph of download history using a temporary file to store datas
     and a new png file for each id_bibrec to store the image of the graph which will
     be referenced by html code."""
 
     intervals = []
     # used to name the different curves when len(id_bibdoc_list) > 1
     docfile_name_list = []
     # used to name the unique curve when len(id_bibdoc_list) = 0 or > CFG_ID_BIBDOC_ID_BIBREC
     record_name = ""
     record_name_query = run_sql("select value from bibrec_bib24x,bib24x where id_bibrec=%s and id_bibxxx=id;" % id_bibrec)
     if record_name_query:
         record_name = record_name_query[0][0]
     #list of lists of tuples: [[("09/2004",4),..],[(..,..)]..]
     #Each list element of the list is represented by a curve
     #each elem of each list is a point on the graph
     coordinates_list = []
 
 
     # If the document is not stored internally it has id_bibdoc 0 and no creation date
     #In this case the beginning date is the first time the document has been downloaded
     local_time = time.localtime()
     local_month = local_time.tm_mon
     local_year = local_time.tm_year
 
     creation_date_res = run_sql("""SELECT DATE_FORMAT(creation_date,"%%Y-%%m") FROM bibrec WHERE id=%s;""" % id_bibrec)
     if creation_date_res == ():
         creation_date_res = run_sql("""SELECT DATE_FORMAT(MIN(download_time),"%%Y-%%m") FROM rnkDOWNLOADS where id_bibrec=%s;""" % id_bibrec)
     if creation_date_res == (('0000-00',),):
         creation_date_year = local_year - 1
         creation_date_month = local_month
     else:
         creation_date_year, creation_date_month = string.split(creation_date_res[0][0], "-")
         creation_date_year = int(creation_date_year)
         creation_date_month = int(creation_date_month)
 
 
     #create intervals and corresponding values
     res = create_tic_intervals(local_year, local_month, creation_date_year, creation_date_month)
     intervals = res[1]
     tic_list = res[0]
 
     if id_bibdoc_list == []:
         coordinates_list.append(create_list_tuple_data(intervals, id_bibrec))
         docfile_name_list = record_name
     else:
         for i in range(len(id_bibdoc_list)):
             datas = create_list_tuple_data(intervals, id_bibrec, id_bibdoc_query_addition="and id_bibdoc=%s" % id_bibdoc_list[i])
             coordinates_list.append(datas)
             docname = run_sql("select docname from bibrec_bibdoc where id_bibdoc=%s and id_bibrec=%s;" % (id_bibdoc_list[i], id_bibrec))
             docfile_name_list.append(docname[0][0])
         # In case of multiple id_bibdocs, datas_max is used to draw a line which is the total of the other lines
         if not (len(intervals) == 1 or len(id_bibdoc_list) == 1):
             datas_max = create_list_tuple_total(intervals, coordinates_list)
             coordinates_list.append(datas_max)
     #write coordinates_list in a temporary file
-    result2 = write_coordinates_in_tmp_file(coordinates_list)
-
-    fname = result2[0]
-    y_max = result2[1]
+    graph_source_file, y_max = write_coordinates_in_tmp_file(coordinates_list)
     # Create the graph from the temporary file
-    return create_temporary_image(id_bibrec, 'download_history', fname, ' ', 'Times downloaded', [0, 0], y_max, id_bibdoc_list, docfile_name_list, tic_list)
+    graph_file = create_temporary_image(id_bibrec, 'download_history',
+                                        graph_source_file, ' ',
+                                        'Times downloaded', [0, 0], y_max,
+                                        id_bibdoc_list, docfile_name_list,
+                                        tic_list)
+    return graph_file, graph_source_file
 
 def create_list_tuple_data(intervals, id_bibrec, id_bibdoc_query_addition=""):
     """-Return a list of tuple of the form [('10/2004',3),(..)] used to plot graph
         Where 3 is the number of downloads between 01/10/2004 and 31/10/2004"""
     list_tuple = []
     for elem in intervals:
         main_date_end = string.split(elem[1], '/')
         end_of_month_end = calendar.monthrange(int(main_date_end[1]), int(main_date_end[0]))[1]
         s0 = string.split(elem[0], "/")
         s1 = string.split(elem[1], "/")
         elem0 = s0[1] + "-" + s0[0]
         elem1 = s1[1] + "-" + s1[0]
         date1 = "%s%s" % (elem0, "-01 00:00:00")
         date2 = "%s%s" % (elem1, "-%s 00:00:00" % str(end_of_month_end))
         sql_query = "select count(*) from rnkDOWNLOADS where id_bibrec=%s %s and download_time>='%s' and download_time<'%s';" % (id_bibrec, id_bibdoc_query_addition, date1, date2)
         res = run_sql(sql_query)[0][0]
         list_tuple.append((elem[0], res))
     #list_tuple = sort_list_tuple_by_date(list_tuple)
     return list_tuple
 
 def sort_list_tuple_by_date(list_tuple):
     """Sort a list of tuple of the forme ("09/2004", 3)according to the
     year of the first element of the tuple"""
     list_tuple.sort(lambda x, y: (cmp(string.split(x[0], '/')[1],
                                       string.split(y[0], '/')[1])))
     return list_tuple
 
 def create_list_tuple_total(intervals, list_data):
     """In the case of multiple id_bibdocs,  a last paragraph is added
     at the end to show the global evolution of the record"""
     list_tuple = []
     if len(intervals) == 1:
         res = 0
         for j in range(len(list_data)):
             res += list_data[j][1]
         list_tuple.append((intervals[0][0], res))
     else:
         for i in range(len(intervals)):
             res = 0
             for j in range(len(list_data)):
                 res += list_data[j][i][1]
             list_tuple.append((intervals[i][0], res))
         #list_tuple = sort_list_tuple_by_date(list_tuple)
     return list_tuple
 
 def create_tic_intervals(local_year, local_month, creation_date_year, creation_date_month):
     """Create intervals since document creation date until now
        Return a list of the tics for the graph of the form ["04/2004","05/2004"), ...]
        And a list of tuple(each tuple stands for a period) of the form [("04/2004", "04/2004"),.]
        to compute the number of downloads in each period
        For the very short periods some tics and tuples are added to  make sure that
        at least two dates are returned. Useful for drawing graphs.
     """
 
     # okay, off we go
     tic_list = []
     interval_list = []
     original_date = (creation_date_month, creation_date_year)
 
     while (creation_date_year, creation_date_month) <= (local_year, local_month) and creation_date_month <= 12:
         date_elem = "%s/%s" % (creation_date_month, creation_date_year)
         tic_list.append(date_elem)
         interval_list.append((date_elem, date_elem))
         if creation_date_month != 12:
             creation_date_month = creation_date_month+1
         else :
             creation_date_year = creation_date_year+1
             creation_date_month = 1
 
         next_period = (creation_date_month, creation_date_year)
 
     # additional periods for very short ranges
     if len(interval_list) <= 2:
         period_before = "%s/%s" % (sub_month(original_date[0], original_date[1]))
         period_after = "%s/%s" % next_period
         interval_list.insert(0, (period_before, period_before))
         interval_list.append((period_after, period_after))
         tic_list.insert(0, period_before)
         tic_list.append(period_after)
     return (tic_list, interval_list)
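
 # Worked example (hypothetical dates): for a record created in 11/2012 with
 # the current date 01/2013, the loop yields tics ["11/2012", "12/2012",
 # "1/2013"] and one (start, end) tuple per month; had there been two or fewer
 # months, one extra period would have been padded on each side so that the
 # graph always gets at least two x tics.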
 
 def add_month(month, year):
     """Add a month and increment the year if necessary"""
     if month == 12:
         month = 1
         year += 1
-    else  :
+    else:
         month += 1
     return (month, year)
 
 def sub_month(month, year):
     """Add a month and decrease the year if necessary"""
     if month == 1:
         month = 12
         year = year -1
     else :
         month -= 1
     return (month, year)
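
 # For example, add_month(12, 2004) -> (1, 2005) and sub_month(1, 2005) ->
 # (12, 2004); the two helpers are exact inverses at year boundaries.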
 
 def create_users_analysis_graph(id_bibrec, ips):
     """For a given id_bibrec, classify cern users and other users
     Draw a percentage graphic reprentation"""
     cern_users = 0
     other_users = 0
     coordinates_list = []
     #compute users repartition
-    for i in range(len(ips)):
-        if 2307522817 <= ips[i] <= 2307588095 or 2156724481 <= ips[i] <= 2156789759:
+    for ip in ips:
+        if 2307522817 <= ip <= 2307588095 or 2156724481 <= ip <= 2156789759:
             cern_users += 1
-        else :
+        else:
             other_users += 1
     tot = float(cern_users+other_users)
     #prepare coordinates list
     coordinates_list.append((1, str(float(cern_users)/tot*100)))
     coordinates_list.append((3, str(float(other_users)/tot*100)))
     #write coordinates in a temporary file
-    result2 = write_coordinates_in_tmp_file([coordinates_list])
+    graph_source_file, y_max = write_coordinates_in_tmp_file([coordinates_list])
     # example: graph_source_file='/path/to-invenio/var/www/img/tmpeC9GP5', y_max='100.0'
     # the file contains e.g.
     #   1 100.0
     #   3 0.0
     #plot the graph
-    return create_temporary_image(id_bibrec, 'download_users', result2[0], ' ', 'User distribution', (0, 0), result2[1], [], [], [1, 3])
-
-
+    graph_file = create_temporary_image(id_bibrec, 'download_users',
+                                        graph_source_file, ' ',
+                                        'User distribution', (0, 0), y_max,
+                                        [], [], [1, 3])
+    return graph_file, graph_source_file
diff --git a/modules/bibrank/lib/bibrank_grapher.py b/modules/bibrank/lib/bibrank_grapher.py
index a870427d5..bfee1bcdb 100644
--- a/modules/bibrank/lib/bibrank_grapher.py
+++ b/modules/bibrank/lib/bibrank_grapher.py
@@ -1,419 +1,460 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import os
 import tempfile
 
-from invenio.config import CFG_TMPSHAREDDIR, CFG_WEBDIR, CFG_SITE_URL, CFG_BIBRANK_SHOW_CITATION_GRAPHS,\
-    CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS,\
-    CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION
+from invenio.config import (CFG_TMPSHAREDDIR, CFG_WEBDIR, CFG_SITE_URL,
+    CFG_BIBRANK_SHOW_CITATION_GRAPHS, CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS,
+    CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION)
 
 ## test gnuplot presence:
-cfg_gnuplot_available = 1
+CFG_GNUPLOT_AVAILABLE = 1
 try:
     import Gnuplot
 except ImportError:
-    cfg_gnuplot_available = 0
+    CFG_GNUPLOT_AVAILABLE = 0
 
-GRAPH_TYPES = ((1, 'GNU plot'),(2, 'Flot'))
+GRAPH_TYPES = ((1, 'GNU plot'), (2, 'Flot'))
 
 def write_coordinates_in_tmp_file(lists_coordinates):
     """write the graph coordinates in a temporary file for reading it later
     by the create_temporary_image method
     lists_coordinates is a list of list of this form:
     [[(1,3),(2,4),(3,5)],[(1,5),(2,5),(3,6)]
     This file is organized into one or more sets of 2 columns.
     Each set is separated from the others by two blank lines.
     Each intern list represents a set and each tuple a line in the file where fist element
     of the tuple is the element of the first column, and second element of the
     tuple is the element of the second column.
     With gnuplot, first column is used as  x coordinates, and second column as y coordinates.
     One set represents a curve in the graph.
     """
     max_y_datas = 0
     (fd, fname) = tempfile.mkstemp(prefix='bibrank_grapher_', dir=CFG_TMPSHAREDDIR)
-    file_dest = os.fdopen(fd, 'a')
+    file_dest = os.fdopen(fd, 'w')
     for list_elem in lists_coordinates:
         y_axe = []
         #prepare data and store them in a file
         for key_value in list_elem:
-            file_dest.write("%s %s\n"%(key_value[0], key_value[1]))
+            file_dest.write("%s %s\n" % (key_value[0], key_value[1]))
             y_axe.append(key_value[1])
         max_tmp = 0
         if y_axe:
             max_tmp = max(y_axe)
         if max_tmp > max_y_datas:
             max_y_datas = max_tmp
         file_dest.write("\n\n")
     file_dest.close()
 
-    return [fname, max_y_datas]
+    return (fname, max_y_datas)
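
 # Sketch of the on-disk layout (hypothetical coordinates): the input
 # [[(1, 3), (2, 4)], [(1, 5), (2, 6)]] is written as two 2-column sets
 # separated by two blank lines, and (fname, 6) is returned since 6 is the
 # largest y value seen:
 #
 #     1 3
 #     2 4
 #                (two blank lines separate the sets)
 #     1 5
 #     2 6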
 
-def create_temporary_image(recid, kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals):
+def create_temporary_image(recid, kind_of_graph, data_file, x_label, y_label,
+                           origin_tuple, y_max, docid_list, graphe_titles,
+                           intervals, dest_dir=os.path.join(CFG_WEBDIR, 'img')):
     """From a temporary file, draw a gnuplot or flot graph
     The arguments are as follows:
     recid          - record ID
     kind_of_graph  - takes one of these values: "citation", "download_history", "download_users"
                      All the common gnuplot commands for these cases are written first,
                      then the commands particular to each case follow.
     data_file      - Name of the temporary file which contains the gnuplot data used to plot the graph.
                      This file is organized into one or more sets of 2 columns.
                      First column contains x coordinates, and second column contains y coordinates.
                      Each set is separated from the others by two blank lines.
     x_label        - Name of the x axis.
     y_label        - Name of the y axis.
     origin_tuple   - Reference coordinates for positioning the graph.
     y_max          - Max value of y. Used to set y range.
     docid_list     - In download_history case, docid_list is used to plot multiple curves.
     graphe_titles  - List of graph titles. It's used to name the curve in the legend.
     intervals      - x tics location and xrange specification"""
+    fd, graph_tmp_file = tempfile.mkstemp(prefix='tmp_%s_%s_' % (kind_of_graph, recid),
+                                          suffix='.png',
+                                          dir=dest_dir)
+    os.close(fd)
+
     if (kind_of_graph == "citation" and CFG_BIBRANK_SHOW_CITATION_GRAPHS == 1) or \
         (kind_of_graph == "download_history" and CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 1) or \
         (kind_of_graph == "download_users" and CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 1):
-        if cfg_gnuplot_available == 0:
+        if CFG_GNUPLOT_AVAILABLE == 0:
             return None
-        #Graphe name: file to store graph
-        graphe_name = "tmp_%s_%s_stats.png" % (kind_of_graph, recid)
-        create_temporary_gnuplot_image(recid, kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals, graphe_name)
+        # Graph name: file to store graph
+        if kind_of_graph == 'citation':
+            # Rename is done outside of this function
+            dest_graph_name = None
+        else:
+            dest_graph_name = "tmp_%s_%s_stats.png" % (kind_of_graph, recid)
+        create_temporary_gnuplot_image(kind_of_graph, data_file, x_label,
+                                       y_label, origin_tuple, y_max,
+                                       docid_list, graphe_titles, intervals,
+                                       dest=graph_tmp_file)
     elif (kind_of_graph == "citation" and CFG_BIBRANK_SHOW_CITATION_GRAPHS == 2) or \
         (kind_of_graph == "download_history" and CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 2) or \
         (kind_of_graph == "download_users" and CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 2):
-        graphe_name = "tmp_%s_%s_stats.html" % (kind_of_graph, recid)
-        create_temporary_flot_image(recid, kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals, graphe_name)
+        dest_graph_name = "tmp_%s_%s_stats.html" % (kind_of_graph, recid)
+        create_temporary_flot_image(kind_of_graph, data_file, x_label,
+                                    y_label, origin_tuple, y_max, docid_list,
+                                    graphe_titles, intervals,
+                                    dest=graph_tmp_file)
     else:
-        graphe_name = "tmp_error.html"
-        open(CFG_WEBDIR + "/img/" + graphe_name, 'w').write("Error, select a correct format")
-    return (graphe_name, data_file)
+        dest_graph_name = "tmp_error.html"
+        f = open(graph_tmp_file, 'w')
+        try:
+            f.write("Error, select a correct format")
+        finally:
+            f.close()
 
-def create_temporary_gnuplot_image(recid, kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals, graphe_name):
+    if os.stat(graph_tmp_file).st_size == 0:
+        # Somehow the graph was not generated properly; clean up and bail out
+        os.unlink(graph_tmp_file)
+        return None
+
+    if dest_graph_name:
+        dest = os.path.join(dest_dir, dest_graph_name)
+        try:
+            os.rename(graph_tmp_file, dest)
+        except OSError:
+            os.unlink(graph_tmp_file)
+    else:
+        dest = graph_tmp_file
+
+    return dest
+
+def create_temporary_gnuplot_image(kind_of_graph, data_file, x_label, y_label,
+                                   origin_tuple, y_max, docid_list,
+                                   graphe_titles, intervals, dest):
     # colors used for the different curves
     color_line_list = ['4', '3', '2', '9', '6']
     #Gnuplot graphe object
     g = Gnuplot.Gnuplot()
     g('set terminal png small')
-    g('set output "%s/img/%s"' % (CFG_WEBDIR, graphe_name))
+    g('set output "%s"' % dest)
     len_intervals = len(intervals)
     len_docid_list = len(docid_list)
     # Standard options
     #g('set size 0.5,0.5')
     g('set origin %s,%s' % (origin_tuple[0], origin_tuple[1]))
     if x_label == '':
         g('unset xlabel')
     else:
         g.xlabel(s=x_label)
     if y_label == '':
         g('unset ylabel')
     else:
         g.ylabel(s=y_label)
     g('set bmargin 5')
     # leave some space at the top of the graph
     g('set tmargin 1')
 
     # Will be passed to g at the end to plot the graph
     plot_text = ""
 
     if kind_of_graph == 'download_history':
-        g('set xdata time') #Set x scale as date
-        g('set timefmt "%m/%Y"') #Inform about format in file .dat
-        g('set format x "%b %y"') #Format displaying
+        g('set xdata time')        # Set x scale as date
+        g('set timefmt "%m/%Y"')   # Inform about format in file .dat
+        g('set format x "%b %y"')  # Format displaying
         if len(intervals) > 1:
             g('set xrange ["%s":"%s"]' % (intervals[0], intervals[len_intervals-1]))
         y_offset = max(3, float(y_max)/60)
         g('set yrange [0:%s]' % str(y_max + y_offset))
         if len_intervals > 1 and len_intervals <= 12:
-            g('set xtics rotate %s' % str(tuple(intervals)))#to prevent duplicate tics
+            g('set xtics rotate %s' % str(tuple(intervals)))  # to prevent duplicate tics
         elif len_intervals > 12 and len_intervals <= 24:
-            g('set xtics rotate "%s", 7776000, "%s"' % (intervals[0], intervals[len_intervals-1]))              #3 months intervalls
+            g('set xtics rotate "%s", 7776000, "%s"' % (intervals[0], intervals[len_intervals-1]))  # 3 months intervalls
         else :
-            g('set xtics rotate "%s",15552000, "%s"' % (intervals[0], intervals[len_intervals-1]))              #6 months intervalls
+            g('set xtics rotate "%s",15552000, "%s"' % (intervals[0], intervals[len_intervals-1]))  # 6 months intervalls
 
-        if len_docid_list <= 1: #Only one curve
+        if len_docid_list <= 1:  # Only one curve
             #g('set style fill solid 0.25')
             if len(intervals)<=4:
                 plot_text = plot_command(1, data_file, (0, 0), "", "imp", color_line_list[0], 20)
             else:
                 plot_text = plot_command(1, data_file, (0, 0), "", "linespoint", color_line_list[0], 1, "pt 26", "ps 0.5")
-        elif len_docid_list > 1: #Multiple curves
+        elif len_docid_list > 1:  # Multiple curves
             if len(intervals)<=4:
                 plot_text = plot_command(1, data_file, (0, 0), graphe_titles[0], "imp", color_line_list[0], 20)
             else:
                 plot_text = plot_command(1, data_file, (0, 0), graphe_titles[0], "linespoint", color_line_list[0], 1, "pt 26", "ps 0.5")
             for d in range(1, len_docid_list):
                 if len(intervals)<=4:
                     plot_text += plot_command(0, data_file, (d, d) , graphe_titles[d], "imp", color_line_list[d], 20)
                 else :
                     plot_text += plot_command(0, data_file, (d, d) , graphe_titles[d], "linespoint", color_line_list[d], 1, "pt 26", "ps 0.5")
             if len(intervals)>2:
-                plot_text += plot_command(0, data_file, (len_docid_list, len_docid_list), "", "impulses", 0, 2 )
+                plot_text += plot_command(0, data_file, (len_docid_list, len_docid_list), "", "impulses", 0, 2)
                 plot_text += plot_command(0, data_file, (len_docid_list, len_docid_list), "TOTAL", "lines", 0, 5)
 
     elif kind_of_graph == 'download_users':
         g('set xrange [0:4]')
         g('set yrange [0:100]')
         g('set format y "%g %%"')
         g("""set xtics ("" 0, "CERN\\n Users" 1, "Other\\n Users" 3, "" 4)""")
         g('set ytics 0,10,100')
         g('set boxwidth 0.7 relative')
         g('set style fill solid 0.25')
         plot_text = 'plot "%s" using 1:2 title "" with boxes lt 7 lw 2' % data_file
 
-    else: #citation
+    else:  # citation
         g('set boxwidth 0.6 relative')
         g('set style fill solid 0.250000 border -1')
         g('set xtics rotate')
         if len(intervals) > 1:
             g('set xrange [%s:%s]' % (str(intervals[0]), str(intervals[len_intervals-1])))
         else:
             g('set xrange [%s:%s]' % (str(intervals[0]-1), str(intervals[0]+1)))
         g('set yrange [0:%s]' %str(y_max+2))
         plot_text = """plot "% s" index 0:0 using 1:2 title "" w steps lt %s lw 3"""  % (data_file, color_line_list[1])
 
     g('%s' % plot_text)
 
-def create_temporary_flot_image(recid, kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals, graphe_name):
+def create_temporary_flot_image(kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals, dest):
     out = """
               <!--[if IE]><script language="javascript" type="text/javascript" src="%(site)s/js/excanvas.min.js"></script><![endif]-->
               <script language="javascript" type="text/javascript" src="%(site)s/js/jquery.flot.min.js"></script>
               <script language="javascript" type="text/javascript" src="%(site)s/js/jquery.flot.selection.min.js"></script>
               <script id="source" language="javascript" type="text/javascript">
                      document.write('<div style="float:left"><div id="placeholder%(graph)s" style="width:500px;height:400px"></div></div>'+
               '<div id="miniature%(graph)s" style="float:left;margin-left:20px;margin-top:50px">' +
               '<div id="overview%(graph)s" style="width:250px;height:200px"></div>' +
               '<p id="overviewLegend%(graph)s" style="margin-left:10px"></p>' +
               '</div>');
                      $(function () {
                              function parseDate%(graph)s(sdate){
                                  var div1 = sdate.split('/');
                                  if(div1.length == 1){
                                      return new Date(sdate).getTime() - (new Date().getTimezoneOffset() * 60 * 1000) ;}
                                  else{
                                      return new Date(div1[1], div1[0]-1).getTime() - (new Date().getTimezoneOffset() * 60 * 1000) ;}
                              }
                              function getData%(graph)s() {""" % \
         {'site' : CFG_SITE_URL, 'graph' : kind_of_graph}
     # Set options
     data = open(data_file, 'r')
     tics = ""
     lines = 'lines'
     if kind_of_graph == 'download_history':
         if len(intervals) > 1 :
             tics += 'xaxis: { mode:"time",min:parseDate%s("%s"),max:parseDate%s("%s")},'\
             % (kind_of_graph, intervals[0], kind_of_graph, intervals[len(intervals)-1])
         tics += """yaxis: {
                 tickDecimals : 0
             },
 """
         options = """var options%s ={
             %s
            """% (kind_of_graph, tics)
         if len(intervals)<=4:
             options += """series: {
                bars: { show: true },
                points: { show: false }
             },
 """
             lines = 'bars'
         else:
             options += """series: {
                lines: { show: true },
                points: { show: false }
             },
 """
-        if len(docid_list) > 1: #Multiple curves
+        if len(docid_list) > 1:  # Multiple curves
             options += """,
             legend: { show : true}"""
             for d in range(1, len(docid_list)):
-                out += """var d%s = ["""%d
+                out += """var d%s = [""" % d
                 first = 0
                 while True:
-                    x,_,y = data.readline().partition(' ')
+                    x, _, y = data.readline().partition(' ')
                     if y == '':
                         data.readline()
                         break
                     if first == 0:
                         first = 1
                     else:
                         out += ', '
                         out += '[parseDate%s("%s"),%s]' % \
-                            (kind_of_graph, x,y.strip())
+                            (kind_of_graph, x, y.strip())
                 out += """];
                            """
             out += """
                  return [d1];
                   }
         """
 
     elif kind_of_graph == 'download_users':
         options = """var options%s ={xaxis: { ticks: [[1, "CERN\\n Users"], [2, "Other\\n Users"]] },
                          yaxis: { min: 0, max: 100},
                          series: {
                              bars: { show: true , align: "center"},
                              points: { show: false }
                          },
                          legend: { show : false},
 """ % kind_of_graph
         lines = 'bars'
-    else: #citation
+    else:  # citation
         tics += """xaxis: { mode:"time",min:parseDate%s("%s"),max:parseDate%s("%s")},
                          yaxis: { min: 0, max: %s},""" % (kind_of_graph, str(intervals[0]),
                                 kind_of_graph, str(intervals[len(intervals)-1]), str(y_max+2))
         options = """var options%s = {
                          %s
                          series: {
                              lines: { show: true },
                              points: { show: true }
                          },
                          legend: { show : false},
 """ % (kind_of_graph, tics)
-    if docid_list is None or len(docid_list) <= 1: #Only one curve
+    if docid_list is None or len(docid_list) <= 1:  # Only one curve
         out += """var d1 = ["""
         if kind_of_graph == 'download_users':
             out += '[1,%s], [2,%s]' % (data.readline().partition(' ')[2].strip(),
                                        data.readline().partition(' ')[2].strip())
         else:
             first = 0
             for line in data:
-                x,_,y = line.partition(' ')
+                x, _, y = line.partition(' ')
                 if y == '':
                     break
                 if first == 0:
                     first = 1
                 else:
                     out += ', '
                 out += '[parseDate%s("%s"),%s]' % (kind_of_graph, x, y.strip())
         out += """];
                  return [d1];
                   }
         """
     options += """grid: { hoverable: true, clickable: true },
             selection: { mode: "xy" } };"""
     # Generate also the gnuplot image in case javascript is disabled
-    create_temporary_gnuplot_image(recid, kind_of_graph, data_file, x_label, y_label, origin_tuple, y_max, docid_list, graphe_titles, intervals, graphe_name[:-4] + "png")
+    create_temporary_gnuplot_image(kind_of_graph, data_file, x_label, y_label,
+                                   origin_tuple, y_max, docid_list,
+                                   graphe_titles, intervals, dest=dest)
     # Write the plot method in javascript
     out += """%(options)s
     var startData%(graph)s = getData%(graph)s();
     var plot%(graph)s = $.plot($("#placeholder%(graph)s"), startData%(graph)s, options%(graph)s);
     var overview%(graph)s = $.plot($("#overview%(graph)s"), startData%(graph)s, {
              legend: { show: true, container: $("#overviewLegend%(graph)s") },
              series: {
                 %(lines)s: { show: true, lineWidth: 1 },
                 shadowSize: 0
              },
              %(tics)s
              grid: { color: "#999" },
              selection: { mode: "xy" }
            });
            """% {'options' : options, 'lines' : lines, 'tics' : tics, 'graph' : kind_of_graph}
 
     # Tooltip and zoom
     out += """    function showTooltip%(graph)s(x, y, contents) {
     $('<div id="tooltip%(graph)s">' + contents + '</div>').css( {
         position: 'absolute',
         display: 'none',
         top: y - 5,
         left: x + 10,
         border: '1px solid #fdd',
         padding: '2px',
         'background-color': '#fee',
         opacity: 0.80
     }).appendTo("body").fadeIn(200);
 }
 
 var previousPoint%(graph)s = null;
 $("#placeholder%(graph)s").bind("plothover", function (event, pos, item) {
 
     if (item) {
         if (previousPoint%(graph)s != item.datapoint) {
             previousPoint%(graph)s = item.datapoint;
 
             $("#tooltip%(graph)s").remove();
             var y = item.datapoint[1];
 
             showTooltip%(graph)s(item.pageX, item.pageY, y);
         }
     }
     else {
         $("#tooltip%(graph)s").remove();
         previousPoint%(graph)s = null;
     }
 });
 
 $("#placeholder%(graph)s").bind("plotclick", function (event, pos, item) {
     if (item) {
         plot%(graph)s.highlight(item.series, item.datapoint);
     }
 });
     $("#placeholder%(graph)s").bind("plotselected", function (event, ranges) {
     // clamp the zooming to prevent eternal zoom
 
     if (ranges.xaxis.to - ranges.xaxis.from < 0.00001){
         ranges.xaxis.to = ranges.xaxis.from + 0.00001;}
     if (ranges.yaxis.to - ranges.yaxis.from < 0.00001){
         ranges.yaxis.to = ranges.yaxis.from + 0.00001;}
 
     // do the zooming
     plot%(graph)s = $.plot($("#placeholder%(graph)s"), startData%(graph)s,
                   $.extend(true, {}, options%(graph)s, {
                       xaxis: { min: ranges.xaxis.from, max: ranges.xaxis.to },
                       yaxis: { min: ranges.yaxis.from, max: ranges.yaxis.to }
                   }));
 
     // don't fire event on the overview to prevent eternal loop
     overview%(graph)s.setSelection(ranges, true);
 });
 $("#overview%(graph)s").bind("plotselected", function (event, ranges) {
     plot%(graph)s.setSelection(ranges);
 });
 });
             </script>""" % {'graph' : kind_of_graph}
     # Support for disabled javascript
     out += "<noscript>"
-    out += """<img src='%s/img/%s' align="center" alt="">"""% (CFG_SITE_URL, graphe_name[:-4] + "png")
+    out += """<img src='%s/img/%s' align="center" alt="">"""% (CFG_SITE_URL, os.path.basename(dest))
     out += "</noscript>"
-    open(CFG_WEBDIR + "/img/" + graphe_name, 'w').write(out)
+    open(dest, 'w').write(out)
     data.close()
-def remove_old_img(prefix_file_name):
-    """Detele all the images older than 10 minutes to prevent to much storage
+
+def remove_old_img(prefix_file_name, directory="%s/img/" % CFG_WEBDIR):
+    """Delete all the images older than 10 minutes to prevent to much storage
     Takes 0.0 seconds for 50 files to delete"""
 
-    command = "find %s/img/ -name tmp_%s*.png -amin +10 -exec rm -f {} \;" % (CFG_WEBDIR, prefix_file_name)
+    command = "find %s -name tmp_%s*.png -amin +10 -exec rm -f {} \\;" \
+                                                % (directory, prefix_file_name)
     return os.system(command)
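
 # Usage sketch for remove_old_img (hypothetical prefix name, not part of this
 # patch): remove_old_img("citegraph") expands, with the default directory, to
 #   find <CFG_WEBDIR>/img/ -name tmp_citegraph*.png -amin +10 -exec rm -f {} \;
 # i.e. only temporary PNGs last accessed more than 10 minutes ago are removed.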
 
 def plot_command(first_line, file_source, indexes, title, style, line_type, line_width, point_type="", point_size=""):
     """Return a string of a gnuplot plot command.Particularly useful when multiple curves
     From a temporary file, draw a gnuplot graph
     Return a plot command string as follows:
     plot datafile <first curve parameters>, datafile <second curve parameters>,...
     The arguments are as follows:
     first_line   - only the drawing command of the first curve contains the word plot
     file_source  - data file source which containes coordinates
     indexes      - points out set number in data file source
     title        - title of the curve in the legend box
     style        - respresentation of the curve ex: linespoints, lines ...
     line_type    - color of the line
     line_width   - width of the line
     point_type   - optionnal parameter: if not mentionned it's a wide string.
                    Using in the case of style = linespoints to set point style"""
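     # Illustrative call (assumed values, not part of this patch):
     #   plot_command(True, "/tmp/data.dat", (0, 0), "downloads", "lines", 1, 2)
     # returns roughly:
     #   plot "/tmp/data.dat" index 0:0 using 1:2 title "downloads" with lines lt 1 lw 2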
     if first_line:
         plot_text = """plot "%s" index %s:%s using 1:2 title "%s" with %s lt %s lw %s %s %s"""  % (file_source, indexes[0], indexes[1], title, style, line_type, line_width, point_type, point_size)
     else:
         plot_text = """, "%s" index %s:%s using 1:2 title "%s" with %s lt %s lw %s %s %s"""  % (file_source, indexes[0], indexes[1], title, style, line_type, line_width, point_type, point_size)
     return plot_text
diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py
index 3c71b7259..4504ef8e3 100644
--- a/modules/websearch/lib/search_engine.py
+++ b/modules/websearch/lib/search_engine.py
@@ -1,6736 +1,6743 @@
 # -*- coding: utf-8 -*-
 
 ## This file is part of Invenio.
 ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 # pylint: disable=C0301
 
 """Invenio Search Engine in mod_python."""
 
 __lastupdated__ = """$Date$"""
 
 __revision__ = "$Id$"
 
 ## import general modules:
 import cgi
 import cStringIO
 import copy
 import string
 import os
 import re
 import time
 import urllib
 import urlparse
 import zlib
 import sys
 
 try:
     ## import optional module:
     import numpy
     CFG_NUMPY_IMPORTABLE = True
 except:
     CFG_NUMPY_IMPORTABLE = False
 
 if sys.hexversion < 0x2040000:
     # pylint: disable=W0622
     from sets import Set as set
     # pylint: enable=W0622
 
 ## import Invenio stuff:
 from invenio.config import \
      CFG_CERN_SITE, \
      CFG_INSPIRE_SITE, \
      CFG_OAI_ID_FIELD, \
      CFG_WEBCOMMENT_ALLOW_REVIEWS, \
      CFG_WEBSEARCH_CALL_BIBFORMAT, \
      CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \
      CFG_WEBSEARCH_FIELDS_CONVERT, \
      CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \
      CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \
      CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \
      CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \
      CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \
      CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \
      CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS, \
      CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \
      CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \
      CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \
      CFG_WEBSEARCH_SYNONYM_KBRS, \
      CFG_SITE_LANG, \
      CFG_SITE_NAME, \
      CFG_LOGDIR, \
      CFG_BIBFORMAT_HIDDEN_TAGS, \
      CFG_SITE_URL, \
      CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \
      CFG_SOLR_URL, \
      CFG_WEBSEARCH_DETAILED_META_FORMAT, \
      CFG_SITE_RECORD, \
      CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT, \
      CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY, \
      CFG_BIBSORT_BUCKETS, \
      CFG_XAPIAN_ENABLED, \
      CFG_BIBINDEX_CHARS_PUNCTUATION
 
 from invenio.search_engine_config import \
      InvenioWebSearchUnknownCollectionError, \
      InvenioWebSearchWildcardLimitError, \
      CFG_WEBSEARCH_IDXPAIRS_FIELDS,\
      CFG_WEBSEARCH_IDXPAIRS_EXACT_SEARCH
 from invenio.search_engine_utils import get_fieldvalues, get_fieldvalues_alephseq_like
 from invenio.bibrecord import create_record, record_xml_output
 from invenio.bibrank_record_sorter import get_bibrank_methods, is_method_valid, rank_records as rank_records_bibrank
 from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list
 from invenio.bibindex_engine_stemmer import stem
 from invenio.bibindex_tokenizers.BibIndexDefaultTokenizer import BibIndexDefaultTokenizer
 from invenio.bibindex_tokenizers.BibIndexCJKTokenizer import BibIndexCJKTokenizer, is_there_any_CJK_character_in_text
 from invenio.bibindex_engine_utils import author_name_requires_phrase_search
 from invenio.bibindex_engine_washer import wash_index_term, lower_index_term, wash_author_name
 from invenio.bibindex_engine_config import CFG_BIBINDEX_SYNONYM_MATCH_TYPE
 from invenio.bibindex_engine_utils import get_idx_indexer
 from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel
 from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT
 from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box
 from invenio.bibknowledge import get_kbr_values
 from invenio.data_cacher import DataCacher
 from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search
 from invenio.access_control_admin import acc_get_action_id
 from invenio.access_control_config import VIEWRESTRCOLL, \
     CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS, \
     CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS
 from invenio.websearchadminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts
 from invenio.intbitset import intbitset
 from invenio.dbquery import DatabaseError, deserialize_via_marshal, InvenioDbQueryWildcardLimitError
 from invenio.access_control_engine import acc_authorize_action
 from invenio.errorlib import register_exception
 from invenio.textutils import encode_for_xml, wash_for_utf8, strip_accents
 from invenio.htmlutils import get_mathjax_header
 from invenio.htmlutils import nmtoken_from_string
 from invenio import bibrecord
 
 import invenio.template
 webstyle_templates = invenio.template.load('webstyle')
 webcomment_templates = invenio.template.load('webcomment')
 
 from invenio.bibrank_citation_searcher import calculate_cited_by_list, \
     calculate_co_cited_with_list, get_records_with_num_cites, \
     get_refersto_hitset, get_citedby_hitset, get_cited_by_list, \
-    get_refers_to_list
+    get_refers_to_list, get_citers_log
 from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box
 from invenio.bibrank_selfcites_searcher import get_self_cited_by_list, \
                                                get_self_cited_by, \
                                                get_self_refers_to, \
                                                get_self_refers_to_list
 
 
 from invenio.dbquery import run_sql, \
                             run_sql_with_limit, \
                             wash_table_column_name, \
                             get_table_update_time
 from invenio.webuser import getUid, collect_user_info, session_param_set
 from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box, write_warning
 from invenio.messages import gettext_set_language
 from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \
     SpiresToInvenioSyntaxConverter
 
 from invenio import webinterface_handler_config as apache
 from invenio.solrutils_bibindex_searcher import solr_get_bitset
 from invenio.xapianutils_bibindex_searcher import xapian_get_bitset
 from invenio.websearch_services import \
      get_search_services, \
      CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE, \
      CFG_WEBSEARCH_SERVICE_MAX_NB_SERVICE_DISPLAY, \
      CFG_WEBSEARCH_SERVICE_MIN_RELEVANCE_TO_DISPLAY, \
      CFG_WEBSEARCH_SERVICE_MAX_RELEVANCE_DIFFERENCE
 
 try:
     import invenio.template
     websearch_templates = invenio.template.load('websearch')
 except:
     pass
 
 from invenio.websearch_external_collections import calculate_hosted_collections_results, do_calculate_hosted_collections_results
 from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH
 from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH
 from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS
 
 VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL)
 
 ## global vars:
 cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing a given collection
 cfg_nicely_ordered_collection_list = 0 # do we propose the collection list nicely ordered or alphabetically?
 
 ## precompile some often-used regexp for speed reasons:
 re_word = re.compile('[\s]')
 re_quotes = re.compile('[\'\"]')
 re_doublequote = re.compile('\"')
 re_logical_and = re.compile('\sand\s', re.I)
 re_logical_or = re.compile('\sor\s', re.I)
 re_logical_not = re.compile('\snot\s', re.I)
 re_operators = re.compile(r'\s([\+\-\|])\s')
 re_pattern_wildcards_after_spaces = re.compile(r'(\s)[\*\%]+')
 re_pattern_single_quotes = re.compile("'(.*?)'")
 re_pattern_double_quotes = re.compile("\"(.*?)\"")
 re_pattern_parens_quotes = re.compile(r'[\'\"]{1}[^\'\"]*(\([^\'\"]*\))[^\'\"]*[\'\"]{1}')
 re_pattern_regexp_quotes = re.compile("\/(.*?)\/")
 re_pattern_spaces_after_colon = re.compile(r'(:\s+)')
 re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+')
 re_pattern_space = re.compile("__SPACE__")
 re_pattern_today = re.compile("\$TODAY\$")
 re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)')
 re_punctuation_followed_by_space = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION + '\s')
 
 ## em possible values
 EM_REPOSITORY={"body" : "B",
                "header" : "H",
                "footer" : "F",
                "search_box" : "S",
                "see_also_box" : "L",
                "basket" : "K",
                "alert" : "A",
                "search_info" : "I",
                "overview" : "O",
                "all_portalboxes" : "P",
                "te_portalbox" : "Pte",
                "tp_portalbox" : "Ptp",
                "np_portalbox" : "Pnp",
                "ne_portalbox" : "Pne",
                "lt_portalbox" : "Plt",
                "rt_portalbox" : "Prt",
                "search_services": "SER"};
 
 class RestrictedCollectionDataCacher(DataCacher):
     def __init__(self):
         def cache_filler():
             ret = []
             try:
                 res = run_sql("""SELECT DISTINCT ar.value
                     FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id
                     WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (VIEWRESTRCOLL_ID,), run_on_slave=True)
             except Exception:
                 # database problems, return empty cache
                 return []
             for coll in res:
                 ret.append(coll[0])
             return ret
 
         def timestamp_verifier():
             return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT'))
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 def collection_restricted_p(collection, recreate_cache_if_needed=True):
     if recreate_cache_if_needed:
         restricted_collection_cache.recreate_cache_if_needed()
     return collection in restricted_collection_cache.cache
 
 try:
     restricted_collection_cache.is_ok_p
 except Exception:
     restricted_collection_cache = RestrictedCollectionDataCacher()
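
 # Minimal sketch of how this cache is consumed further down in this module
 # (the collection name is only an example):
 #   restricted_collection_cache.recreate_cache_if_needed()
 #   is_restricted = 'Theses' in restricted_collection_cache.cache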
 
 
 def ziplist(*lists):
     """Just like zip(), but returns lists of lists instead of lists of tuples
 
     Example:
     zip([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) =>
        [(f1, p1, op1), (f2, p2, op2), (f3, p3, '')]
     ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) =>
        [[f1, p1, op1], [f2, p2, op2], [f3, p3, '']]
 
     FIXME: This is handy to have, and should live somewhere else, like
            miscutil.really_useful_functions or something.
     XXX: Starting in python 2.6, the same can be achieved (faster) by
          using itertools.izip_longest(); when the minimum recommended Python
          is bumped, we should use that instead.
     """
     def l(*items):
         return list(items)
     return map(l, *lists)
 
 
 def get_permitted_restricted_collections(user_info, recreate_cache_if_needed=True):
     """Return a list of collection that are restricted but for which the user
     is authorized."""
     if recreate_cache_if_needed:
         restricted_collection_cache.recreate_cache_if_needed()
     ret = []
     for collection in restricted_collection_cache.cache:
         if acc_authorize_action(user_info, 'viewrestrcoll', collection=collection)[0] == 0:
             ret.append(collection)
     return ret
 
 def get_all_restricted_recids():
     """
     Return the set of all the restricted recids, i.e. the ids of those records
     which belong to at least one restricted collection.
     """
     ret = intbitset()
     for collection in restricted_collection_cache.cache:
         ret |= get_collection_reclist(collection)
     return ret
 
 def get_restricted_collections_for_recid(recid, recreate_cache_if_needed=True):
     """
     Return the list of restricted collection names to which recid belongs.
     """
     if recreate_cache_if_needed:
         restricted_collection_cache.recreate_cache_if_needed()
         collection_reclist_cache.recreate_cache_if_needed()
     return [collection for collection in restricted_collection_cache.cache if recid in get_collection_reclist(collection, recreate_cache_if_needed=False)]
 
 def is_user_owner_of_record(user_info, recid):
     """
     Check if the user is the owner of the record, i.e. the submitter
     and/or a member of an owner-like group authorized to 'see' the record.
 
     @param user_info: the user_info dictionary that describes the user.
     @type user_info: user_info dictionary
     @param recid: the record identifier.
     @type recid: positive integer
     @return: True if the user is 'owner' of the record; False otherwise
     @rtype: bool
     """
     authorized_emails_or_group = []
     for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS:
         authorized_emails_or_group.extend(get_fieldvalues(recid, tag))
     for email_or_group in authorized_emails_or_group:
         if email_or_group in user_info['group']:
             return True
         email = email_or_group.strip().lower()
         if user_info['email'].strip().lower() == email:
             return True
     return False
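
 # Illustrative example (hypothetical tag content): if one of the tags listed in
 # CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS holds "jane.doe@example.org"
 # for record 10, then a user whose user_info['email'] is "Jane.Doe@example.org"
 # owns record 10, as the comparison uses the stripped, lower-cased email.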
 
 ###FIXME: This method needs to be refactored
 def is_user_viewer_of_record(user_info, recid):
     """
     Check if the user is allowed to view the record based on the MARC tags
     inside CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS
     i.e. his email is inside the 506__m tag or he is inside an e-group listed
     in the 506__m tag
 
     @param user_info: the user_info dictionary that describes the user.
     @type user_info: user_info dictionary
     @param recid: the record identifier.
     @type recid: positive integer
     @return: True if the user is 'allowed to view' the record; False otherwise
     @rtype: bool
     """
 
     authorized_emails_or_group = []
     for tag in CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS:
         authorized_emails_or_group.extend(get_fieldvalues(recid, tag))
     for email_or_group in authorized_emails_or_group:
         if email_or_group in user_info['group']:
             return True
         email = email_or_group.strip().lower()
         if user_info['email'].strip().lower() == email:
             return True
     return False
 
 def check_user_can_view_record(user_info, recid):
     """
     Check if the user is authorized to view the given recid. The function
     grants access in two cases: either the user has author rights on this
     record, or he has view rights to the primary collection this record
     belongs to.
 
     @param user_info: the user_info dictionary that describes the user.
     @type user_info: user_info dictionary
     @param recid: the record identifier.
     @type recid: positive integer
     @return: (0, ''), when authorization is granted, (>0, 'message') when
     authorization is not granted
     @rtype: (int, string)
     """
     policy = CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY.strip().upper()
     if isinstance(recid, str):
         recid = int(recid)
     ## At this point, either webcoll has not yet run or there are some
     ## restricted collections. Let's see first if the user owns the record.
     if is_user_owner_of_record(user_info, recid):
         ## Perfect! It's authorized then!
         return (0, '')
 
     if is_user_viewer_of_record(user_info, recid):
         ## Perfect! It's authorized then!
         return (0, '')
 
     restricted_collections = get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False)
     if not restricted_collections and record_public_p(recid):
         ## The record is public and not part of any restricted collection
         return (0, '')
     if restricted_collections:
         ## If there are restricted collections the user must be authorized to all/any of them (depending on the policy)
         auth_code, auth_msg = 0, ''
         for collection in restricted_collections:
             (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collection)
             if auth_code and policy != 'ANY':
                 ## Ouch! the user is not authorized to this collection
                 return (auth_code, auth_msg)
             elif auth_code == 0 and policy == 'ANY':
                 ## Good! At least one collection is authorized
                 return (0, '')
         ## Depending on the policy, the user will be either authorized or not
         return auth_code, auth_msg
     if is_record_in_any_collection(recid, recreate_cache_if_needed=False):
         ## the record is not in any restricted collection
         return (0, '')
     elif record_exists(recid) > 0:
         ## We are in the case where webcoll has not run.
         ## Let's authorize SUPERADMIN
         (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=None)
         if auth_code == 0:
             return (0, '')
         else:
             ## Too bad. Let's print a nice message:
             return (1, """The record you are trying to access has just been
 submitted to the system and needs to be assigned to the
 proper collections. It is currently restricted for security reasons
 until the assignment is fully completed. Please come back later to
 properly access this record.""")
     else:
         ## The record either does not exist or has been deleted.
         ## Let's handle these situations outside of this code.
         return (0, '')
 
 class IndexStemmingDataCacher(DataCacher):
     """
     Provides cache for stemming information for word/phrase indexes.
     This class is not to be used directly; use function
     get_index_stemming_language() instead.
     """
     def __init__(self):
         def cache_filler():
             try:
                 res = run_sql("""SELECT id, stemming_language FROM idxINDEX""")
             except DatabaseError:
                 # database problems, return empty cache
                 return {}
             return dict(res)
 
         def timestamp_verifier():
             return get_table_update_time('idxINDEX')
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     index_stemming_cache.is_ok_p
 except Exception:
     index_stemming_cache = IndexStemmingDataCacher()
 
 def get_index_stemming_language(index_id, recreate_cache_if_needed=True):
     """Return stemming langugage for given index."""
     if recreate_cache_if_needed:
         index_stemming_cache.recreate_cache_if_needed()
     return index_stemming_cache.cache[index_id]
 
 
 class FieldTokenizerDataCacher(DataCacher):
     """
     Provides cache for tokenizer information for fields corresponding to indexes.
     This class is not to be used directly; use function
     get_field_tokenizer_type() instead.
     """
     def __init__(self):
         def cache_filler():
             try:
                 res = run_sql("""SELECT fld.code, ind.tokenizer FROM idxINDEX AS ind, field AS fld, idxINDEX_field AS indfld WHERE ind.id = indfld.id_idxINDEX AND indfld.id_field = fld.id""")
             except DatabaseError:
                 # database problems, return empty cache
                 return {}
             return dict(res)
 
         def timestamp_verifier():
             return get_table_update_time('idxINDEX')
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     field_tokenizer_cache.is_ok_p
 except Exception:
     field_tokenizer_cache = FieldTokenizerDataCacher()
 
 def get_field_tokenizer_type(field_name, recreate_cache_if_needed=True):
     """Return tokenizer type for given field corresponding to an index if applicable."""
     if recreate_cache_if_needed:
         field_tokenizer_cache.recreate_cache_if_needed()
     tokenizer = None
     try:
         tokenizer = field_tokenizer_cache.cache[field_name]
     except KeyError:
         return None
     return tokenizer
 
 
 class CollectionRecListDataCacher(DataCacher):
     """
     Provides cache for collection reclist hitsets.  This class is not
     to be used directly; use function get_collection_reclist() instead.
     """
     def __init__(self):
         def cache_filler():
             ret = {}
             try:
                 res = run_sql("SELECT name FROM collection")
             except Exception:
                 # database problems, return empty cache
                 return {}
             for name in res:
                 ret[name[0]] = None # this will be filled later during runtime by calling get_collection_reclist(coll)
             return ret
 
         def timestamp_verifier():
             return get_table_update_time('collection')
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     if not collection_reclist_cache.is_ok_p:
         raise Exception
 except Exception:
     collection_reclist_cache = CollectionRecListDataCacher()
 
 def get_collection_reclist(coll, recreate_cache_if_needed=True):
     """Return hitset of recIDs that belong to the collection 'coll'."""
     if recreate_cache_if_needed:
         collection_reclist_cache.recreate_cache_if_needed()
     if coll not in collection_reclist_cache.cache:
         return intbitset() # collection does not exist; return empty set
     if not collection_reclist_cache.cache[coll]:
         # collection's reclist not in the cache yet, so calculate it
         # and fill the cache:
         reclist = intbitset()
         query = "SELECT nbrecs,reclist FROM collection WHERE name=%s"
         res = run_sql(query, (coll, ), 1)
         if res:
             try:
                 reclist = intbitset(res[0][1])
             except:
                 pass
         collection_reclist_cache.cache[coll] = reclist
     # finally, return reclist:
     return collection_reclist_cache.cache[coll]
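
 # Usage sketch (collection name is only an example): get_collection_reclist('Theses')
 # returns an intbitset of recIDs; the first call per collection reads the DB,
 # subsequent calls are served from collection_reclist_cache.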
 
 def get_available_output_formats(visible_only=False):
     """
     Return the list of available output formats.  When visible_only is
     True, returns only those output formats that have visibility flag
     set to 1.
     """
 
     formats = []
     query = "SELECT code,name FROM format"
     if visible_only:
         query += " WHERE visibility='1'"
     query += " ORDER BY name ASC"
     res = run_sql(query)
     if res:
         # propose found formats:
         for code, name in res:
             formats.append({ 'value' : code,
                              'text' : name
                            })
     else:
         formats.append({'value' : 'hb',
                         'text' : "HTML brief"
                        })
     return formats
 
 class SearchResultsCache(DataCacher):
     """
     Provides temporary lazy cache for Search Results.
     Useful when users click on `next page'.
     """
     def __init__(self):
         def cache_filler():
             return {}
         def timestamp_verifier():
             return '1970-01-01 00:00:00' # lazy cache is always okay;
                                          # its filling is governed by
                                          # CFG_WEBSEARCH_SEARCH_CACHE_SIZE
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     if not search_results_cache.is_ok_p:
         raise Exception
 except Exception:
     search_results_cache = SearchResultsCache()
 
 class CollectionI18nNameDataCacher(DataCacher):
     """
     Provides cache for I18N collection names.  This class is not to be
     used directly; use function get_coll_i18nname() instead.
     """
     def __init__(self):
         def cache_filler():
             ret = {}
             try:
                 res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name
             except Exception:
                 # database problems
                 return {}
             for c, ln, i18nname in res:
                 if i18nname:
                     if not ret.has_key(c):
                         ret[c] = {}
                     ret[c][ln] = i18nname
             return ret
 
         def timestamp_verifier():
             return get_table_update_time('collectionname')
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     if not collection_i18nname_cache.is_ok_p:
         raise Exception
 except Exception:
     collection_i18nname_cache = CollectionI18nNameDataCacher()
 
 def get_coll_i18nname(c, ln=CFG_SITE_LANG, verify_cache_timestamp=True):
     """
     Return nicely formatted collection name (of the name type `ln'
     (=long name)) for collection C in language LN.
 
     This function uses collection_i18nname_cache, but it verifies
     whether the cache is up-to-date first by default.  This
     verification step is performed by checking the DB table update
     time.  So, if you call this function 1000 times, it can get very
     slow because it will do 1000 table update time verifications, even
     though collection names do not change that often.
 
     Hence the parameter VERIFY_CACHE_TIMESTAMP which, when set to
     False, will assume the cache is already up-to-date.  This is
     useful namely in the generation of collection lists for the search
     results page.
     """
     if verify_cache_timestamp:
         collection_i18nname_cache.recreate_cache_if_needed()
     out = c
     try:
         out = collection_i18nname_cache.cache[c][ln]
     except KeyError:
         pass # translation in LN does not exist
     return out
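
 # Example (illustrative names): get_coll_i18nname('Theses', 'fr') returns the
 # French long name of the 'Theses' collection when one is defined in the
 # collectionname table, and falls back to 'Theses' itself otherwise.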
 
 class FieldI18nNameDataCacher(DataCacher):
     """
     Provides cache for I18N field names.  This class is not to be used
     directly; use function get_field_i18nname() instead.
     """
     def __init__(self):
         def cache_filler():
             ret = {}
             try:
                 res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name
             except Exception:
                 # database problems, return empty cache
                 return {}
             for f, ln, i18nname in res:
                 if i18nname:
                     if not ret.has_key(f):
                         ret[f] = {}
                     ret[f][ln] = i18nname
             return ret
 
         def timestamp_verifier():
             return get_table_update_time('fieldname')
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     if not field_i18nname_cache.is_ok_p:
         raise Exception
 except Exception:
     field_i18nname_cache = FieldI18nNameDataCacher()
 
 def get_field_i18nname(f, ln=CFG_SITE_LANG, verify_cache_timestamp=True):
     """
     Return nicely formatted field name (of type 'ln', 'long name') for
     field F in language LN.
 
     If VERIFY_CACHE_TIMESTAMP is set to True, then verify DB timestamp
     and field I18N name cache timestamp and refresh cache from the DB
     if needed.  Otherwise don't bother checking DB timestamp and
     return the cached value.  (This is useful when get_field_i18nname
     is called inside a loop.)
     """
     if verify_cache_timestamp:
         field_i18nname_cache.recreate_cache_if_needed()
     out = f
     try:
         out = field_i18nname_cache.cache[f][ln]
     except KeyError:
         pass # translation in LN does not exist
     return out
 
 def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG):
     """Returns nicely ordered (score respected) list of collections, more exactly list of tuples
        (collection name, printable collection name).
        Suitable for create_search_box()."""
     out = []
     res = run_sql("SELECT name FROM collection ORDER BY name ASC")
     for c_name in res:
         c_name = c_name[0]
         # make a nice printable name (e.g. truncate c_printable for
         # long collection names in given language):
         c_printable_fullname = get_coll_i18nname(c_name, ln, False)
         c_printable = wash_index_term(c_printable_fullname, 30, False)
         if c_printable != c_printable_fullname:
             c_printable = c_printable + "..."
         if level:
             c_printable = " " + level * '-' + " " + c_printable
         out.append([c_name, c_printable])
     return out
 
 def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG):
     """Returns nicely ordered (score respected) list of collections, more exactly list of tuples
        (collection name, printable collection name).
        Suitable for create_search_box()."""
     colls_nicely_ordered = []
     res = run_sql("""SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c
                      WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC""", (collid, ))
     for c, cid in res:
         # make a nice printable name (e.g. truncate c_printable for
         # long collection names in given language):
         c_printable_fullname = get_coll_i18nname(c, ln, False)
         c_printable = wash_index_term(c_printable_fullname, 30, False)
         if c_printable != c_printable_fullname:
             c_printable = c_printable + "..."
         if level:
             c_printable = " " + level * '-' + " " + c_printable
         colls_nicely_ordered.append([c, c_printable])
         colls_nicely_ordered  = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1, ln=ln)
     return colls_nicely_ordered
 
 def get_index_id_from_field(field):
     """
     Return index id with name corresponding to FIELD, or the first
     index id where the logical field code named FIELD is indexed.
 
     Return zero in case there is no index defined for this field.
 
     Example: field='author', output=4.
     """
     out = 0
     if not field:
         field = 'global' # empty string field means 'global' index (field 'anyfield')
 
     # first look in the index table:
     res = run_sql("""SELECT id FROM idxINDEX WHERE name=%s""", (field,))
     if res:
         out = res[0][0]
         return out
 
     # not found in the index table, now look in the field table:
     res = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f
                       WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX
                       LIMIT 1""", (field,))
     if res:
         out = res[0][0]
     return out
 
 def get_words_from_pattern(pattern):
     """
     Returns list of whitespace-separated words from pattern, removing any
     trailing punctuation-like signs from words in pattern.
     """
     words = {}
     # clean trailing punctuation signs inside pattern
     pattern = re_punctuation_followed_by_space.sub(' ', pattern)
     for word in string.split(pattern):
         if not words.has_key(word):
             words[word] = 1
     return words.keys()
 
 def create_basic_search_units(req, p, f, m=None, of='hb'):
     """Splits search pattern and search field into a list of independently searchable units.
        - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where
           'operator' is set union (|), set intersection (+) or set exclusion (-);
           'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
           'field' is either a code like 'title' or MARC tag like '100__a';
           'type' is the search type ('w' for word file search, 'a' for access file search).
         - Optionally, the function accepts the match type argument 'm'.
           If it is set (e.g. from advanced search interface), then it
           performs this kind of matching.  If it is not set, then a guess is made.
           'm' can have values: 'a'='all of the words', 'o'='any of the words',
                                'p'='phrase/substring', 'r'='regular expression',
                                'e'='exact value'.
         - Warnings are printed on req (when not None) in case of HTML output formats."""
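     # Worked example (illustrative): with m='a', p='nuclear physics', f='title',
     # branch A4 below yields [['+', 'nuclear', 'title', 'w'],
     # ['+', 'physics', 'title', 'w']], i.e. the intersection of two word units.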
 
     opfts = [] # will hold (o,p,f,t,h) units
 
     # FIXME: quick hack for the journal index
     if f == 'journal':
         opfts.append(['+', p, f, 'w'])
         return opfts
 
     ## check arguments: is desired matching type set?
     if m:
         ## A - matching type is known; good!
         if m == 'e':
             # A1 - exact value:
             opfts.append(['+', p, f, 'a']) # '+' since we have only one unit
         elif m == 'p':
             # A2 - phrase/substring:
             opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit
         elif m == 'r':
             # A3 - regular expression:
             opfts.append(['+', p, f, 'r']) # '+' since we have only one unit
         elif m == 'a' or m == 'w':
             # A4 - all of the words:
             p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed
             for word in get_words_from_pattern(p):
                 opfts.append(['+', word, f, 'w']) # '+' in all units
         elif m == 'o':
             # A5 - any of the words:
             p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed
             for word in get_words_from_pattern(p):
                 if len(opfts)==0:
                     opfts.append(['+', word, f, 'w']) # '+' in the first unit
                 else:
                     opfts.append(['|', word, f, 'w']) # '|' in further units
         else:
             if of.startswith("h"):
                 write_warning("Matching type '%s' is not implemented yet." % cgi.escape(m), "Warning", req=req)
             opfts.append(['+', "%" + p + "%", f, 'w'])
     else:
         ## B - matching type is not known: let us try to determine it by some heuristics
         if f and p[0] == '"' and p[-1] == '"':
             ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search
             opfts.append(['+', p[1:-1], f, 'a'])
         elif f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor', 'authorityauthor') and author_name_requires_phrase_search(p):
             ## B1 - do we search in author, and does 'p' contain space/comma/dot/etc?
             ## => doing washed ACC search
             opfts.append(['+', p, f, 'a'])
         elif f and p[0] == "'" and p[-1] == "'":
             ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search
             opfts.append(['+', '%' + p[1:-1] + '%', f, 'a'])
         elif f and p[0] == "/" and p[-1] == "/":
             ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search
             opfts.append(['+', p[1:-1], f, 'r'])
         elif f and string.find(p, ',') >= 0:
             ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search
             opfts.append(['+', p, f, 'a'])
         elif f and str(f[0:2]).isdigit():
             ## B2 - does 'f' exist and starts by two digits?  => doing ACC search
             opfts.append(['+', p, f, 'a'])
         else:
             ## B3 - doing WRD search, but maybe ACC too
             # search units are separated by spaces unless the space is within single or double quotes
             # so, let us replace temporarily any space within quotes by '__SPACE__'
             p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
             p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p)
             p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p)
             # and spaces after colon as well:
             p = re_pattern_spaces_after_colon.sub(lambda x: string.replace(x.group(1), ' ', '__SPACE__'), p)
             # wash argument:
             p = re_logical_and.sub(" ", p)
             p = re_logical_or.sub(" |", p)
             p = re_logical_not.sub(" -", p)
             p = re_operators.sub(r' \1', p)
             for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item")
                 pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' '
                 # firstly, determine set operator
                 if pi[0] == '+' or pi[0] == '-' or pi[0] == '|':
                     oi = pi[0]
                     pi = pi[1:]
                 else:
                     # okay, there is no operator, so let us decide what to do by default
                     oi = '+' # by default we are doing set intersection...
                 # secondly, determine search pattern and field:
                 if string.find(pi, ":") > 0:
                     fi, pi = string.split(pi, ":", 1)
                     fi = wash_field(fi)
                     # test whether fi is a real index code or a MARC-tag defined code:
                     if fi in get_fieldcodes() or '00' <= fi[:2] <= '99':
                         pass
                     else:
                         # it is not, so join it back:
                         fi, pi = f, fi + ":" + pi
                 else:
                     fi, pi = f, pi
                 # wash 'fi' argument:
                 fi = wash_field(fi)
                 # wash 'pi' argument:
                 pi = pi.strip() # strip eventual spaces
                 if re_quotes.match(pi):
                     # B3a - quotes are found => do ACC search (phrase search)
                     if pi[0] == '"' and pi[-1] == '"':
                         pi = string.replace(pi, '"', '') # remove quote signs
                         opfts.append([oi, pi, fi, 'a'])
                     elif pi[0] == "'" and pi[-1] == "'":
                         pi = string.replace(pi, "'", "") # remove quote signs
                         opfts.append([oi, "%" + pi + "%", fi, 'a'])
                     else: # unbalanced quotes, so fall back to WRD query:
                         opfts.append([oi, pi, fi, 'w'])
                 elif pi.startswith('/') and pi.endswith('/'):
                     # B3b - pi has slashes around => do regexp search
                     opfts.append([oi, pi[1:-1], fi, 'r'])
                 elif fi and len(fi) > 1 and str(fi[0]).isdigit() and str(fi[1]).isdigit():
                     # B3c - fi exists and starts by two digits => do ACC search
                     opfts.append([oi, pi, fi, 'a'])
                 elif fi and not get_index_id_from_field(fi) and get_field_name(fi):
                     # B3d - logical field fi exists but there is no WRD index for fi => try ACC search
                     opfts.append([oi, pi, fi, 'a'])
                 else:
                     # B3e - general case => do WRD search
                     pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
                     for pii in get_words_from_pattern(pi):
                         opfts.append([oi, pii, fi, 'w'])
     ## sanity check:
     for i in range(0, len(opfts)):
         try:
             pi = opfts[i][1]
             if pi == '*':
                 if of.startswith("h"):
                     write_warning("Ignoring standalone wildcard word.", "Warning", req=req)
                 del opfts[i]
             if pi == '' or pi == ' ':
                 fi = opfts[i][2]
                 if fi:
                     if of.startswith("h"):
                         write_warning("Ignoring empty <em>%s</em> search term." % fi, "Warning", req=req)
                 del opfts[i]
         except:
             pass
 
     ## replace old logical field names if applicable:
     if CFG_WEBSEARCH_FIELDS_CONVERT:
         opfts = [[o, p, wash_field(f), t] for o, p, f, t in opfts]
 
     ## return search units:
     return opfts
 
 def page_start(req, of, cc, aas, ln, uid, title_message=None,
                description='', keywords='', recID=-1, tab='', p='', em=''):
     """
     Start page according to given output format.
 
     @param title_message: title of the page, not escaped for HTML
     @param description: description of the page, not escaped for HTML
     @param keywords: keywords of the page, not escaped for HTML
     """
     _ = gettext_set_language(ln)
     if not req or isinstance(req, cStringIO.OutputType):
         return # we were called from CLI
 
     if not title_message:
         title_message = _("Search Results")
 
     content_type = get_output_format_content_type(of)
 
     if of.startswith('x'):
         if of == 'xr':
             # we are doing RSS output
             req.content_type = "application/rss+xml"
             req.send_http_header()
             req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
         else:
             # we are doing XML output:
             req.content_type = get_output_format_content_type(of, 'text/xml')
             req.send_http_header()
             req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
     elif of.startswith('t') or str(of[0:3]).isdigit():
         # we are doing plain text output:
         req.content_type = "text/plain"
         req.send_http_header()
     elif of == "intbitset":
         req.content_type = "application/octet-stream"
         req.send_http_header()
     elif of == "id":
         pass # nothing to do, we shall only return list of recIDs
     elif content_type == 'text/html':
         # we are doing HTML output:
         req.content_type = "text/html"
         req.send_http_header()
 
         if not description:
             description = "%s %s." % (cc, _("Search Results"))
 
         if not keywords:
             keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln, False), get_coll_i18nname(cc, ln, False))
 
         ## generate RSS URL:
         argd = {}
         if req.args:
             argd = cgi.parse_qs(req.args)
         rssurl = websearch_templates.build_rss_url(argd)
 
         ## add MathJax if displaying single records (FIXME: eventually find
         ## a better place for this code)
         if of.lower() in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
             metaheaderadd = get_mathjax_header(req.is_https())
         else:
             metaheaderadd = ''
         # Add metadata in meta tags for Google scholar-esque harvesting...
         # only if we have a detailed meta format and we are looking at a
         # single record
         if (recID != -1 and CFG_WEBSEARCH_DETAILED_META_FORMAT):
             metaheaderadd += format_record(recID, \
                                            CFG_WEBSEARCH_DETAILED_META_FORMAT, \
                                            ln = ln)
 
         ## generate navtrail:
         navtrail = create_navtrail_links(cc, aas, ln)
         if navtrail != '':
             navtrail += ' &gt; '
         if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \
                recID != -1:
             # If we are not in information tab in HD format, customize
             # the nav. trail to have a link back to main record. (Due
             # to the way perform_request_search() works, hb
             # (lowercase) is equal to hd)
             navtrail += ' <a class="navtrail" href="%s/%s/%s">%s</a>' % \
                             (CFG_SITE_URL, CFG_SITE_RECORD, recID, cgi.escape(title_message))
             if (of != '' or of.lower() != 'hd') and of != 'hb':
                 # Export
                 format_name = of
                 query = "SELECT name FROM format WHERE code=%s"
                 res = run_sql(query, (of,))
                 if res:
                     format_name = res[0][0]
                 navtrail += ' &gt; ' + format_name
             else:
                 # Discussion, citations, etc. tabs
                 tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label']
                 navtrail += ' &gt; ' + _(tab_label)
         else:
             navtrail += cgi.escape(title_message)
 
         if p:
             # we are serving search/browse results pages, so insert pattern:
             navtrail += ": " + cgi.escape(p)
             title_message = p + " - " + title_message
 
         body_css_classes = []
         if cc:
             # we know the collection, let's allow page styles based on cc
 
             #collection names may not satisfy rules for css classes which
             #are something like:  -?[_a-zA-Z]+[_a-zA-Z0-9-]*
             #however it isn't clear what we should do about cases with
             #numbers, so we leave them to fail.  Everything else becomes "_"
 
             css = nmtoken_from_string(cc).replace('.','_').replace('-','_').replace(':','_')
             body_css_classes.append(css)
 
         ## finally, print page header:
         if em == '' or EM_REPOSITORY["header"] in em:
             req.write(pageheaderonly(req=req, title=title_message,
                                  navtrail=navtrail,
                                  description=description,
                                  keywords=keywords,
                                  metaheaderadd=metaheaderadd,
                                  uid=uid,
                                  language=ln,
                                  navmenuid='search',
                                  navtrail_append_title_p=0,
                                  rssurl=rssurl,
                                  body_css_classes=body_css_classes))
         req.write(websearch_templates.tmpl_search_pagestart(ln=ln))
     else:
         req.content_type = content_type
         req.send_http_header()
 
 def page_end(req, of="hb", ln=CFG_SITE_LANG, em=""):
     "End page according to given output format: e.g. close XML tags, add HTML footer, etc."
     if of == "id":
         return [] # empty recID list
     if of == "intbitset":
         return intbitset()
     if not req:
         return # we were called from CLI
     if of.startswith('h'):
         req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end
         if em == "" or EM_REPOSITORY["footer"] in em:
             req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req))
     return
 
 def create_page_title_search_pattern_info(p, p1, p2, p3):
     """Create the search pattern bit for the page <title> web page
     HTML header.  Basically combine p and (p1,p2,p3) together so that
     the page header may be filled whether we are in the Simple Search
     or Advanced Search interface contexts."""
     out = ""
     if p:
         out = p
     else:
         out = p1
         if p2:
             out += ' ' + p2
         if p3:
             out += ' ' + p3
     return out
 
 def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG):
     "Produces 'From Date', 'Until Date' kind of selection box.  Suitable for search options."
 
     _ = gettext_set_language(ln)
 
     box = ""
     # day
     box += """<select name="%sd">""" % name
     box += """<option value="">%s""" % _("any day")
     for day in range(1, 32):
         box += """<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day)
     box += """</select>"""
     # month
     box += """<select name="%sm">""" % name
     box += """<option value="">%s""" % _("any month")
     # trailing space in May distinguishes short/long form of the month name
     for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \
                       (5, _("May ")), (6, _("June")), (7, _("July")), (8, _("August")), \
                       (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]:
         box += """<option value="%02d"%s>%s""" % (mm, is_selected(mm, selected_month), month.strip())
     box += """</select>"""
     # year
     box += """<select name="%sy">""" % name
     box += """<option value="">%s""" % _("any year")
     this_year = int(time.strftime("%Y", time.localtime()))
     for year in range(this_year-20, this_year+1):
         box += """<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year)
     box += """</select>"""
     return box
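
 # Illustrative output (fragment): for name="d1" the function emits three
 # <select> elements named d1d, d1m and d1y, e.g.
 #   <select name="d1d"><option value="">any day<option value="01">01 ...
 # which is what the 'From Date'/'Until Date' search-option boxes use.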
 
 def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, aas,
                       ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3,
                       m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec,
                       action="", em=""):
 
     """Create search box for 'search again in the results page' functionality."""
     if em != "" and EM_REPOSITORY["search_box"] not in em:
         if EM_REPOSITORY["body"] in em and cc != CFG_SITE_NAME:
             return '''
             <h1 class="headline">%(ccname)s</h1>''' % {'ccname' : cgi.escape(cc), }
         else:
             return ""
     # load the right message language
     _ = gettext_set_language(ln)
 
     # some computations
     cc_intl = get_coll_i18nname(cc, ln, False)
     cc_colID = get_colID(cc)
 
     colls_nicely_ordered = []
     if cfg_nicely_ordered_collection_list:
         colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln)
     else:
         colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln)
 
     colls_nice = []
     for (cx, cx_printable) in colls_nicely_ordered:
         if not cx.startswith("Unnamed collection"):
             colls_nice.append({ 'value' : cx,
                                 'text' : cx_printable
                               })
 
     coll_selects = []
     if colls and colls[0] != CFG_SITE_NAME:
         # some collections are defined, so print these first, and only then print 'add another collection' heading:
         for c in colls:
             if c:
                 temp = []
                 temp.append({ 'value' : CFG_SITE_NAME,
                               'text' : '*** %s ***' % _("any public collection")
                             })
                 # this field is used to remove the current collection from the ones to be searched.
                 temp.append({ 'value' : '',
                               'text' : '*** %s ***' % _("remove this collection")
                             })
                 for val in colls_nice:
                     # print collection:
                     if not val['value'].startswith("Unnamed collection"):
                         temp.append({ 'value' : val['value'],
                                       'text' : val['text'],
                                       'selected' : (c == re.sub("^[\s\-]*","", val['value']))
                                     })
                 coll_selects.append(temp)
         coll_selects.append([{ 'value' : '',
                                'text' : '*** %s ***' % _("add another collection")
                              }] + colls_nice)
     else: # we searched in CFG_SITE_NAME, so print 'any public collection' heading
         coll_selects.append([{ 'value' : CFG_SITE_NAME,
                                'text' : '*** %s ***' % _("any public collection")
                              }] + colls_nice)
 
     ## ranking methods
     ranks = [{
                'value' : '',
                'text' : "- %s %s -" % (_("OR").lower (), _("rank by")),
              }]
     for (code, name) in get_bibrank_methods(cc_colID, ln):
         # propose found rank methods:
         ranks.append({
                        'value' : code,
                        'text' : name,
                      })
 
     formats = get_available_output_formats(visible_only=True)
 
     # show collections in the search box? (not if there is only one
     # collection defined, and not if we are in light search)
     show_colls = True
     show_title = True
     if len(collection_reclist_cache.cache.keys()) == 1 or \
            aas == -1:
         show_colls = False
         show_title = False
 
     if cc == CFG_SITE_NAME:
         show_title = False
 
     if CFG_INSPIRE_SITE:
         show_title = False
 
     return websearch_templates.tmpl_search_box(
              ln = ln,
              aas = aas,
              cc_intl = cc_intl,
              cc = cc,
              ot = ot,
              sp = sp,
              action = action,
              fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID),
              f1 = f1,
              f2 = f2,
              f3 = f3,
              m1 = m1,
              m2 = m2,
              m3 = m3,
              p1 = p1,
              p2 = p2,
              p3 = p3,
              op1 = op1,
              op2 = op2,
              rm = rm,
              p = p,
              f = f,
              coll_selects = coll_selects,
              d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d,
              dt = dt,
              sort_fields = get_sortby_fields(ln=ln, colID=cc_colID),
              sf = sf,
              so = so,
              ranks = ranks,
              sc = sc,
              rg = rg,
              formats = formats,
              of = of,
              pl = pl,
              jrec = jrec,
              ec = ec,
              show_colls = show_colls,
              show_title = show_title and (em=="" or EM_REPOSITORY["body"] in em)
            )
 
 
 def create_exact_author_browse_help_link(p=None, p1=None, p2=None, p3=None, f=None, f1=None, f2=None, f3=None,
                                   rm=None, cc=None, ln=None, jrec=None, rg=None, aas=0, action=""):
     """Creates a link to help switch from author to exact author while browsing"""
     if action == 'browse':
         search_fields = (f, f1, f2, f3)
         if ('author' in search_fields) or ('firstauthor' in search_fields):
             def add_exact(field):
                 if field == 'author' or field == 'firstauthor':
                     return 'exact' + field
                 return field
             (fe, f1e, f2e, f3e) = map(add_exact, search_fields)
             link_name = f or f1
             link_name = (link_name == 'firstauthor' and 'exact first author') or 'exact author'
             return websearch_templates.tmpl_exact_author_browse_help_link(p=p, p1=p1, p2=p2, p3=p3, f=fe, f1=f1e, f2=f2e, f3=f3e,
                                                                    rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg, aas=aas, action=action,
                                                                    link_name=link_name)
     return ""
 
 
 def create_navtrail_links(cc=CFG_SITE_NAME, aas=0, ln=CFG_SITE_LANG, self_p=1, tab=''):
     """Creates navigation trail links, i.e. links to collection
     ancestors (except Home collection).  If aas==1, then links to
     Advanced Search interfaces; otherwise Simple Search.
     """
 
     dads = []
     for dad in get_coll_ancestors(cc):
         if dad != CFG_SITE_NAME: # exclude Home collection
             dads.append ((dad, get_coll_i18nname(dad, ln, False)))
 
     if self_p and cc != CFG_SITE_NAME:
         dads.append((cc, get_coll_i18nname(cc, ln, False)))
 
     return websearch_templates.tmpl_navtrail_links(
         aas=aas, ln=ln, dads=dads)
 
 def get_searchwithin_fields(ln='en', colID=None):
     """Retrieves the fields name used in the 'search within' selection box for the collection ID colID."""
     res = None
     if colID:
         res = run_sql("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff
                                  WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id
                               ORDER BY cff.score DESC, f.name ASC""", (colID,))
     if not res:
         res = run_sql("SELECT code,name FROM field ORDER BY name ASC")
     fields = [{
                 'value' : '',
                 'text' : get_field_i18nname("any field", ln, False)
               }]
     for field_code, field_name in res:
         if field_code and field_code != "anyfield":
             fields.append({ 'value' : field_code,
                             'text' : get_field_i18nname(field_name, ln, False)
                           })
     return fields
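
# Illustrative return value (hypothetical field table contents):
#   get_searchwithin_fields(ln='en')
#   -> [{'value': '', 'text': 'any field'},
#       {'value': 'author', 'text': 'author'},
#       {'value': 'title', 'text': 'title'}, ...]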
 
 def get_sortby_fields(ln='en', colID=None):
     """Retrieves the fields name used in the 'sort by' selection box for the collection ID colID."""
     _ = gettext_set_language(ln)
     res = None
     if colID:
         res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                                  WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id
                               ORDER BY cff.score DESC, f.name ASC""", (colID,))
     if not res:
         # no sort fields defined for this colID, try to take Home collection:
         res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                                  WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id
                                  ORDER BY cff.score DESC, f.name ASC""", (1,))
     if not res:
         # no sort fields defined for the Home collection, take all sort fields defined wherever they are:
         res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                                  WHERE cff.type='soo' AND cff.id_field=f.id
                                  ORDER BY cff.score DESC, f.name ASC""",)
     fields = [{
                 'value' : '',
                 'text' : _("latest first")
               }]
     for field_code, field_name in res:
         if field_code and field_code != "anyfield":
             fields.append({ 'value' : field_code,
                             'text' : get_field_i18nname(field_name, ln, False)
                           })
     return fields
 
 def create_andornot_box(name='op', value='', ln='en'):
     "Returns HTML code for the AND/OR/NOT selection box."
 
     _ = gettext_set_language(ln)
 
     out = """
     <select name="%s">
     <option value="a"%s>%s
     <option value="o"%s>%s
     <option value="n"%s>%s
     </select>
     """ % (name,
            is_selected('a', value), _("AND"),
            is_selected('o', value), _("OR"),
            is_selected('n', value), _("AND NOT"))
 
     return out
 
 def create_matchtype_box(name='m', value='', ln='en'):
     "Returns HTML code for the 'match type' selection box."
 
     _ = gettext_set_language(ln)
 
     out = """
     <select name="%s">
     <option value="a"%s>%s
     <option value="o"%s>%s
     <option value="e"%s>%s
     <option value="p"%s>%s
     <option value="r"%s>%s
     </select>
     """ % (name,
            is_selected('a', value), _("All of the words:"),
            is_selected('o', value), _("Any of the words:"),
            is_selected('e', value), _("Exact phrase:"),
            is_selected('p', value), _("Partial phrase:"),
            is_selected('r', value), _("Regular expression:"))
     return out
 
 def is_selected(var, fld):
     "Checks if the two are equal, and if yes, returns ' selected'.  Useful for select boxes."
     if type(var) is int and type(fld) is int:
         if var == fld:
             return " selected"
     elif str(var) == str(fld):
         return " selected"
     elif fld and len(fld)==3 and fld[0] == "w" and var == fld[1:]:
         return " selected"
     return ""
 
 def wash_colls(cc, c, split_colls=0, verbose=0):
     """Wash collection list by checking whether user has deselected
     anything under 'Narrow search'.  Checks also if cc is a list or not.
        Return list of cc, colls_to_display, colls_to_search since the list
     of collections to display is different from that to search in.
     This is because users might have chosen 'split by collection'
     functionality.
        The behaviour of "collections to display" depends solely whether
     user has deselected a particular collection: e.g. if it started
     from 'Articles and Preprints' page, and deselected 'Preprints',
     then collection to display is 'Articles'.  If he did not deselect
     anything, then collection to display is 'Articles & Preprints'.
        The behaviour of "collections to search in" depends on the
     'split_colls' parameter:
          * if is equal to 1, then we can wash the colls list down
            and search solely in the collection the user started from;
          * if is equal to 0, then we are splitting to the first level
            of collections, i.e. collections as they appear on the page
            we started to search from;
 
     The function raises exception
     InvenioWebSearchUnknownCollectionError
     if cc or one of c collections is not known.
     """
 
     colls_out = []
     colls_out_for_display = []
     # list to hold the hosted collections to be searched and displayed
     hosted_colls_out = []
     debug = ""
 
     if verbose:
         debug += "<br />"
         debug += "<br />1) --- initial parameters ---"
         debug += "<br />cc : %s" % cc
         debug += "<br />c : %s" % c
         debug += "<br />"
 
     # check what type is 'cc':
     if type(cc) is list:
         for ci in cc:
             if collection_reclist_cache.cache.has_key(ci):
                 # yes this collection is real, so use it:
                 cc = ci
                 break
     else:
         # check once if cc is real:
         if not collection_reclist_cache.cache.has_key(cc):
             if cc:
                 raise InvenioWebSearchUnknownCollectionError(cc)
             else:
                 cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection
 
     # check type of 'c' argument:
     if type(c) is list:
         colls = c
     else:
         colls = [c]
 
     if verbose:
         debug += "<br />2) --- after check for the integrity of cc and the being or not c a list ---"
         debug += "<br />cc : %s" % cc
         debug += "<br />c : %s" % c
         debug += "<br />"
 
     # remove all 'unreal' collections:
     colls_real = []
     for coll in colls:
         if collection_reclist_cache.cache.has_key(coll):
             colls_real.append(coll)
         else:
             if coll:
                 raise InvenioWebSearchUnknownCollectionError(coll)
     colls = colls_real
 
     if verbose:
         debug += "<br />3) --- keeping only the real colls of c ---"
         debug += "<br />colls : %s" % colls
         debug += "<br />"
 
     # check if some real collections remain:
     if len(colls)==0:
         colls = [cc]
 
     if verbose:
         debug += "<br />4) --- in case no colls were left we use cc directly ---"
         debug += "<br />colls : %s" % colls
         debug += "<br />"
 
     # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll':
     res = run_sql("""SELECT c.name FROM collection AS c,
                                         collection_collection AS cc,
                                         collection AS ccc
                      WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                        AND ccc.name=%s AND cc.type='r'""", (cc,))
 
     # list that holds all the non restricted sons of cc that are also not hosted collections
     l_cc_nonrestricted_sons_and_nonhosted_colls = []
     res_hosted = run_sql("""SELECT c.name FROM collection AS c,
                          collection_collection AS cc,
                          collection AS ccc
                          WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                          AND ccc.name=%s AND cc.type='r'
                          AND (c.dbquery NOT LIKE 'hostedcollection:%%' OR c.dbquery IS NULL)""", (cc,))
     for row_hosted in res_hosted:
         l_cc_nonrestricted_sons_and_nonhosted_colls.append(row_hosted[0])
     l_cc_nonrestricted_sons_and_nonhosted_colls.sort()
 
     l_cc_nonrestricted_sons = []
     l_c = colls[:]
     for row in res:
         if not collection_restricted_p(row[0]):
             l_cc_nonrestricted_sons.append(row[0])
     l_c.sort()
     l_cc_nonrestricted_sons.sort()
     if l_cc_nonrestricted_sons == l_c:
         colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc'
    # the following elif is a hack that preserves the above functionality when we start searching from
    # the frontpage with some hosted collections deselected (either by default or manually)
     elif set(l_cc_nonrestricted_sons_and_nonhosted_colls).issubset(set(l_c)):
         colls_out_for_display = colls
         split_colls = 0
     else:
         colls_out_for_display = colls # nope, we need to display all 'colls' successively
 
     # remove duplicates:
     #colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
     #colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups)
     #colls_out_for_display = list(set(colls_out_for_display))
     #remove duplicates while preserving the order
     set_out = set()
     colls_out_for_display = [coll for coll in colls_out_for_display if coll not in set_out and not set_out.add(coll)]
 
     if verbose:
         debug += "<br />5) --- decide whether colls_out_for_diplay should be colls or is it sufficient for it to be cc; remove duplicates ---"
         debug += "<br />colls_out_for_display : %s" % colls_out_for_display
         debug += "<br />"
 
     # FIXME: The below quoted part of the code has been commented out
     # because it prevents searching in individual restricted daughter
     # collections when both parent and all its public daughter
     # collections were asked for, in addition to some restricted
     # daughter collections.  The removal was introduced for hosted
     # collections, so we may want to double check in this context.
 
     # the following piece of code takes care of removing collections whose ancestors are going to be searched anyway
     # list to hold the collections to be removed
     #colls_to_be_removed = []
     # first calculate the collections that can safely be removed
     #for coll in colls_out_for_display:
     #    for ancestor in get_coll_ancestors(coll):
     #        #if ancestor in colls_out_for_display: colls_to_be_removed.append(coll)
     #        if ancestor in colls_out_for_display and not is_hosted_collection(coll): colls_to_be_removed.append(coll)
     # secondly remove the collections
     #for coll in colls_to_be_removed:
     #    colls_out_for_display.remove(coll)
 
     if verbose:
         debug += "<br />6) --- remove collections that have ancestors about to be search, unless they are hosted ---"
         debug += "<br />colls_out_for_display : %s" % colls_out_for_display
         debug += "<br />"
 
     # calculate the hosted collections to be searched.
     if colls_out_for_display == [cc]:
         if is_hosted_collection(cc):
             hosted_colls_out.append(cc)
         else:
             for coll in get_coll_sons(cc):
                 if is_hosted_collection(coll):
                     hosted_colls_out.append(coll)
     else:
         for coll in colls_out_for_display:
             if is_hosted_collection(coll):
                 hosted_colls_out.append(coll)
 
     if verbose:
         debug += "<br />7) --- calculate the hosted_colls_out ---"
         debug += "<br />hosted_colls_out : %s" % hosted_colls_out
         debug += "<br />"
 
     # second, let us decide on collection splitting:
     if split_colls == 0:
         # type A - no sons are wanted
         colls_out = colls_out_for_display
     else:
         # type B - sons (first-level descendants) are wanted
         for coll in colls_out_for_display:
             coll_sons = get_coll_sons(coll)
             if coll_sons == []:
                 colls_out.append(coll)
             else:
                 for coll_son in coll_sons:
                     if not is_hosted_collection(coll_son):
                         colls_out.append(coll_son)
             #else:
             #    colls_out = colls_out + coll_sons
 
     # remove duplicates:
     #colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1))
     #colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups)
     #colls_out = list(set(colls_out))
     #remove duplicates while preserving the order
     set_out = set()
     colls_out = [coll for coll in colls_out if coll not in set_out and not set_out.add(coll)]
 
 
     if verbose:
         debug += "<br />8) --- calculate the colls_out; remove duplicates ---"
         debug += "<br />colls_out : %s" % colls_out
         debug += "<br />"
 
     # remove the hosted collections from the collections to be searched
     if hosted_colls_out:
         for coll in hosted_colls_out:
             try:
                 colls_out.remove(coll)
             except ValueError:
                 # in case coll was not found in colls_out
                 pass
 
     if verbose:
         debug += "<br />9) --- remove the hosted_colls from the colls_out ---"
         debug += "<br />colls_out : %s" % colls_out
 
     return (cc, colls_out_for_display, colls_out, hosted_colls_out, debug)
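
# Illustrative example (hypothetical collection tree): starting from
# cc='Articles & Preprints' whose public sons are 'Articles' and 'Preprints',
#   wash_colls('Articles & Preprints', ['Articles', 'Preprints'], split_colls=0)
# would return ('Articles & Preprints', ['Articles & Preprints'],
# ['Articles & Preprints'], [], ''), since nothing was deselected; with
# c=['Articles'] only, both the display and the search lists would be
# washed down to ['Articles'].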
 
 def get_synonym_terms(term, kbr_name, match_type, use_memoise=False):
     """
     Return list of synonyms for TERM by looking in KBR_NAME in
     MATCH_TYPE style.
 
     @param term: search-time term or index-time term
     @type term: str
     @param kbr_name: knowledge base name
     @type kbr_name: str
    @param match_type: specifies how the term matches against the KBR
        before doing the lookup.  Could be 'exact' (default),
        'leading_to_comma', 'leading_to_number'.
     @type match_type: str
     @param use_memoise: can we memoise while doing lookups?
     @type use_memoise: bool
     @return: list of term synonyms
     @rtype: list of strings
     """
     dterms = {}
     ## exact match is default:
     term_for_lookup = term
     term_remainder = ''
     ## but maybe match different term:
     if match_type == CFG_BIBINDEX_SYNONYM_MATCH_TYPE['leading_to_comma']:
         mmm = re.match(r'^(.*?)(\s*,.*)$', term)
         if mmm:
             term_for_lookup = mmm.group(1)
             term_remainder = mmm.group(2)
     elif match_type == CFG_BIBINDEX_SYNONYM_MATCH_TYPE['leading_to_number']:
         mmm = re.match(r'^(.*?)(\s*\d.*)$', term)
         if mmm:
             term_for_lookup = mmm.group(1)
             term_remainder = mmm.group(2)
     ## FIXME: workaround: escaping SQL wild-card signs, since KBR's
     ## exact search is doing LIKE query, so would match everything:
     term_for_lookup = term_for_lookup.replace('%', '\%')
     ## OK, now find synonyms:
     for kbr_values in get_kbr_values(kbr_name,
                                      searchkey=term_for_lookup,
                                      searchtype='e',
                                      use_memoise=use_memoise):
         for kbr_value in kbr_values:
             dterms[kbr_value + term_remainder] = 1
     ## return list of term synonyms:
     return dterms.keys()
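
# Illustrative example (hypothetical KB contents): with the 'leading_to_comma'
# match type, a term like 'Ellis, J' is looked up in the KB as 'Ellis' and the
# remainder ', J' is re-appended to every synonym found, so a KB mapping
# 'Ellis' -> 'Ellis J R' would yield ['Ellis J R, J'].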
 
 
 def wash_output_format(format):
     """Wash output format FORMAT.  Currently only prevents input like
     'of=9' for backwards-compatible format that prints certain fields
     only.  (for this task, 'of=tm' is preferred)"""
     if str(format[0:3]).isdigit() and len(format) != 6:
         # asked to print MARC tags, but not enough digits,
         # so let's switch back to HTML brief default
         return 'hb'
     else:
         return format
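
# Illustrative examples, derived from the check above:
#   wash_output_format('9')      -> 'hb'      (digits, but not a 6-char MARC tag)
#   wash_output_format('245__a') -> '245__a'  (6 characters, kept as-is)
#   wash_output_format('hx')     -> 'hx'      (non-numeric formats pass through)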
 
 def wash_pattern(p):
     """Wash pattern passed by URL. Check for sanity of the wildcard by
     removing wildcards if they are appended to extremely short words
     (1-3 letters).  TODO: instead of this approximative treatment, it
     will be much better to introduce a temporal limit, e.g. to kill a
     query if it does not finish in 10 seconds."""
     # strip accents:
     # p = strip_accents(p) # FIXME: when available, strip accents all the time
     # add leading/trailing whitespace for the two following wildcard-sanity checking regexps:
     p = " " + p + " "
     # replace spaces within quotes by __SPACE__ temporarily:
     p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
     p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p)
     p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p)
     # get rid of unquoted wildcards after spaces:
     p = re_pattern_wildcards_after_spaces.sub("\\1", p)
     # get rid of extremely short words (1-3 letters with wildcards):
     #p = re_pattern_short_words.sub("\\1", p)
     # replace back __SPACE__ by spaces:
     p = re_pattern_space.sub(" ", p)
     # replace special terms:
     p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p)
     # remove unnecessary whitespace:
     p = string.strip(p)
     # remove potentially wrong UTF-8 characters:
     p = wash_for_utf8(p)
     return p
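
# Illustrative example (assuming the default regexps): quoted phrases survive
# the wildcard-sanity checks because their inner spaces are temporarily
# protected, e.g.
#   wash_pattern('  "quark gluon"  ') -> '"quark gluon"'
# with the surrounding whitespace stripped and the phrase kept intact.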
 
 def wash_field(f):
     """Wash field passed by URL."""
     if f:
         # get rid of unnecessary whitespace and make it lowercase
         # (e.g. Author -> author) to better suit iPhone etc input
         # mode:
         f = f.strip().lower()
     # wash legacy 'f' field names, e.g. replace 'wau' or `au' by
     # 'author', if applicable:
     if CFG_WEBSEARCH_FIELDS_CONVERT:
         f = CFG_WEBSEARCH_FIELDS_CONVERT.get(f, f)
     return f
 
 def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0):
     """
    Take user-submitted date arguments D1 (full datetime string) or
    the (D1Y, D1M, D1D) year, month, day tuple and D2 or (D2Y, D2M, D2D),
    and return a pair of datetime strings in the YYYY-MM-DD HH:MM:SS
    format suitable for time-restricted searching.
 
     Note that when both D1 and (D1Y, D1M, D1D) parameters are present,
     the precedence goes to D1.  Ditto for D2*.
 
     Note that when (D1Y, D1M, D1D) are taken into account, some values
     may be missing and are completed e.g. to 01 or 12 according to
     whether it is the starting or the ending date.
     """
     datetext1, datetext2 =  "", ""
     # sanity checking:
     if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and d2 == "" and d2y == 0 and d2m == 0 and d2d == 0:
         return ("", "") # nothing selected, so return empty values
     # wash first (starting) date:
     if d1:
         # full datetime string takes precedence:
         datetext1 = d1
     else:
         # okay, first date passed as (year,month,day):
         if d1y:
             datetext1 += "%04d" % d1y
         else:
             datetext1 += "0000"
         if d1m:
             datetext1 += "-%02d" % d1m
         else:
             datetext1 += "-01"
         if d1d:
             datetext1 += "-%02d" % d1d
         else:
             datetext1 += "-01"
         datetext1 += " 00:00:00"
     # wash second (ending) date:
     if d2:
         # full datetime string takes precedence:
         datetext2 = d2
     else:
         # okay, second date passed as (year,month,day):
         if d2y:
             datetext2 += "%04d" % d2y
         else:
             datetext2 += "9999"
         if d2m:
             datetext2 += "-%02d" % d2m
         else:
             datetext2 += "-12"
         if d2d:
             datetext2 += "-%02d" % d2d
         else:
             datetext2 += "-31" # NOTE: perhaps we should add max(datenumber) in
                                # given month, but for our quering it's not
                                # needed, 31 will always do
         datetext2 += " 00:00:00"
     # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts:
     return (datetext1, datetext2)
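
# Illustrative examples, derived from the completion rules above:
#   wash_dates()                          -> ('', '')
#   wash_dates(d1y=2005)                  -> ('2005-01-01 00:00:00',
#                                             '9999-12-31 00:00:00')
#   wash_dates(d1y=2005, d2y=2006, d2m=3) -> ('2005-01-01 00:00:00',
#                                             '2006-03-31 00:00:00')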
 
 def is_hosted_collection(coll):
     """Check if the given collection is a hosted one; i.e. its dbquery starts with hostedcollection:
     Returns True if it is, False if it's not or if the result is empty or if the query failed"""
 
     res = run_sql("SELECT dbquery FROM collection WHERE name=%s", (coll, ))
     try:
         return res[0][0].startswith("hostedcollection:")
     except:
         return False
 
 def get_colID(c):
     "Return collection ID for collection name C.  Return None if no match found."
     colID = None
     res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1)
     if res:
         colID = res[0][0]
     return colID
 
 def get_coll_normalised_name(c):
     """Returns normalised collection name (case sensitive) for collection name
        C (case insensitive).
        Returns None if no match found."""
     try:
         return run_sql("SELECT name FROM collection WHERE name=%s", (c,))[0][0]
     except:
         return None
 
 def get_coll_ancestors(coll):
     "Returns a list of ancestors for collection 'coll'."
     coll_ancestors = []
     coll_ancestor = coll
     while 1:
         res = run_sql("""SELECT c.name FROM collection AS c
                           LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad
                           LEFT JOIN collection AS ccc ON ccc.id=cc.id_son
                           WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""",
                       (coll_ancestor,))
         if res:
             coll_name = res[0][0]
             coll_ancestors.append(coll_name)
             coll_ancestor = coll_name
         else:
             break
     # ancestors found, return reversed list:
     coll_ancestors.reverse()
     return coll_ancestors
 
 def get_coll_sons(coll, type='r', public_only=1):
     """Return a list of sons (first-level descendants) of type 'type' for collection 'coll'.
        If public_only, then return only non-restricted son collections.
     """
     coll_sons = []
     query = "SELECT c.name FROM collection AS c "\
             "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\
             "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\
             "WHERE cc.type=%s AND ccc.name=%s"
     query += " ORDER BY cc.score DESC"
     res = run_sql(query, (type, coll))
     for name in res:
         if not public_only or not collection_restricted_p(name[0]):
             coll_sons.append(name[0])
     return coll_sons
 
 class CollectionAllChildrenDataCacher(DataCacher):
     """Cache for all children of a collection (regular & virtual, public & private)"""
     def __init__(self):
 
         def cache_filler():
 
             def get_all_children(coll, type='r', public_only=1):
                 """Return a list of all children of type 'type' for collection 'coll'.
                    If public_only, then return only non-restricted child collections.
                    If type='*', then return both regular and virtual collections.
                 """
                 children = []
                 if type == '*':
                     sons = get_coll_sons(coll, 'r', public_only) + get_coll_sons(coll, 'v', public_only)
                 else:
                     sons = get_coll_sons(coll, type, public_only)
                 for child in sons:
                     children.append(child)
                     children.extend(get_all_children(child, type, public_only))
                 return children
 
             ret = {}
             collections = collection_reclist_cache.cache.keys()
             for collection in collections:
                 ret[collection] = get_all_children(collection, '*', public_only=0)
             return ret
 
         def timestamp_verifier():
             return max(get_table_update_time('collection'), get_table_update_time('collection_collection'))
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 try:
     if not collection_allchildren_cache.is_ok_p:
         raise Exception
 except Exception:
     collection_allchildren_cache = CollectionAllChildrenDataCacher()
 
 def get_collection_allchildren(coll, recreate_cache_if_needed=True):
     """Returns the list of all children of a collection."""
     if recreate_cache_if_needed:
         collection_allchildren_cache.recreate_cache_if_needed()
     if coll not in collection_allchildren_cache.cache:
         return [] # collection does not exist; return empty list
     return collection_allchildren_cache.cache[coll]
 
 
 def get_coll_real_descendants(coll, type='_', get_hosted_colls=True):
     """Return a list of all descendants of collection 'coll' that are defined by a 'dbquery'.
        IOW, we need to decompose compound collections like "A & B" into "A" and "B" provided
        that "A & B" has no associated database query defined.
     """
     coll_sons = []
     res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c
                      LEFT JOIN collection_collection AS cc ON c.id=cc.id_son
                      LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad
                      WHERE ccc.name=%s AND cc.type LIKE %s ORDER BY cc.score DESC""",
                   (coll, type,))
     for name, dbquery in res:
         if dbquery: # this is 'real' collection, so return it:
             if get_hosted_colls:
                 coll_sons.append(name)
             else:
                 if not dbquery.startswith("hostedcollection:"):
                     coll_sons.append(name)
         else: # this is 'composed' collection, so recurse:
             coll_sons.extend(get_coll_real_descendants(name))
     return coll_sons
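
# Illustrative example (hypothetical tree): if 'Articles & Preprints' has no
# dbquery of its own while its sons 'Articles' and 'Preprints' do, then
#   get_coll_real_descendants('Articles & Preprints')
# recurses through the composed collection and returns
# ['Articles', 'Preprints'].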
 
 def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG):
     """Browse either biliographic phrases or words indexes, and display it."""
 
     # load the right message language
     _ = gettext_set_language(ln)
 
     ## is p enclosed in quotes? (coming from exact search)
     if p.startswith('"') and p.endswith('"'):
         p = p[1:-1]
 
     p_orig = p
     ## okay, "real browse" follows:
     ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test
 
     if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
         f, p = string.split(p, ":", 1)
 
     ## do we search in words indexes?
     if not f:
         return browse_in_bibwords(req, p, f)
 
     coll_hitset = intbitset()
     for coll_name in colls:
         coll_hitset |= get_collection_reclist(coll_name)
 
     index_id = get_index_id_from_field(f)
     if index_id != 0:
         browsed_phrases_in_colls = get_nearest_terms_in_idxphrase_with_collection(p, index_id, rg/2, rg/2, coll_hitset)
     else:
         browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1)
         while not browsed_phrases:
             # try again and again with shorter and shorter pattern:
             try:
                 p = p[:-1]
                 browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1)
             except:
                 # probably there are no hits at all:
                 req.write(_("No values found."))
                 return
 
        ## try to check hits in this particular collection selection:
         browsed_phrases_in_colls = []
         if 0:
             for phrase in browsed_phrases:
                 phrase_hitset = intbitset()
                 phrase_hitsets = search_pattern("", phrase, f, 'e')
                 for coll in colls:
                     phrase_hitset.union_update(phrase_hitsets[coll])
                 if len(phrase_hitset) > 0:
                     # okay, this phrase has some hits in colls, so add it:
                     browsed_phrases_in_colls.append([phrase, len(phrase_hitset)])
 
         ## were there hits in collections?
         if browsed_phrases_in_colls == []:
             if browsed_phrases != []:
                 #write_warning(req, """<p>No match close to <em>%s</em> found in given collections.
                 #Please try different term.<p>Displaying matches in any collection...""" % p_orig)
                 ## try to get nbhits for these phrases in any collection:
                 for phrase in browsed_phrases:
                     nbhits = get_nbhits_in_bibxxx(phrase, f, coll_hitset)
                     if nbhits > 0:
                         browsed_phrases_in_colls.append([phrase, nbhits])
 
     ## display results now:
     out = websearch_templates.tmpl_browse_pattern(
             f=f,
             fn=get_field_i18nname(get_field_name(f) or f, ln, False),
             ln=ln,
             browsed_phrases_in_colls=browsed_phrases_in_colls,
             colls=colls,
             rg=rg,
           )
     req.write(out)
     return
 
 def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG):
     """Browse inside words indexes."""
     if not p:
         return
     _ = gettext_set_language(ln)
 
     urlargd = {}
     urlargd.update(req.argd)
     urlargd['action'] = 'search'
 
     nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0)
 
     req.write(websearch_templates.tmpl_search_in_bibwords(
         p = p,
         f = f,
         ln = ln,
         nearest_box = nearest_box
     ))
     return
 
 def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
     """Search for complex pattern 'p' within field 'f' according to
        matching type 'm'.  Return hitset of recIDs.
 
        The function uses multi-stage searching algorithm in case of no
        exact match found.  See the Search Internals document for
        detailed description.
 
       The 'ap' argument governs whether alternative patterns are to
       be used in case there is no direct hit for (p,f,m).  For
       example, whether to replace non-alphanumeric characters by
       spaces if it would give some hits.  See the Search Internals
       document for a detailed description.  (ap=0 forbids the
       alternative pattern usage, ap=1 permits it.)
       'ap' is also internally used for allowing hidden tag search
       (for requests coming from webcoll, for example).  In this
       case ap=-9.
 
        The 'of' argument governs whether to print or not some
        information to the user in case of no match found.  (Usually it
        prints the information in case of HTML formats, otherwise it's
        silent).
 
        The 'verbose' argument controls the level of debugging information
        to be printed (0=least, 9=most).
 
        All the parameters are assumed to have been previously washed.
 
        This function is suitable as a mid-level API.
     """
 
     _ = gettext_set_language(ln)
 
     hitset_empty = intbitset()
     # sanity check:
     if not p:
         hitset_full = intbitset(trailing_bits=1)
         hitset_full.discard(0)
         # no pattern, so return all universe
         return hitset_full
     # search stage 1: break up arguments into basic search units:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     basic_search_units = create_basic_search_units(req, p, f, m, of)
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         write_warning("Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units)), req=req)
         write_warning("Search stage 1: execution took %.2f seconds." % (t2 - t1), req=req)
     # search stage 2: do search for each search unit and verify hit presence:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     basic_search_units_hitsets = []
     #prepare hiddenfield-related..
     myhiddens = CFG_BIBFORMAT_HIDDEN_TAGS
     can_see_hidden = False
     if req:
         user_info = collect_user_info(req)
         can_see_hidden = user_info.get('precached_canseehiddenmarctags', False)
     if not req and ap == -9: # special request, coming from webcoll
         can_see_hidden = True
     if can_see_hidden:
         myhiddens = []
 
     if CFG_INSPIRE_SITE and of.startswith('h'):
         # fulltext/caption search warnings for INSPIRE:
        # note: avoid list-comprehension variables named p/f here, since they
        # would leak and clobber the function arguments in Python 2:
        fields_to_be_searched = [unit[2] for unit in basic_search_units]
         if 'fulltext' in fields_to_be_searched:
             write_warning(_("Full-text search is currently available for all arXiv papers, many theses, a few report series and some journal articles"), req=req)
         elif 'caption' in fields_to_be_searched:
             write_warning(_("Warning: figure caption search is only available for a subset of papers mostly from %(x_range_from_year)s-%(x_range_to_year)s.") % \
                           {'x_range_from_year': '2008',
                            'x_range_to_year': '2012'}, req=req)
 
     for idx_unit in xrange(len(basic_search_units)):
         bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
         if bsu_f and len(bsu_f) < 2:
             if of.startswith("h"):
                 write_warning(_("There is no index %s.  Searching for %s in all fields." % (bsu_f, bsu_p)), req=req)
             bsu_f = ''
             bsu_m = 'w'
             if of.startswith("h") and verbose:
                write_warning(_('Instead searching %s.') % str([bsu_o, bsu_p, bsu_f, bsu_m]), req=req)
         try:
             basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m, wl)
         except InvenioWebSearchWildcardLimitError, excp:
             basic_search_unit_hitset = excp.res
             if of.startswith("h"):
                 write_warning(_("Search term too generic, displaying only partial results..."), req=req)
         # FIXME: print warning if we use native full-text indexing
         if bsu_f == 'fulltext' and bsu_m != 'w' and of.startswith('h') and not CFG_SOLR_URL:
             write_warning(_("No phrase index available for fulltext yet, looking for word combination..."), req=req)
         #check that the user is allowed to search with this tag
         #if he/she tries it
         if bsu_f and len(bsu_f) > 1 and bsu_f[0].isdigit() and bsu_f[1].isdigit():
             for htag in myhiddens:
                 ltag = len(htag)
                 samelenfield = bsu_f[0:ltag]
                 if samelenfield == htag: #user searches by a hidden tag
                     #we won't show you anything..
                     basic_search_unit_hitset = intbitset()
                     if verbose >= 9 and of.startswith("h"):
                         write_warning("Pattern %s hitlist omitted since \
                                             it queries in a hidden tag %s" %
                                       (cgi.escape(repr(bsu_p)), repr(myhiddens)), req=req)
                     display_nearest_terms_box = False #..and stop spying, too.
         if verbose >= 9 and of.startswith("h"):
             write_warning("Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset), req=req)
         if len(basic_search_unit_hitset) > 0 or \
            ap<1 or \
            bsu_o=="|" or \
            ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"):
             # stage 2-1: this basic search unit is retained, since
             # either the hitset is non-empty, or the approximate
             # pattern treatment is switched off, or the search unit
             # was joined by an OR operator to preceding/following
             # units so we do not require that it exists
             basic_search_units_hitsets.append(basic_search_unit_hitset)
         else:
             # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern:
             if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p) and bsu_f != 'refersto' and bsu_f != 'citedby':
                 if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query?
                     bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p)
                 else: # it is WRD query
                     bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p)
                 if verbose and of.startswith('h') and req:
                     write_warning("Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m)), req=req)
                 basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln, wl=wl)
                 if len(basic_search_unit_hitset) > 0:
                     # we retain the new unit instead
                     if of.startswith('h'):
                         write_warning(_("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \
                                       {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>",
                                        'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"}, req=req)
                     basic_search_units[idx_unit][1] = bsu_pn
                     basic_search_units_hitsets.append(basic_search_unit_hitset)
                 else:
                     # stage 2-3: no hits found either, propose nearest indexed terms:
                     if of.startswith('h') and display_nearest_terms_box:
                         if req:
                             if bsu_f == "recid":
                                 write_warning(_("Requested record does not seem to exist."), req=req)
                             else:
                                 write_warning(create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln), req=req)
                     return hitset_empty
             else:
                 # stage 2-3: no hits found either, propose nearest indexed terms:
                 if of.startswith('h') and display_nearest_terms_box:
                     if req:
                         if bsu_f == "recid":
                             write_warning(_("Requested record does not seem to exist."), req=req)
                         else:
                             write_warning(create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln), req=req)
                 return hitset_empty
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         for idx_unit in range(0, len(basic_search_units)):
             write_warning("Search stage 2: basic search unit %s gave %d hits." %
                           (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit])), req=req)
         write_warning("Search stage 2: execution took %.2f seconds." % (t2 - t1), req=req)
     # search stage 3: apply boolean query for each search unit:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     # let the initial set be the complete universe:
     hitset_in_any_collection = intbitset(trailing_bits=1)
     hitset_in_any_collection.discard(0)
     for idx_unit in xrange(len(basic_search_units)):
         this_unit_operation = basic_search_units[idx_unit][0]
         this_unit_hitset = basic_search_units_hitsets[idx_unit]
         if this_unit_operation == '+':
             hitset_in_any_collection.intersection_update(this_unit_hitset)
         elif this_unit_operation == '-':
             hitset_in_any_collection.difference_update(this_unit_hitset)
         elif this_unit_operation == '|':
             hitset_in_any_collection.union_update(this_unit_hitset)
         else:
             if of.startswith("h"):
                 write_warning("Invalid set operation %s." % cgi.escape(this_unit_operation), "Error", req=req)
     if len(hitset_in_any_collection) == 0:
         # no hits found, propose alternative boolean query:
         if of.startswith('h') and display_nearest_terms_box:
             nearestterms = []
             for idx_unit in range(0, len(basic_search_units)):
                 bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
                 if bsu_p.startswith("%") and bsu_p.endswith("%"):
                     bsu_p = "'" + bsu_p[1:-1] + "'"
                 bsu_nbhits = len(basic_search_units_hitsets[idx_unit])
 
                 # create a similar query, but with the basic search unit only
                 argd = {}
                 argd.update(req.argd)
 
                 argd['p'] = bsu_p
                 argd['f'] = bsu_f
 
                 nearestterms.append((bsu_p, bsu_nbhits, argd))
 
             text = websearch_templates.tmpl_search_no_boolean_hits(
                      ln=ln,  nearestterms=nearestterms)
             write_warning(text, req=req)
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         write_warning("Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection), req=req)
         write_warning("Search stage 3: execution took %.2f seconds." % (t2 - t1), req=req)
     return hitset_in_any_collection
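
# Illustrative example of the three stages above: a query like p='ellis -muon'
# is broken up in stage 1 into basic search units roughly of the form
# [['+', 'ellis', '', 'w'], ['-', 'muon', '', 'w']] (the exact shape comes
# from create_basic_search_units()); each unit is searched in stage 2; and
# stage 3 combines the hitsets as (universe & hits('ellis')) - hits('muon').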
 
 def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
     """Search for complex pattern 'p' containing parenthesis within field 'f' according to
        matching type 'm'.  Return hitset of recIDs.
 
        For more details on the parameters see 'search_pattern'
     """
     _ = gettext_set_language(ln)
     spires_syntax_converter = SpiresToInvenioSyntaxConverter()
     spires_syntax_query = False
 
     # if the pattern uses SPIRES search syntax, convert it to Invenio syntax
     if spires_syntax_converter.is_applicable(p):
         spires_syntax_query = True
         p = spires_syntax_converter.convert_query(p)
 
     # sanity check: do not call parenthesised parser for search terms
     # like U(1) but still call it for searches like ('U(1)' | 'U(2)'):
     if not re_pattern_parens.search(re_pattern_parens_quotes.sub('_', p)):
         return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
 
     # Try searching with parentheses
     try:
         parser = SearchQueryParenthesisedParser()
 
         # get a hitset with all recids
         result_hitset = intbitset(trailing_bits=1)
 
        # parse the query; the result is a list [op1, expr1, op2, expr2, ..., opN, exprN]
        parsing_result = parser.parse_query(p)
        if verbose and of.startswith("h"):
            write_warning("Search stage 1: search_pattern_parenthesised() searched %s." % repr(p), req=req)
            write_warning("Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result), req=req)
        # go through every pattern, calculate its hitset, and combine it
        # with the result using the corresponding operator
        for index in xrange(0, len(parsing_result)-1, 2):
            current_operator = parsing_result[index]
            current_pattern = parsing_result[index+1]
 
             if CFG_INSPIRE_SITE and spires_syntax_query:
                 # setting ap=0 to turn off approximate matching for 0 results.
                 # Doesn't work well in combinations.
                 # FIXME: The right fix involves collecting statuses for each
                 #        hitset, then showing a nearest terms box exactly once,
                 #        outside this loop.
                 ap = 0
                 display_nearest_terms_box = False
            # obtain a hitset for the current pattern
            current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
             # combine the current hitset with resulting hitset using the current operator
             if current_operator == '+':
                 result_hitset = result_hitset & current_hitset
             elif current_operator == '-':
                 result_hitset = result_hitset - current_hitset
             elif current_operator == '|':
                 result_hitset = result_hitset | current_hitset
             else:
                 assert False, "Unknown operator in search_pattern_parenthesised()"
 
         return result_hitset
 
    # If searching with parentheses fails, perform the search ignoring parentheses
     except SyntaxError:
 
         write_warning(_("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again."), req=req)
 
        # remove the parentheses in the query. The current implementation removes all the parentheses,
        # but it could be improved to remove only those that are not inside quotes
         p = p.replace('(', ' ')
         p = p.replace(')', ' ')
 
         return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
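
# Illustrative example: for p='(muon | kaon) ellis' the parser is expected to
# return an [op1, expr1, op2, expr2, ...] list such as
# ['+', 'muon | kaon', '+', 'ellis']; each expression is then resolved via
# search_pattern() and combined with &, - or | according to its operator.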
 
 
 def search_unit(p, f=None, m=None, wl=0, ignore_synonyms=None):
     """Search for basic search unit defined by pattern 'p' and field
        'f' and matching type 'm'.  Return hitset of recIDs.
 
       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a 'basic search unit' so that it
       is searched as such and is not broken up in any way.  Only
       wildcard and span queries are detected inside 'p'.
 
        If CFG_WEBSEARCH_SYNONYM_KBRS is set and we are searching in
        one of the indexes that has defined runtime synonym knowledge
        base, then look up there and automatically enrich search
        results with results for synonyms.
 
       In case the wildcard limit (wl) is greater than 0 and this limit
       is reached, an InvenioWebSearchWildcardLimitError will be raised.
       In case you want to call this function with no limit for the
       wildcard queries, wl should be 0.
 
        Parameter 'ignore_synonyms' is a list of terms for which we
        should not try to further find a synonym.
 
        This function is suitable as a low-level API.
     """
 
     ## create empty output results set:
     hitset = intbitset()
     if not p: # sanity checking
         return hitset
 
     tokenizer = get_field_tokenizer_type(f)
     hitset_cjk = intbitset()
     if tokenizer == "BibIndexCJKTokenizer":
         if is_there_any_CJK_character_in_text(p):
             cjk_tok = BibIndexCJKTokenizer()
             chars = cjk_tok.tokenize_for_words(p)
             for char in chars:
                 hitset_cjk |= search_unit_in_bibwords(char, f, m, wl)
 
     ## eventually look up runtime synonyms:
     hitset_synonyms = intbitset()
     if CFG_WEBSEARCH_SYNONYM_KBRS.has_key(f):
         if ignore_synonyms is None:
             ignore_synonyms = []
         ignore_synonyms.append(p)
         for p_synonym in get_synonym_terms(p,
                              CFG_WEBSEARCH_SYNONYM_KBRS[f][0],
                              CFG_WEBSEARCH_SYNONYM_KBRS[f][1]):
            if p_synonym != p and \
                   p_synonym not in ignore_synonyms:
                 hitset_synonyms |= search_unit(p_synonym, f, m, wl,
                                                ignore_synonyms)
 
     ## look up hits:
     if f == 'fulltext' and get_idx_indexer('fulltext') == 'SOLR' and CFG_SOLR_URL:
         # redirect to Solr
         try:
             return search_unit_in_solr(p, f, m)
         except:
             # There were troubles with getting full-text search
             # results from Solr. Let us alert the admin of these
             # problems and let us simply return empty results to the
             # end user.
             register_exception()
             return hitset
     elif f == 'fulltext' and get_idx_indexer('fulltext') == 'XAPIAN' and CFG_XAPIAN_ENABLED:
         # redirect to Xapian
         try:
             return search_unit_in_xapian(p, f, m)
         except:
             # There were troubles with getting full-text search
             # results from Xapian. Let us alert the admin of these
             # problems and let us simply return empty results to the
             # end user.
             register_exception()
             return hitset
     if f == 'datecreated':
         hitset = search_unit_in_bibrec(p, p, 'c')
     elif f == 'datemodified':
         hitset = search_unit_in_bibrec(p, p, 'm')
    elif f == 'refersto':
        # search for records referring to the records matching p
        hitset = search_unit_refersto(p)
    elif f == 'referstoexcludingselfcites':
        # ditto, but excluding self-citations
        hitset = search_unit_refersto_excluding_selfcites(p)
     elif f == 'rawref':
         from invenio.refextract_api import search_from_reference
         field, pattern = search_from_reference(p)
         return search_unit(pattern, field)
    elif f == 'citedby':
        # search for records cited by the records matching p
        hitset = search_unit_citedby(p)
    elif f == 'citedbyexcludingselfcites':
        # ditto, but excluding self-citations
        hitset = search_unit_citedby_excluding_selfcites(p)
     elif m == 'a' or m == 'r':
         # we are doing either phrase search or regexp search
         if f == 'fulltext':
             # FIXME: workaround for not having phrase index yet
             return search_pattern(None, p, f, 'w')
         index_id = get_index_id_from_field(f)
         if index_id != 0:
             if m == 'a' and index_id in get_idxpair_field_ids():
                # for exact matches on the admin-configured fields we search in the pair tables
                 hitset = search_unit_in_idxpairs(p, f, m, wl)
             else:
                 hitset = search_unit_in_idxphrases(p, f, m, wl)
         else:
             hitset = search_unit_in_bibxxx(p, f, m, wl)
             # if not hitset and m == 'a' and (p[0] != '%' and p[-1] != '%'):
             #     #if we have no results by doing exact matching, do partial matching
             #     #for removing the distinction between simple and double quotes
             #     hitset = search_unit_in_bibxxx('%' + p + '%', f, m, wl)
     elif p.startswith("cited:"):
         # we are doing search by the citation count
         hitset = search_unit_by_times_cited(p[6:])
     elif p.startswith("citedexcludingselfcites:"):
         # we are doing search by the citation count
         hitset = search_unit_by_times_cited(p[6:], exclude_selfcites=True)
     else:
         # we are doing bibwords search by default
         hitset = search_unit_in_bibwords(p, f, m, wl=wl)
 
     ## merge synonym results and return total:
     hitset |= hitset_synonyms
     hitset |= hitset_cjk
     return hitset
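
# Illustrative dispatch examples (hypothetical patterns):
#   search_unit('2012-01', 'datemodified') -> date-based search in bibrec
#   search_unit('ellis', 'refersto')       -> records referring to matches of 'ellis'
#   search_unit('cited:50->100')           -> records cited between 50 and 100 times
#   search_unit('higgs', 'title', 'a')     -> phrase search in the title index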
 
 
 def get_idxpair_field_ids():
     """Returns the list of ids for the fields that idxPAIRS should be used on"""
     index_dict = dict(run_sql("SELECT name, id FROM idxINDEX"))
     return [index_dict[field] for field in index_dict if field in CFG_WEBSEARCH_IDXPAIRS_FIELDS]
 
 
 def search_unit_in_bibwords(word, f, m=None, decompress=zlib.decompress, wl=0):
     """Searches for 'word' inside bibwordsX table for field 'f' and returns hitset of recIDs."""
    hitset = intbitset() # will hold output result set; avoid shadowing the builtin set()
    hitset_used = 0 # not-yet-used flag, to be able to circumvent set operations
     limit_reached = 0 # flag for knowing if the query limit has been reached
 
     # if no field is specified, search in the global index.
     f = f or 'anyfield'
     index_id = get_index_id_from_field(f)
     if index_id:
         bibwordsX = "idxWORD%02dF" % index_id
         stemming_language = get_index_stemming_language(index_id)
     else:
         return intbitset() # word index f does not exist
 
     # wash 'word' argument and run query:
     if f.endswith('count') and word.endswith('+'):
         # field count query of the form N+ so transform N+ to N->99999:
         word = word[:-1] + '->99999'
     word = string.replace(word, '*', '%') # we now use '*' as the truncation character
     words = string.split(word, "->", 1) # check for span query
     if len(words) == 2:
         word0 = re_word.sub('', words[0])
         word1 = re_word.sub('', words[1])
         if stemming_language:
             word0 = lower_index_term(word0)
             word1 = lower_index_term(word1)
             word0 = stem(word0, stemming_language)
             word1 = stem(word1, stemming_language)
         word0_washed = wash_index_term(word0)
         word1_washed = wash_index_term(word1)
         if f.endswith('count'):
             # field count query; convert to integers in order
             # to have numerical behaviour for 'BETWEEN n1 AND n2' query
             try:
                 word0_washed = int(word0_washed)
                 word1_washed = int(word1_washed)
             except ValueError:
                 pass
         try:
             res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX,
                           (word0_washed, word1_washed), wildcard_limit = wl)
         except InvenioDbQueryWildcardLimitError, excp:
             res = excp.res
             limit_reached = 1 # set the limit reached flag to true
     else:
         if f == 'journal':
             pass # FIXME: quick hack for the journal index
         else:
             word = re_word.sub('', word)
         if stemming_language:
             word = lower_index_term(word)
             word = stem(word, stemming_language)
         if string.find(word, '%') >= 0: # do we have wildcard in the word?
             if f == 'journal':
                 # FIXME: quick hack for the journal index
                 # FIXME: we can run a sanity check here for all indexes
                 res = ()
             else:
                 try:
                     res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX,
                                   (wash_index_term(word),), wildcard_limit = wl)
                 except InvenioDbQueryWildcardLimitError, excp:
                     res = excp.res
                     limit_reached = 1 # set the limit reached flag to true
         else:
             res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX,
                           (wash_index_term(word),))
     # fill the result set:
     for word, hitlist in res:
         hitset_bibwrd = intbitset(hitlist)
         # add the results:
        if hitset_used:
            hitset.union_update(hitset_bibwrd)
        else:
            hitset = hitset_bibwrd
            hitset_used = 1
    # check to see if the query limit was reached:
    if limit_reached:
        # raise an exception, so we can print a nice message to the user:
        raise InvenioWebSearchWildcardLimitError(hitset)
    # okay, return result set:
    return hitset
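
# Illustrative example: a span query such as 'muon->nucleon' is split on '->'
# and turned into a "term BETWEEN %s AND %s" lookup on the idxWORDxxF table,
# while a pattern containing '*' (e.g. 'muo*') becomes a LIKE query subject to
# the wildcard limit wl.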
 
 def search_unit_in_idxpairs(p, f, type, wl=0):
     """Searches for pair 'p' inside idxPAIR table for field 'f' and
     returns hitset of recIDs found."""
     limit_reached = 0 # flag for knowing if the query limit has been reached
     do_exact_search = True # flag to know when it makes sense to try to do exact matching
     result_set = intbitset()
     #determine the idxPAIR table to read from
     index_id = get_index_id_from_field(f)
     if not index_id:
         return intbitset()
     stemming_language = get_index_stemming_language(index_id)
     pairs_tokenizer = BibIndexDefaultTokenizer(stemming_language)
     idxpair_table_washed = wash_table_column_name("idxPAIR%02dF" % index_id)
 
     if p.startswith("%") and p.endswith("%"):
         p = p[1:-1]
     original_pattern = p
     p = string.replace(p, '*', '%') # we now use '*' as the truncation character
     queries_releated_vars = [] # contains tuples of (query_addons, query_params, use_query_limit)
 
     #is it a span query?
     ps = string.split(p, "->", 1)
     if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
         #so we are dealing with a span query
         pairs_left = pairs_tokenizer.tokenize_for_pairs(ps[0])
         pairs_right = pairs_tokenizer.tokenize_for_pairs(ps[1])
         if not pairs_left or not pairs_right:
             # we are not actually dealing with pairs but with words
             return search_unit_in_bibwords(original_pattern, f, type, wl)
         elif len(pairs_left) != len(pairs_right):
             # it is kind of hard to know what the user actually wanted
            # we would have to handle e.g.: foo bar baz -> qux xyz, so let's switch to phrase
             return search_unit_in_idxphrases(original_pattern, f, type, wl)
         elif len(pairs_left) > 1 and \
                 len(pairs_right) > 1 and \
                 pairs_left[:-1] != pairs_right[:-1]:
             # again we have something like: foo bar baz -> abc xyz qux
             # so we'd better switch to phrase
             return search_unit_in_idxphrases(original_pattern, f, type, wl)
         else:
             # finally, we can treat the search using idxPairs
             # at this step we have either: foo bar -> abc xyz
             # or foo bar abc -> foo bar xyz
             queries_releated_vars = [("BETWEEN %s AND %s", (pairs_left[-1], pairs_right[-1]), True)]
             for pair in pairs_left[:-1]:# which should be equal with pairs_right[:-1]
                 queries_releated_vars.append(("= %s", (pair, ), False))
         do_exact_search = False # no exact search for span queries
     elif string.find(p, '%') > -1:
        # tokenizing p will remove the '%', so we have to make sure it stays
        replacement = 'xxxxxxxxxx' # hopefully this will not clash with anything in the future
        p = string.replace(p, '%', replacement)
         pairs = pairs_tokenizer.tokenize_for_pairs(p)
         if not pairs:
             # we are not actually dealing with pairs but with words
             return search_unit_in_bibwords(original_pattern, f, type, wl)
        queries_related_vars = []
         for pair in pairs:
             if string.find(pair, replacement) > -1:
                pair = string.replace(pair, replacement, '%') # we replace back the % sign
                queries_related_vars.append(("LIKE %s", (pair, ), True))
            else:
                queries_related_vars.append(("= %s", (pair, ), False))
         do_exact_search = False
     else:
         #normal query
         pairs = pairs_tokenizer.tokenize_for_pairs(p)
         if not pairs:
             # we are not actually dealing with pairs but with words
             return search_unit_in_bibwords(original_pattern, f, type, wl)
        queries_related_vars = []
         for pair in pairs:
             queries_releated_vars.append(("= %s", (pair, ), False))
 
     first_results = 1 # flag to know if it's the first set of results or not
    for query_addons, query_params, use_query_limit in queries_related_vars:
         if use_query_limit:
             try:
                 res = run_sql_with_limit("SELECT term, hitlist FROM %s WHERE term %s" \
                                      % (idxpair_table_washed, query_addons), query_params, wildcard_limit=wl) #kwalitee:disable=sql
             except InvenioDbQueryWildcardLimitError, excp:
                 res = excp.res
                 limit_reached = 1 # set the limit reached flag to true
         else:
             res = run_sql("SELECT term, hitlist FROM %s WHERE term %s" \
                       % (idxpair_table_washed, query_addons), query_params) #kwalitee:disable=sql
         if not res:
             return intbitset()
         for pair, hitlist in res:
             hitset_idxpairs = intbitset(hitlist)
             if first_results:
                 result_set = hitset_idxpairs
                 first_results = 0
             else:
                 result_set.intersection_update(hitset_idxpairs)
    # check to see if the query limit was reached
     if limit_reached:
         #raise an exception, so we can print a nice message to the user
         raise InvenioWebSearchWildcardLimitError(result_set)
 
     # check if we need to eliminate the false positives
     if CFG_WEBSEARCH_IDXPAIRS_EXACT_SEARCH and do_exact_search:
         # we need to eliminate the false positives
         idxphrase_table_washed = wash_table_column_name("idxPHRASE%02dR" % index_id)
         not_exact_search = intbitset()
         for recid in result_set:
             res = run_sql("SELECT termlist FROM %s WHERE id_bibrec %s" %(idxphrase_table_washed, '=%s'), (recid, )) #kwalitee:disable=sql
             if res:
                 termlist = deserialize_via_marshal(res[0][0])
                 if not [term for term in termlist if term.lower().find(p.lower()) > -1]:
                     not_exact_search.add(recid)
             else:
                 not_exact_search.add(recid)
         # remove the recs that are false positives from the final result
         result_set.difference_update(not_exact_search)
     return result_set
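
# Illustrative sketch of the fallback behaviour above (editorial addition;
# the 'title' field and the patterns are hypothetical):
def _example_search_unit_in_idxpairs():
    # a two-word pattern is a genuine pair search:
    pair_hits = search_unit_in_idxpairs('quark model', 'title', 'w')
    # a single word cannot form a pair, so it falls back to the word index:
    word_hits = search_unit_in_idxpairs('quark', 'title', 'w')
    # an uneven span query falls back to the phrase index:
    phrase_hits = search_unit_in_idxpairs('foo bar baz->qux xyz', 'title', 'w')
    return pair_hits, word_hits, phrase_hits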
 
 
 def search_unit_in_idxphrases(p, f, type, wl=0):
     """Searches for phrase 'p' inside idxPHRASE*F table for field 'f' and returns hitset of recIDs found.
     The search type is defined by 'type' (e.g. equals to 'r' for a regexp search)."""
     # call word search method in some cases:
     if f.endswith('count'):
         return search_unit_in_bibwords(p, f, wl=wl)
     set = intbitset() # will hold output result set
     set_used = 0 # not-yet-used flag, to be able to circumvent set operations
     limit_reached = 0 # flag for knowing if the query limit has been reached
     use_query_limit = False # flag for knowing if to limit the query results or not
     # deduce in which idxPHRASE table we will search:
     idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if index_id:
             idxphraseX = "idxPHRASE%02dF" % index_id
         else:
             return intbitset() # phrase index f does not exist
     # detect query type (exact phrase, partial phrase, regexp):
     if type == 'r':
         query_addons = "REGEXP %s"
         query_params = (p,)
         use_query_limit = True
     else:
         p = string.replace(p, '*', '%') # we now use '*' as the truncation character
         ps = string.split(p, "->", 1) # check for span query:
         if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
             query_addons = "BETWEEN %s AND %s"
             query_params = (ps[0], ps[1])
             use_query_limit = True
         else:
             if string.find(p, '%') > -1:
                 query_addons = "LIKE %s"
                 query_params = (p,)
                 use_query_limit = True
             else:
                 query_addons = "= %s"
                 query_params = (p,)
 
     # special washing for fuzzy author index:
     if f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor', 'authorityauthor'):
         query_params_washed = ()
         for query_param in query_params:
             query_params_washed += (wash_author_name(query_param),)
         query_params = query_params_washed
     # perform search:
     if use_query_limit:
         try:
             res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons),
                       query_params, wildcard_limit=wl)
         except InvenioDbQueryWildcardLimitError, excp:
             res = excp.res
             limit_reached = 1 # set the limit reached flag to true
     else:
         res = run_sql("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons), query_params)
     # fill the result set:
     for word, hitlist in res:
         hitset_bibphrase = intbitset(hitlist)
         # add the results:
         if set_used:
             set.union_update(hitset_bibphrase)
         else:
             set = hitset_bibphrase
             set_used = 1
     #check to see if the query limit was reached
     if limit_reached:
         #raise an exception, so we can print a nice message to the user
         raise InvenioWebSearchWildcardLimitError(set)
     # okay, return result set:
     return set
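
# Illustrative usage sketch (editorial addition; field and phrases are
# hypothetical; any non-'r' type takes the phrase branch above):
def _example_search_unit_in_idxphrases():
    # exact phrase match:
    exact = search_unit_in_idxphrases('standard model', 'title', 'e')
    # span query, translated into BETWEEN ... AND ... :
    span = search_unit_in_idxphrases('aaa->zzz', 'title', 'e')
    # regexp search, subject to the wildcard limit 'wl':
    rx = search_unit_in_idxphrases('^standard', 'title', 'r', wl=100)
    return exact, span, rx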
 
 def search_unit_in_bibxxx(p, f, type, wl=0):
     """Searches for pattern 'p' inside bibxxx tables for field 'f' and returns hitset of recIDs found.
     The search type is defined by 'type' (e.g. equals to 'r' for a regexp search)."""
 
     # call word search method in some cases:
     if f == 'journal' or f.endswith('count'):
         return search_unit_in_bibwords(p, f, wl=wl)
     p_orig = p # saving for eventual future 'no match' reporting
     limit_reached = 0 # flag for knowing if the query limit has been reached
     use_query_limit = False  # flag for knowing if to limit the query results or not
     query_addons = "" # will hold additional SQL code for the query
     query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument)
     # wash arguments:
     f = string.replace(f, '*', '%') # replace truncation char '*' in field definition
     if type == 'r':
         query_addons = "REGEXP %s"
         query_params = (p,)
         use_query_limit = True
     else:
         p = string.replace(p, '*', '%') # we now use '*' as the truncation character
         ps = string.split(p, "->", 1) # check for span query:
         if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
             query_addons = "BETWEEN %s AND %s"
             query_params = (ps[0], ps[1])
             use_query_limit = True
         else:
             if string.find(p, '%') > -1:
                 query_addons = "LIKE %s"
                 query_params = (p,)
                 use_query_limit = True
             else:
                 query_addons = "= %s"
                 query_params = (p,)
     # construct 'tl' which defines the tag list (MARC tags) to search in:
     tl = []
     if len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
         tl.append(f) # 'f' seems to be okay as it starts by two digits
     else:
         # deduce desired MARC tags on the basis of chosen 'f'
         tl = get_field_tags(f)
         if not tl:
             # f index does not exist, nevermind
             pass
     # okay, start search:
     l = [] # will hold list of recID that matched
     for t in tl:
         # deduce into which bibxxx table we will search:
         digit1, digit2 = int(t[0]), int(t[1])
         bx = "bib%d%dx" % (digit1, digit2)
         bibx = "bibrec_bib%d%dx" % (digit1, digit2)
         # construct and run query:
         if t == "001":
             if query_addons.find('BETWEEN') > -1 or query_addons.find('=') > -1:
                 # verify that the params are integers (to avoid returning record 123 when searching for 123foo)
                 try:
                     query_params = tuple(int(param) for param in query_params)
                 except ValueError:
                     return intbitset()
             if use_query_limit:
                 try:
                     res = run_sql_with_limit("SELECT id FROM bibrec WHERE id %s" % query_addons,
                               query_params, wildcard_limit=wl)
                 except InvenioDbQueryWildcardLimitError, excp:
                     res = excp.res
                     limit_reached = 1 # set the limit reached flag to true
             else:
                 res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons,
                               query_params)
         else:
             query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \
                     (bx, bibx, query_addons)
             if len(t) != 6 or t[-1:]=='%':
                 # wildcard query, or only the beginning of field 't'
                 # is defined, so add wildcard character:
                 query += " AND bx.tag LIKE %s"
                 query_params_and_tag = query_params + (t + '%',)
             else:
                 # exact query for 't':
                 query += " AND bx.tag=%s"
                 query_params_and_tag = query_params + (t,)
             if use_query_limit:
                 try:
                     res = run_sql_with_limit(query, query_params_and_tag, wildcard_limit=wl)
                 except InvenioDbQueryWildcardLimitError, excp:
                     res = excp.res
                     limit_reached = 1 # set the limit reached flag to true
             else:
                 res = run_sql(query, query_params_and_tag)
         # fill the result set:
         for id_bibrec in res:
             if id_bibrec[0]:
                 l.append(id_bibrec[0])
     # check no of hits found:
     nb_hits = len(l)
     # okay, return result set:
     set = intbitset(l)
     #check to see if the query limit was reached
     if limit_reached:
         #raise an exception, so we can print a nice message to the user
         raise InvenioWebSearchWildcardLimitError(set)
     return set
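
# Illustrative usage sketch (editorial addition; all values hypothetical):
def _example_search_unit_in_bibxxx():
    # 'f' may be a logical field code ...
    by_field = search_unit_in_bibxxx('CERN-TH-2012-001', 'reportnumber', 'a')
    # ... or directly a MARC tag (recognised by the two leading digits);
    # '*' is the truncation character:
    by_tag = search_unit_in_bibxxx('CERN-TH-*', '088__a', 'a', wl=100)
    return by_field, by_tag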
 
 def search_unit_in_solr(p, f=None, m=None):
     """
     Query a Solr index and return an intbitset corresponding
     to the result.  Parameters (p,f,m) are usual search unit ones.
     """
     if m and (m == 'a' or m == 'r'): # phrase/regexp query
         if p.startswith('%') and p.endswith('%'):
             p = p[1:-1] # fix for partial phrase
         p = '"' + p + '"'
     return solr_get_bitset(f, p)
 
 
 def search_unit_in_xapian(p, f=None, m=None):
     """
     Query a Xapian index and return an intbitset corresponding
     to the result.  Parameters (p,f,m) are usual search unit ones.
     """
     if m and (m == 'a' or m == 'r'): # phrase/regexp query
         if p.startswith('%') and p.endswith('%'):
             p = p[1:-1] # fix for partial phrase
         p = '"' + p + '"'
     return xapian_get_bitset(f, p)
 
 
 def search_unit_in_bibrec(datetext1, datetext2, type='c'):
     """
     Return hitset of recIDs found that were either created or modified
     (according to 'type' arg being 'c' or 'm') from datetext1 until datetext2, inclusive.
     Does not pay attention to pattern, collection, anything.  Useful
     to intersect later on with the 'real' query.
     """
     set = intbitset()
     if type and type.startswith("m"):
         type = "modification_date"
     else:
         type = "creation_date" # by default we are searching for creation dates
 
     parts = datetext1.split('->')
     if len(parts) > 1 and datetext1 == datetext2:
         datetext1 = parts[0]
         datetext2 = parts[1]
 
     if datetext1 == datetext2:
         res = run_sql("SELECT id FROM bibrec WHERE %s LIKE %%s" % (type,),
                       (datetext1 + '%',))
     else:
         res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (type, type),
                       (datetext1, datetext2))
     for row in res:
         set += row[0]
     return set
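
# Illustrative usage sketch (editorial addition):
def _example_search_unit_in_bibrec():
    # all records created on a given day (prefix match on the date):
    created = search_unit_in_bibrec('2013-03-01', '2013-03-01', 'c')
    # all records modified within an inclusive date interval:
    modified = search_unit_in_bibrec('2013-01-01', '2013-06-30', 'm')
    return created, modified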
 
 def search_unit_by_times_cited(p, exclude_selfcites=False):
     """
    Return hitset of recIDs found that are cited P times.
     Usually P looks like '10->23'.
     """
     numstr = '"'+p+'"'
     #this is sort of stupid but since we may need to
     #get the records that do _not_ have cites, we have to
     #know the ids of all records, too
    #but this is needed only if p is 0, "0", or a range like "0->N" or "N->0"
     allrecs = []
     if p == 0 or p == "0" or \
        p.startswith("0->") or p.endswith("->0"):
         allrecs = intbitset(run_sql("SELECT id FROM bibrec"))
     return get_records_with_num_cites(numstr, allrecs,
                                       exclude_selfcites=exclude_selfcites)
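
# Illustrative usage sketch (editorial addition):
def _example_search_unit_by_times_cited():
    # records cited between 10 and 23 times:
    mid_range = search_unit_by_times_cited('10->23')
    # uncited records; this is the case that needs the full recID universe:
    uncited = search_unit_by_times_cited('0')
    return mid_range, uncited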
 
 def search_unit_refersto(query):
     """
     Search for records satisfying the query (e.g. author:ellis) and
     return list of records referred to by these records.
     """
     if query:
         ahitset = search_pattern(p=query)
         return get_refersto_hitset(ahitset)
     else:
         return intbitset([])
 
 def search_unit_refersto_excluding_selfcites(query):
     """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records referred to by these records, excluding
    self-citations.
     """
     if query:
         ahitset = search_pattern(p=query)
         citers = intbitset()
         citations = get_cited_by_list(ahitset)
         selfcitations = get_self_cited_by_list(ahitset)
         for cites, selfcites in zip(citations, selfcitations):
             # cites is in the form [(citee, citers), ...]
             citers += cites[1] - selfcites[1]
         return citers
     else:
         return intbitset([])
 
 def search_unit_citedby(query):
     """
     Search for records satisfying the query (e.g. author:ellis) and
     return list of records cited by these records.
     """
     if query:
         ahitset = search_pattern(p=query)
         if ahitset:
             return get_citedby_hitset(ahitset)
         else:
             return intbitset([])
     else:
         return intbitset([])
 
 def search_unit_citedby_excluding_selfcites(query):
     """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records cited by these records, excluding
    self-citations.
     """
     if query:
         ahitset = search_pattern(p=query)
         citees = intbitset()
         references = get_refers_to_list(ahitset)
         selfreferences = get_self_refers_to_list(ahitset)
         for refs, selfrefs in zip(references, selfreferences):
             # refs is in the form [(citer, citees), ...]
             citees += refs[1] - selfrefs[1]
         return citees
     else:
         return intbitset([])
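
# Illustrative usage sketch (editorial addition; the query is hypothetical
# and the citation semantics are those documented in the docstrings above):
def _example_citation_search_units():
    with_self = search_unit_refersto('author:ellis')
    # the *_excluding_selfcites variant discounts citations coming from
    # the matched records' own authors, so it can only shrink the result:
    without_self = search_unit_refersto_excluding_selfcites('author:ellis')
    return with_self, without_self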
 
 def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True):
     """Return dict of hitsets given by intersection of hitset with the collection universes."""
 
     _ = gettext_set_language(ln)
 
     # search stage 4: intersect with the collection universe
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
 
     results = {}  # all final results
     results_nbhits = 0
 
     # calculate the list of recids (restricted or not) that the user has rights to access and we should display (only those)
     records_that_can_be_displayed = intbitset()
 
     if not req or isinstance(req, cStringIO.OutputType): # called from CLI
         user_info = {}
         for coll in colls:
             results[coll] = hitset_in_any_collection & get_collection_reclist(coll)
             results_nbhits += len(results[coll])
         records_that_can_be_displayed = hitset_in_any_collection
         permitted_restricted_collections = []
 
     else:
         user_info = collect_user_info(req)
         policy = CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY.strip().upper()
         # let's get the restricted collections the user has rights to view
         if user_info['guest'] == '1':
             permitted_restricted_collections = []
             ## For guest users that are actually authorized to some restricted
             ## collection (by virtue of the IP address in a FireRole rule)
             ## we explicitly build the list of permitted_restricted_collections
             for coll in colls:
                 if collection_restricted_p(coll) and (acc_authorize_action(user_info, 'viewrestrcoll', collection=coll)[0] == 0):
                     permitted_restricted_collections.append(coll)
         else:
             permitted_restricted_collections = user_info.get('precached_permitted_restricted_collections', [])
 
        # let's build the list of both the public and restricted
        # child collections of the collection from which the user
        # started his/her search. This list of child collections will
        # be used in the warning proposing a search in those collections
         try:
             current_coll = req.argd['cc'] # current_coll: coll from which user started his/her search
         except (AttributeError, KeyError):
             current_coll = CFG_SITE_NAME
         current_coll_children = get_collection_allchildren(current_coll) # real & virtual
         # add all restricted collections, that the user has access to, and are under the current collection
         # do not use set here, in order to maintain a specific order:
        # children of 'cc' (real, virtual, restricted), rest of 'c' that are not cc's children
         colls_to_be_displayed = [coll for coll in current_coll_children if coll in colls or coll in permitted_restricted_collections]
         colls_to_be_displayed.extend([coll for coll in colls if coll not in colls_to_be_displayed])
 
        if policy == 'ANY': # the user needs to have access to at least one collection that restricts the records
            # we need this to be able to remove records that are both in a public and in a restricted collection
             permitted_recids = intbitset()
             notpermitted_recids = intbitset()
             for collection in restricted_collection_cache.cache:
                 if collection in permitted_restricted_collections:
                     permitted_recids |= get_collection_reclist(collection)
                 else:
                     notpermitted_recids |= get_collection_reclist(collection)
             records_that_can_be_displayed = hitset_in_any_collection - (notpermitted_recids - permitted_recids)
 
        else: # the user needs to have access to all collections that restrict these records
             notpermitted_recids = intbitset()
             for collection in restricted_collection_cache.cache:
                 if collection not in permitted_restricted_collections:
                     notpermitted_recids |= get_collection_reclist(collection)
             records_that_can_be_displayed = hitset_in_any_collection - notpermitted_recids
 
         for coll in colls_to_be_displayed:
             results[coll] = results.get(coll, intbitset()).union_update(records_that_can_be_displayed & get_collection_reclist(coll))
             results_nbhits += len(results[coll])
 
     if results_nbhits == 0:
         # no hits found, try to search in Home and restricted and/or hidden collections:
         results = {}
         results_in_Home = records_that_can_be_displayed & get_collection_reclist(CFG_SITE_NAME)
         results_in_restricted_collections = intbitset()
         results_in_hidden_collections = intbitset()
         for coll in permitted_restricted_collections:
             if not get_coll_ancestors(coll): # hidden collection
                 results_in_hidden_collections.union_update(records_that_can_be_displayed & get_collection_reclist(coll))
             else:
                 results_in_restricted_collections.union_update(records_that_can_be_displayed & get_collection_reclist(coll))
 
        # in this way we do not count twice records that are both in the Home collection and in a restricted collection
         total_results = len(results_in_Home.union(results_in_restricted_collections))
 
         if total_results > 0:
             # some hits found in Home and/or restricted collections, so propose this search:
             if of.startswith("h") and display_nearest_terms_box:
                 url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[])
                 len_colls_to_display = len(colls_to_be_displayed)
                 # trim the list of collections to first two, since it might get very large
                 write_warning(_("No match found in collection %(x_collection)s. Other collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\
                               {'x_collection': '<em>' + \
                                     string.join([get_coll_i18nname(coll, ln, False) for coll in colls_to_be_displayed[:2]], ', ') + \
                                     (len_colls_to_display > 2 and ' et al' or '') + '</em>',
                                'x_url_open': '<a class="nearestterms" href="%s">' % (url),
                                'x_nb_hits': total_results,
                                'x_url_close': '</a>'}, req=req)
                # display the whole list of collections in an HTML comment
                 if len_colls_to_display > 2:
                     write_warning("<!--No match found in collection <em>%(x_collection)s</em>.-->" %\
                                   {'x_collection': string.join([get_coll_i18nname(coll, ln, False) for coll in colls_to_be_displayed], ', ')},
                                   req=req)
         else:
            # no hits found: either the user is looking for a document
            # he/she has no rights to view, or for a hidden document:
             if of.startswith("h") and display_nearest_terms_box:
                 if len(results_in_hidden_collections) > 0:
                     write_warning(_("No public collection matched your query. "
                                          "If you were looking for a hidden document, please type "
                                          "the correct URL for this record."), req=req)
                 else:
                     write_warning(_("No public collection matched your query. "
                                          "If you were looking for a non-public document, please choose "
                                          "the desired restricted collection first."), req=req)
 
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         write_warning("Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits, req=req)
         write_warning("Search stage 4: execution took %.2f seconds." % (t2 - t1), req=req)
 
     return results
 
 def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"):
     """Return intersection of search 'results' (a dict of hitsets
        with collection as key) with the 'hitset', i.e. apply
        'hitset' intersection to each collection within search
        'results'.
 
       If the resulting set is empty and 'ap'
       (approximate pattern) is true, then print the 'aptext' warning
       and return the original 'results' set unchanged.  If 'ap' is
       false, then return an empty results set.
     """
     if ap:
         results_ap = copy.deepcopy(results)
     else:
         results_ap = {} # will return empty dict in case of no hits found
     nb_total = 0
     final_results = {}
     for coll in results.keys():
         final_results[coll] = results[coll].intersection(hitset)
         nb_total += len(final_results[coll])
     if nb_total == 0:
         if of.startswith("h"):
             write_warning(aptext, req=req)
         final_results = results_ap
     return final_results
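
# Illustrative usage sketch (editorial addition; collection names and
# record IDs are made up):
def _example_intersect_results_with_hitset():
    results = {'Articles': intbitset([1, 2, 3]), 'Preprints': intbitset([4])}
    recent = intbitset([2, 3])
    # with ap=0 an empty intersection yields an empty dict; with ap=1 the
    # original 'results' would be returned together with the warning text:
    return intersect_results_with_hitset(None, results, recent, ap=0, of="id")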
 
 def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG):
     """Return a box similar to ``Not satisfied...'' one by proposing
        author searches for similar names.  Namely, take AUTHOR_NAME
        and the first initial of the firstame (after comma) and look
        into author index whether authors with e.g. middle names exist.
        Useful mainly for CERN Library that sometimes contains name
        forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the
        same person.  The box isn't proposed if no similarly named
        authors are found to exist.
     """
     # return nothing if not configured:
     if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0:
         return ""
     # return empty box if there is no initial:
     if re.match(r'[^ ,]+, [^ ]', author_name) is None:
         return ""
     # firstly find name comma initial:
     author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name)
 
     # secondly search for similar name forms:
     similar_author_names = {}
     for name in author_name_to_search, strip_accents(author_name_to_search):
         for tag in get_field_tags("author"):
             # deduce into which bibxxx table we will search:
             digit1, digit2 = int(tag[0]), int(tag[1])
             bx = "bib%d%dx" % (digit1, digit2)
             bibx = "bibrec_bib%d%dx" % (digit1, digit2)
             if len(tag) != 6 or tag[-1:]=='%':
                 # only the beginning of field 't' is defined, so add wildcard character:
                 res = run_sql("""SELECT bx.value FROM %s AS bx
                                   WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx,
                               (name + "%", tag + "%"))
             else:
                 res = run_sql("""SELECT bx.value FROM %s AS bx
                                   WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx,
                               (name + "%", tag))
             for row in res:
                 similar_author_names[row[0]] = 1
     # remove the original name and sort the list:
     try:
         del similar_author_names[author_name]
     except KeyError:
         pass
     # thirdly print the box:
     out = ""
     if similar_author_names:
         out_authors = similar_author_names.keys()
         out_authors.sort()
 
         tmp_authors = []
         for out_author in out_authors:
             nbhits = get_nbhits_in_bibxxx(out_author, "author")
             if nbhits:
                 tmp_authors.append((out_author, nbhits))
         out += websearch_templates.tmpl_similar_author_names(
                  authors=tmp_authors, ln=ln)
 
     return out
 
 def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True):
     """Return text box containing list of 'n' nearest terms above/below 'p'
        for the field 'f' for matching type 't' (words/phrases) in
        language 'ln'.
       Propose new searches according to `urlargd' with the new words.
        If `intro_text_p' is true, then display the introductory message,
        otherwise print only the nearest terms in the box content.
     """
     # load the right message language
     _ = gettext_set_language(ln)
 
     if not CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS:
         return _("Your search did not match any records.  Please try again.")
     nearest_terms = []
     if not p: # sanity check
         p = "."
     if p.startswith('%') and p.endswith('%'):
         p = p[1:-1] # fix for partial phrase
     index_id = get_index_id_from_field(f)
     if f == 'fulltext':
         if CFG_SOLR_URL:
             return _("No match found, please enter different search terms.")
         else:
             # FIXME: workaround for not having native phrase index yet
             t = 'w'
     # special indexes:
     if f == 'refersto' or f == 'referstoexcludingselfcites':
         return _("There are no records referring to %s.") % cgi.escape(p)
     if f == 'citedby' or f == 'citedbyexcludingselfcites':
         return _("There are no records cited by %s.") % cgi.escape(p)
     # look for nearest terms:
     if t == 'w':
         nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n)
         if not nearest_terms:
             return _("No word index is available for %s.") % \
                    ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')
     else:
         nearest_terms = []
         if index_id:
             nearest_terms = get_nearest_terms_in_idxphrase(p, index_id, n, n)
         if f == 'datecreated' or f == 'datemodified':
             nearest_terms = get_nearest_terms_in_bibrec(p, f, n, n)
         if not nearest_terms:
             nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n)
         if not nearest_terms:
             return _("No phrase index is available for %s.") % \
                    ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')
 
     terminfo = []
     for term in nearest_terms:
         if t == 'w':
             hits = get_nbhits_in_bibwords(term, f)
         else:
             if index_id:
                 hits = get_nbhits_in_idxphrases(term, f)
             elif f == 'datecreated' or f == 'datemodified':
                 hits = get_nbhits_in_bibrec(term, f)
             else:
                 hits = get_nbhits_in_bibxxx(term, f)
 
         argd = {}
         argd.update(urlargd)
 
         # check which fields contained the requested parameter, and replace it.
         for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'):
             if px in argd:
                 argd_px = argd[px]
                 if t == 'w':
                    # p was stripped of accents, so do the same:
                     argd_px = strip_accents(argd_px)
                 #argd[px] = string.replace(argd_px, p, term, 1)
                 #we need something similar, but case insensitive
                 pattern_index = string.find(argd_px.lower(), p.lower())
                 if pattern_index > -1:
                     argd[px] = argd_px[:pattern_index] + term + argd_px[pattern_index+len(p):]
                     break
                 #this is doing exactly the same as:
                 #argd[px] = re.sub('(?i)' + re.escape(p), term, argd_px, 1)
                 #but is ~4x faster (2us vs. 8.25us)
         terminfo.append((term, hits, argd))
 
     intro = ""
     if intro_text_p: # add full leading introductory text
         if f:
             intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \
                      {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>",
                       'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + "</em>"}
         else:
             intro = _("Search term %s did not match any record. Nearest terms in any collection are:") % \
                      ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>")
 
     return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo,
                                                      intro=intro)
 
 def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
     """Return list of +n -n nearest terms to word `p' in index for field `f'."""
     nearest_words = [] # will hold the (sorted) list of nearest words to return
     # deduce into which bibwordsX table we will search:
     bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if index_id:
             bibwordsX = "idxWORD%02dF" % index_id
         else:
             return nearest_words
     # firstly try to get `n' closest words above `p':
     res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX,
                   (p, n_above))
     for row in res:
         nearest_words.append(row[0])
     nearest_words.reverse()
     # secondly insert given word `p':
     nearest_words.append(p)
     # finally try to get `n' closest words below `p':
     res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX,
                   (p, n_below))
     for row in res:
         nearest_words.append(row[0])
     return nearest_words
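
# Illustrative usage sketch (editorial addition; term/field hypothetical):
def _example_get_nearest_terms_in_bibwords():
    # three index terms alphabetically above and three below 'ellis',
    # with 'ellis' itself inserted in between:
    return get_nearest_terms_in_bibwords('ellis', 'author', 3, 3)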
 
 def get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above):
     """Browse (-n_above, +n_below) closest bibliographic phrases
        for the given pattern p in the given field idxPHRASE table,
        regardless of collection.
        Return list of [phrase1, phrase2, ... , phrase_n]."""
     if CFG_INSPIRE_SITE and index_id in (3, 15): # FIXME: workaround due to new fuzzy index
         return [p,]
     idxphraseX = "idxPHRASE%02dF" % index_id
     res_above = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above))
     res_above = map(lambda x: x[0], res_above)
     res_above.reverse()
 
     res_below = run_sql("SELECT term FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below))
     res_below = map(lambda x: x[0], res_below)
 
     return res_above + res_below
 
 def get_nearest_terms_in_idxphrase_with_collection(p, index_id, n_below, n_above, collection):
     """Browse (-n_above, +n_below) closest bibliographic phrases
        for the given pattern p in the given field idxPHRASE table,
        considering the collection (intbitset).
        Return list of [(phrase1, hitset), (phrase2, hitset), ... , (phrase_n, hitset)]."""
     idxphraseX = "idxPHRASE%02dF" % index_id
     res_above = run_sql("SELECT term,hitlist FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above * 3))
     res_above = [(term, intbitset(hitlist) & collection) for term, hitlist in res_above]
     res_above = [(term, len(hitlist)) for term, hitlist in res_above if hitlist]
 
     res_below = run_sql("SELECT term,hitlist FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below * 3))
     res_below = [(term, intbitset(hitlist) & collection) for term, hitlist in res_below]
     res_below = [(term, len(hitlist)) for term, hitlist in res_below if hitlist]
 
     res_above.reverse()
     return res_above[-n_above:] + res_below[:n_below]
 
 
 def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
     """Browse (-n_above, +n_below) closest bibliographic phrases
        for the given pattern p in the given field f, regardless
        of collection.
        Return list of [phrase1, phrase2, ... , phrase_n]."""
     ## determine browse field:
     if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
         f, p = string.split(p, ":", 1)
 
     # FIXME: quick hack for the journal index
     if f == 'journal':
         return get_nearest_terms_in_bibwords(p, f, n_below, n_above)
 
     ## We are going to take max(n_below, n_above) as the number of
    ## values to fetch from bibXXx.  This is needed to work around
     ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
     ## use MySQL 4.1.x or our own idxPHRASE in the future.
 
     index_id = get_index_id_from_field(f)
     if index_id:
         return get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above)
 
     n_fetch = 2*max(n_below, n_above)
     ## construct 'tl' which defines the tag list (MARC tags) to search in:
     tl = []
     if str(f[0]).isdigit() and str(f[1]).isdigit():
         tl.append(f) # 'f' seems to be okay as it starts by two digits
     else:
         # deduce desired MARC tags on the basis of chosen 'f'
         tl = get_field_tags(f)
     ## start browsing to fetch list of hits:
     browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique)
     # always add self to the results set:
     browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1
     for t in tl:
         # deduce into which bibxxx table we will search:
         digit1, digit2 = int(t[0]), int(t[1])
         bx = "bib%d%dx" % (digit1, digit2)
         bibx = "bibrec_bib%d%dx" % (digit1, digit2)
         # firstly try to get `n' closest phrases above `p':
         if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character:
             res = run_sql("""SELECT bx.value FROM %s AS bx
                               WHERE bx.value<%%s AND bx.tag LIKE %%s
                               ORDER BY bx.value DESC LIMIT %%s""" % bx,
                           (p, t + "%", n_fetch))
         else:
             res = run_sql("""SELECT bx.value FROM %s AS bx
                               WHERE bx.value<%%s AND bx.tag=%%s
                               ORDER BY bx.value DESC LIMIT %%s""" % bx,
                           (p, t, n_fetch))
         for row in res:
             browsed_phrases[row[0]] = 1
         # secondly try to get `n' closest phrases equal to or below `p':
         if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character:
             res = run_sql("""SELECT bx.value FROM %s AS bx
                               WHERE bx.value>=%%s AND bx.tag LIKE %%s
                               ORDER BY bx.value ASC LIMIT %%s""" % bx,
                           (p, t + "%", n_fetch))
         else:
             res = run_sql("""SELECT bx.value FROM %s AS bx
                               WHERE bx.value>=%%s AND bx.tag=%%s
                               ORDER BY bx.value ASC LIMIT %%s""" % bx,
                           (p, t, n_fetch))
         for row in res:
             browsed_phrases[row[0]] = 1
    # select first n words only: (this is needed because we searched
    # in many different tables, so we cannot be sure the n words we
    # have are the right ones; this of course won't be needed once
    # there is one ACC table per field):
     phrases_out = browsed_phrases.keys()
     phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)),
                                       string.lower(strip_accents(y))))
     # find position of self:
     try:
         idx_p = phrases_out.index(p)
    except ValueError:
         idx_p = len(phrases_out)/2
     # return n_above and n_below:
     return phrases_out[max(0, idx_p-n_above):idx_p+n_below]
 
 def get_nearest_terms_in_bibrec(p, f, n_below, n_above):
     """Return list of nearest terms and counts from bibrec table.
     p is usually a date, and f either datecreated or datemodified.
 
    Note: the below/above counts are approximate and not strictly respected.
     """
     col = 'creation_date'
     if f == 'datemodified':
         col = 'modification_date'
     res_above = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s')
                              FROM bibrec WHERE %s < %%s
                             ORDER BY %s DESC LIMIT %%s""" % (col, col, col),
                         (p, n_above))
     res_below = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s')
                              FROM bibrec WHERE %s > %%s
                             ORDER BY %s ASC LIMIT %%s""" % (col, col, col),
                         (p, n_below))
     out = set([])
     for row in res_above:
         out.add(row[0])
     for row in res_below:
         out.add(row[0])
    out_list = list(out)
    out_list.sort()
    return out_list
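
# Illustrative usage sketch (editorial addition):
def _example_get_nearest_terms_in_bibrec():
    # creation dates closest to the given timestamp (counts approximate):
    return get_nearest_terms_in_bibrec('2013-01-01 00:00:00', 'datecreated', 3, 3)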
 
 def get_nbhits_in_bibrec(term, f):
     """Return number of hits in bibrec table.  term is usually a date,
     and f is either 'datecreated' or 'datemodified'."""
     col = 'creation_date'
     if f == 'datemodified':
         col = 'modification_date'
     res = run_sql("SELECT COUNT(*) FROM bibrec WHERE %s LIKE %%s" % (col,),
                   (term + '%',))
     return res[0][0]
 
 def get_nbhits_in_bibwords(word, f):
     """Return number of hits for word 'word' inside words index for field 'f'."""
     out = 0
     # deduce into which bibwordsX table we will search:
     bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if index_id:
             bibwordsX = "idxWORD%02dF" % index_id
         else:
             return 0
     if word:
         res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX,
                       (word,))
         for hitlist in res:
             out += len(intbitset(hitlist[0]))
     return out
 
 def get_nbhits_in_idxphrases(word, f):
     """Return number of hits for word 'word' inside phrase index for field 'f'."""
     out = 0
    # deduce into which idxPHRASE table we will search:
     idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if index_id:
             idxphraseX = "idxPHRASE%02dF" % index_id
         else:
             return 0
     if word:
         res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % idxphraseX,
                       (word,))
         for hitlist in res:
             out += len(intbitset(hitlist[0]))
     return out
 
 def get_nbhits_in_bibxxx(p, f, in_hitset=None):
     """Return number of hits for word 'word' inside words index for field 'f'."""
     ## determine browse field:
     if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
         f, p = string.split(p, ":", 1)
 
     # FIXME: quick hack for the journal index
     if f == 'journal':
         return get_nbhits_in_bibwords(p, f)
 
     ## construct 'tl' which defines the tag list (MARC tags) to search in:
     tl = []
     if str(f[0]).isdigit() and str(f[1]).isdigit():
         tl.append(f) # 'f' seems to be okay as it starts by two digits
     else:
         # deduce desired MARC tags on the basis of chosen 'f'
         tl = get_field_tags(f)
     # start searching:
     recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., }  (unique recIDs, therefore)
     for t in tl:
         # deduce into which bibxxx table we will search:
         digit1, digit2 = int(t[0]), int(t[1])
         bx = "bib%d%dx" % (digit1, digit2)
         bibx = "bibrec_bib%d%dx" % (digit1, digit2)
         if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character:
             res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                               WHERE bx.value=%%s AND bx.tag LIKE %%s
                                 AND bibx.id_bibxxx=bx.id""" % (bibx, bx),
                           (p, t + "%"))
         else:
             res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                               WHERE bx.value=%%s AND bx.tag=%%s
                                 AND bibx.id_bibxxx=bx.id""" % (bibx, bx),
                           (p, t))
         for row in res:
             recIDs[row[0]] = 1
 
     if in_hitset is None:
         nbhits = len(recIDs)
     else:
         nbhits = len(intbitset(recIDs.keys()).intersection(in_hitset))
     return nbhits
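
# Illustrative usage sketch (editorial addition; values hypothetical):
def _example_get_nbhits_in_bibxxx():
    total = get_nbhits_in_bibxxx('Ellis, J', 'author')
    # restrict counting to a given hitset:
    within = get_nbhits_in_bibxxx('Ellis, J', 'author',
                                  in_hitset=intbitset([1, 2, 3]))
    return total, within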
 
 def get_mysql_recid_from_aleph_sysno(sysno):
     """Returns DB's recID for ALEPH sysno passed in the argument (e.g. "002379334CER").
        Returns None in case of failure."""
     out = None
     res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b
                       WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""",
                   (sysno,))
     if res:
         out = res[0][0]
     return out
 
 def guess_primary_collection_of_a_record(recID):
     """Return primary collection name a record recid belongs to, by
        testing 980 identifier.
        May lead to bad guesses when a collection is defined dynamically
        via dbquery.
        In that case, return 'CFG_SITE_NAME'."""
     out = CFG_SITE_NAME
     dbcollids = get_fieldvalues(recID, "980__a")
     for dbcollid in dbcollids:
         variants = ("collection:" + dbcollid,
                     'collection:"' + dbcollid + '"',
                     "980__a:" + dbcollid,
                     '980__a:"' + dbcollid + '"',
                     '980:' + dbcollid ,
                     '980:"' + dbcollid + '"')
         res = run_sql("SELECT name FROM collection WHERE dbquery IN (%s,%s,%s,%s,%s,%s)", variants)
         if res:
             out = res[0][0]
             break
     if CFG_CERN_SITE:
         recID = int(recID)
         # dirty hack for ATLAS collections at CERN:
         if out in ('ATLAS Communications', 'ATLAS Internal Notes'):
             for alternative_collection in ('ATLAS Communications Physics',
                                            'ATLAS Communications General',
                                            'ATLAS Internal Notes Physics',
                                            'ATLAS Internal Notes General',):
                 if recID in get_collection_reclist(alternative_collection):
                     return alternative_collection
 
         # dirty hack for FP
         FP_collections = {'DO': ['Current Price Enquiries', 'Archived Price Enquiries'],
                           'IT': ['Current Invitation for Tenders', 'Archived Invitation for Tenders'],
                           'MS': ['Current Market Surveys', 'Archived Market Surveys']}
         fp_coll_ids = [coll for coll in dbcollids if coll in FP_collections]
         for coll in fp_coll_ids:
             for coll_name in FP_collections[coll]:
                 if recID in get_collection_reclist(coll_name):
                     return coll_name
 
     return out
 
 _re_collection_url = re.compile('/collection/(.+)')
 def guess_collection_of_a_record(recID, referer=None, recreate_cache_if_needed=True):
     """Return collection name a record recid belongs to, by first testing
        the referer URL if provided and otherwise returning the
        primary collection."""
     if referer:
         dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer)
         #requests can come from different invenio installations, with different collections
         if CFG_SITE_URL.find(hostname) < 0:
             return guess_primary_collection_of_a_record(recID)
         g = _re_collection_url.match(path)
         if g:
             name = urllib.unquote_plus(g.group(1))
            #check if this collection actually exists (also normalize the name if case-insensitive)
             name = get_coll_normalised_name(name)
             if name and recID in get_collection_reclist(name):
                 return name
         elif path.startswith('/search'):
             if recreate_cache_if_needed:
                 collection_reclist_cache.recreate_cache_if_needed()
             query = cgi.parse_qs(query)
             for name in query.get('cc', []) + query.get('c', []):
                 name = get_coll_normalised_name(name)
                 if name and recID in get_collection_reclist(name, recreate_cache_if_needed=False):
                     return name
     return guess_primary_collection_of_a_record(recID)
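
# Illustrative usage sketch (editorial addition; the collection name in the
# referer URL is hypothetical):
def _example_guess_collection_of_a_record():
    # with a matching referer, the collection named in the URL wins;
    # otherwise the primary collection guess is returned:
    referer = CFG_SITE_URL + '/collection/Articles'
    return guess_collection_of_a_record(1, referer=referer)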
 
 def is_record_in_any_collection(recID, recreate_cache_if_needed=True):
     """Return True if the record belongs to at least one collection. This is a
     good, although not perfect, indicator to guess if webcoll has already run
     after this record has been entered into the system.
     """
     if recreate_cache_if_needed:
         collection_reclist_cache.recreate_cache_if_needed()
     for name in collection_reclist_cache.cache.keys():
         if recID in get_collection_reclist(name, recreate_cache_if_needed=False):
             return True
     return False
 
 def get_all_collections_of_a_record(recID, recreate_cache_if_needed=True):
     """Return all the collection names a record belongs to.
     Note this function is O(n_collections)."""
     ret = []
     if recreate_cache_if_needed:
         collection_reclist_cache.recreate_cache_if_needed()
     for name in collection_reclist_cache.cache.keys():
         if recID in get_collection_reclist(name, recreate_cache_if_needed=False):
             ret.append(name)
     return ret
 
 def get_tag_name(tag_value, prolog="", epilog=""):
     """Return tag name from the known tag value, by looking up the 'tag' table.
        Return empty string in case of failure.
       Example: input='100__%', output='first author'."""
     out = ""
     res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,))
     if res:
         out = prolog + res[0][0] + epilog
     return out
 
 def get_fieldcodes():
     """Returns a list of field codes that may have been passed as 'search options' in URL.
        Example: output=['subject','division']."""
     out = []
     res = run_sql("SELECT DISTINCT(code) FROM field")
     for row in res:
         out.append(row[0])
     return out
 
 def get_field_name(code):
     """Return the corresponding field_name given the field code.
     e.g. reportnumber -> report number."""
     res = run_sql("SELECT name FROM field WHERE code=%s", (code, ))
     if res:
         return res[0][0]
     else:
         return ""
 
 def get_field_tags(field):
     """Returns a list of MARC tags for the field code 'field'.
        Returns empty list in case of error.
        Example: field='author', output=['100__%','700__%']."""
     out = []
     query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
                 WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
                 ORDER BY ft.score DESC"""
     res = run_sql(query, (field, ))
     for val in res:
         out.append(val[0])
     return out
 
 def get_merged_recid(recID):
     """ Return the record ID of the record with
     which the given record has been merged.
     @param recID: deleted record recID
     @type recID: int
     @return: merged record recID
     @rtype: int or None
     """
     merged_recid = None
     for val in get_fieldvalues(recID, "970__d"):
         try:
             merged_recid = int(val)
             break
         except ValueError:
             pass
     return merged_recid
 
 def record_exists(recID):
     """Return 1 if record RECID exists.
        Return 0 if it doesn't exist.
        Return -1 if it exists but is marked as deleted.
     """
     out = 0
     res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1)
     if res:
         try: # if recid is '123foo', mysql will return id=123, and we don't want that
             recID = int(recID)
         except ValueError:
             return 0
         # record exists; now check whether it isn't marked as deleted:
         dbcollids = get_fieldvalues(recID, "980__%")
         if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids):
             out = -1 # exists, but marked as deleted
         else:
             out = 1 # exists fine
     return out
 
 def record_empty(recID):
     """
     Is this record empty, e.g. has only 001, waiting for integration?
 
     @param recID: the record identifier.
     @type recID: int
     @return: 1 if the record is empty, 0 otherwise.
     @rtype: int
     """
     return bibrecord.record_empty(get_record(recID))
 
 def record_public_p(recID, recreate_cache_if_needed=True):
     """Return 1 if the record is public, i.e. if it can be found in the Home collection.
        Return 0 otherwise.
     """
     return recID in get_collection_reclist(CFG_SITE_NAME, recreate_cache_if_needed=recreate_cache_if_needed)
 
 def get_creation_date(recID, fmt="%Y-%m-%d"):
     "Returns the creation date of the record 'recID'."
     out = ""
     res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
     if res:
         out = res[0][0]
     return out
 
 def get_modification_date(recID, fmt="%Y-%m-%d"):
     "Returns the date of last modification for the record 'recID'."
     out = ""
     res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
     if res:
         out = res[0][0]
     return out
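
# Illustrative usage sketch (editorial addition; 'fmt' follows MySQL
# DATE_FORMAT syntax, e.g. '%i' for minutes):
def _example_record_dates():
    created = get_creation_date(1)
    modified = get_modification_date(1, fmt="%Y-%m-%d %H:%i")
    return created, modified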
 
 def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS,
                       aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="",
                       sc=1, pl_in_url="",
                       d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="",
                       cpu_time=-1, middle_only=0, em=""):
     """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
        Also, prints navigation links (beg/next/prev/end) inside the results set.
       If middle_only is set to 1, it will only print the middle box information (beg/next/prev/end/etc) links.
        This is suitable for displaying navigation links at the bottom of the search results page."""
 
     if em != '' and EM_REPOSITORY["search_info"] not in em:
         return ""
     # sanity check:
     if jrec < 1:
         jrec = 1
     if jrec > nb_found:
         jrec = max(nb_found-rg+1, 1)
 
     return websearch_templates.tmpl_print_search_info(
              ln = ln,
              collection = collection,
              aas = aas,
              collection_name = get_coll_i18nname(collection, ln, False),
              collection_id = get_colID(collection),
              middle_only = middle_only,
              rg = rg,
              nb_found = nb_found,
              sf = sf,
              so = so,
              rm = rm,
              of = of,
              ot = ot,
              p = p,
              f = f,
              p1 = p1,
              p2 = p2,
              p3 = p3,
              f1 = f1,
              f2 = f2,
              f3 = f3,
              m1 = m1,
              m2 = m2,
              m3 = m3,
              op1 = op1,
              op2 = op2,
              pl_in_url = pl_in_url,
              d1y = d1y,
              d1m = d1m,
              d1d = d1d,
              d2y = d2y,
              d2m = d2m,
              d2d = d2d,
              dt = dt,
              jrec = jrec,
              sc = sc,
              sp = sp,
              all_fieldcodes = get_fieldcodes(),
              cpu_time = cpu_time,
            )
 
 def print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS,
                       aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="",
                       sc=1, pl_in_url="",
                       d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="",
                       cpu_time=-1, middle_only=0, em=""):
     """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
        Also, prints navigation links (beg/next/prev/end) inside the results set.
        If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links.
        This is suitable for displaying navigation links at the bottom of the search results page."""
 
     if em != '' and EM_REPOSITORY["search_info"] not in em:
         return ""
 
     # sanity check:
     if jrec < 1:
         jrec = 1
     if jrec > nb_found:
         jrec = max(nb_found-rg+1, 1)
 
     return websearch_templates.tmpl_print_hosted_search_info(
              ln = ln,
              collection = collection,
              aas = aas,
              collection_name = get_coll_i18nname(collection, ln, False),
              collection_id = get_colID(collection),
              middle_only = middle_only,
              rg = rg,
              nb_found = nb_found,
              sf = sf,
              so = so,
              rm = rm,
              of = of,
              ot = ot,
              p = p,
              f = f,
              p1 = p1,
              p2 = p2,
              p3 = p3,
              f1 = f1,
              f2 = f2,
              f3 = f3,
              m1 = m1,
              m2 = m2,
              m3 = m3,
              op1 = op1,
              op2 = op2,
              pl_in_url = pl_in_url,
              d1y = d1y,
              d1m = d1m,
              d1d = d1d,
              d2y = d2y,
              d2m = d2m,
              d2d = d2d,
              dt = dt,
              jrec = jrec,
              sc = sc,
              sp = sp,
              all_fieldcodes = get_fieldcodes(),
              cpu_time = cpu_time,
            )
 
 def print_results_overview(colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[], hosted_colls_potential_results_p=False, em=""):
     """Prints results overview box with links to particular collections below."""
 
     if em != "" and EM_REPOSITORY["overview"] not in em:
         return ""
     new_colls = []
     for coll in colls:
         new_colls.append({
                           'id': get_colID(coll),
                           'code': coll,
                           'name': get_coll_i18nname(coll, ln, False),
                          })
 
     return websearch_templates.tmpl_print_results_overview(
              ln = ln,
              results_final_nb_total = results_final_nb_total,
              results_final_nb = results_final_nb,
              cpu_time = cpu_time,
              colls = new_colls,
              ec = ec,
              hosted_colls_potential_results_p = hosted_colls_potential_results_p,
            )
 
 def print_hosted_results(url_and_engine, ln=CFG_SITE_LANG, of=None, req=None, no_records_found=False, search_timed_out=False, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS, em = ""):
     """Prints the full results of a hosted collection"""
 
     if of and of.startswith("h"):
         if no_records_found:
             return "<br />No results found."
         if search_timed_out:
             return "<br />The search engine did not respond in time."
 
     return websearch_templates.tmpl_print_hosted_results(
         url_and_engine=url_and_engine,
         ln=ln,
         of=of,
         req=req,
         limit=limit,
         display_body = em == "" or EM_REPOSITORY["body"] in em,
         display_add_to_basket = em == "" or EM_REPOSITORY["basket"] in em)
 
 class BibSortDataCacher(DataCacher):
     """
     Cache holding all structures created by bibsort
     (   _data, data_dict).
     """
     def __init__(self, method_name):
         self.method_name = method_name
         self.method_id = 0
         res = None # guard: 'res' would otherwise be unbound if the query fails
         try:
             res = run_sql("""SELECT id from bsrMETHOD where name = %s""", (self.method_name,))
         except Exception:
             pass
         if res and res[0]:
             self.method_id = res[0][0]
         else:
             self.method_id = 0
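 
         # cache_filler() loads everything bibsort precomputed for this
         # method: the ordered weight dictionary (recid -> weight) and,
         # when configured, the per-bucket intbitsets of record ids.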
 
         def cache_filler():
             method_id = self.method_id
             alldicts = {}
             if self.method_id == 0:
                 return {}
             try:
                 res_data = run_sql("""SELECT data_dict_ordered from bsrMETHODDATA \
                                    where id_bsrMETHOD = %s""", (method_id,))
                 res_buckets = run_sql("""SELECT bucket_no, bucket_data from bsrMETHODDATABUCKET\
                                       where id_bsrMETHOD = %s""", (method_id,))
             except Exception:
                 # database problems, return empty cache
                 return {}
             try:
                 data_dict_ordered = deserialize_via_marshal(res_data[0][0])
             except Exception:
                 data_dict_ordered = {}
             alldicts['data_dict_ordered'] = data_dict_ordered # recid: weight
             if not res_buckets:
                 alldicts['bucket_data'] = {}
                 return alldicts
 
             for row in res_buckets:
                 bucket_no = row[0]
                 try:
                     bucket_data = intbitset(row[1])
                 except Exception:
                     bucket_data = intbitset([])
                 alldicts.setdefault('bucket_data', {})[bucket_no] = bucket_data
 
             return alldicts
 
         def timestamp_verifier():
             method_id = self.method_id
             res = run_sql("""SELECT last_updated from bsrMETHODDATA where id_bsrMETHOD = %s""", (method_id,))
             try:
                 update_time_methoddata = str(res[0][0])
             except IndexError:
                 update_time_methoddata = '1970-01-01 00:00:00'
             res = run_sql("""SELECT max(last_updated) from bsrMETHODDATABUCKET where id_bsrMETHOD = %s""", (method_id,))
             try:
                 update_time_buckets = str(res[0][0])
             except IndexError:
                 update_time_buckets = '1970-01-01 00:00:00'
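             # 'YYYY-MM-DD HH:MM:SS' strings compare lexicographically in
             # chronological order, so max() picks the most recent update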
             return max(update_time_methoddata, update_time_buckets)
 
         DataCacher.__init__(self, cache_filler, timestamp_verifier)
 
 def get_sorting_methods():
     if not CFG_BIBSORT_BUCKETS: # we do not want to use buckets
         return {}
     try: # make sure the method has some data
         res = run_sql("""SELECT m.name, m.definition FROM bsrMETHOD m, bsrMETHODDATA md WHERE m.id = md.id_bsrMETHOD""")
     except Exception:
         return {}
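     # res rows are (name, definition) pairs; a definition looks like
     # 'FIELD: title' or 'RNK: citation' (illustrative values)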
     return dict(res)
 
 sorting_methods = get_sorting_methods()
 cache_sorted_data = {}
 for sorting_method in sorting_methods:
     try:
         cache_sorted_data[sorting_method].is_ok_p
     except Exception:
         cache_sorted_data[sorting_method] = BibSortDataCacher(sorting_method)
 
 
 def get_tags_from_sort_fields(sort_fields):
     """Given a list of sort_fields, return the tags associated with it and
     also the name of the field that has no tags associated, to be able to
     display a message to the user."""
     tags = []
     if not sort_fields:
         return [], ''
     for sort_field in sort_fields:
         if sort_field and str(sort_field[0:2]).isdigit():
             # sort_field starts by two digits, so this is probably a MARC tag already
             tags.append(sort_field)
         else:
             # let us check the 'field' table
             field_tags = get_field_tags(sort_field)
             if field_tags:
                 tags.extend(field_tags)
             else:
                 return [], sort_field
     return tags, ''
 
 
 def rank_records(req, rank_method_code, rank_limit_relevance, hitset_global, pattern=None, verbose=0, sort_order='d', of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None, field=''):
     """Initial entry point for ranking records, acts like a dispatcher.
        (i) rank_method_code is in bsrMETHOD, bibsort buckets can be used;
        (ii)rank_method_code is not in bsrMETHOD, use bibrank;
     """
 
     if CFG_BIBSORT_BUCKETS and sorting_methods:
         for sort_method in sorting_methods:
             definition = sorting_methods[sort_method]
             if definition.startswith('RNK') and \
             definition.replace('RNK:','').strip().lower() == string.lower(rank_method_code):
                 (solution_recs, solution_scores) = sort_records_bibsort(req, hitset_global, sort_method, '', sort_order, verbose, of, ln, rg, jrec, 'r')
                 #return (solution_recs, solution_scores, '', '', '')
                 comment = ''
                 if verbose > 0:
                     comment = 'find_citations retlist %s' % [[solution_recs[i], solution_scores[i]] for i in range(len(solution_recs))]
                 return (solution_recs, solution_scores, '(', ')', comment)
     return rank_records_bibrank(rank_method_code, rank_limit_relevance, hitset_global, pattern, verbose, field, rg, jrec)
 
 
 def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None):
     """Initial entry point for sorting records, acts like a dispatcher.
        (i) sort_field is in the bsrMETHOD, and thus, the BibSort has sorted the data for this field, so we can use the cache;
        (ii)sort_field is not in bsrMETHOD, and thus, the cache does not contain any information regarding this sorting method"""
 
     _ = gettext_set_language(ln)
 
     #we should return sorted records up to irec_max (exclusive)
     dummy, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
     #calculate the min index on the reversed list
     index_min = max(len(recIDs) - irec_max, 0) #just to be sure that the min index is not negative
 
     #bibsort does not handle sort_pattern for now, use bibxxx
     if sort_pattern:
         return sort_records_bibxxx(req, recIDs, None, sort_field, sort_order, sort_pattern, verbose, of, ln, rg, jrec)
 
     use_sorting_buckets = True
 
     if not CFG_BIBSORT_BUCKETS or not sorting_methods: #ignore the use of buckets, use old-fashioned sorting
         use_sorting_buckets = False
 
     if not sort_field:
         if use_sorting_buckets:
             return sort_records_bibsort(req, recIDs, 'latest first', sort_field, sort_order, verbose, of, ln, rg, jrec)
         else:
             return recIDs[index_min:]
 
     sort_fields = string.split(sort_field, ",")
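     # (illustrative) sort_field='author,title' -> sort_fields=['author', 'title']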
     if len(sort_fields) == 1:
         # we have only one sorting_field, check if it is treated by BibSort
         for sort_method in sorting_methods:
             definition = sorting_methods[sort_method]
             if use_sorting_buckets and \
                ((definition.startswith('FIELD') and \
                 definition.replace('FIELD:','').strip().lower() == string.lower(sort_fields[0])) or \
                 sort_method == sort_fields[0]):
                 #use BibSort
                 return sort_records_bibsort(req, recIDs, sort_method, sort_field, sort_order, verbose, of, ln, rg, jrec)
     #deduce sorting MARC tag out of the 'sort_field' argument:
     tags, error_field = get_tags_from_sort_fields(sort_fields)
     if error_field:
         if use_sorting_buckets:
             return sort_records_bibsort(req, recIDs, 'latest first', sort_field, sort_order, verbose, of, ln, rg, jrec)
         else:
             if of.startswith('h'):
                 write_warning(_("Sorry, %s does not seem to be a valid sort option. The records will not be sorted.") % cgi.escape(error_field), "Error", req=req)
             return recIDs[index_min:]
     if tags:
         for sort_method in sorting_methods:
             definition = sorting_methods[sort_method]
             if definition.startswith('MARC') \
                     and definition.replace('MARC:','').strip().split(',') == tags \
                     and use_sorting_buckets:
                 #this list of tags has a designated method in BibSort, so use it
                 return sort_records_bibsort(req, recIDs, sort_method, sort_field, sort_order, verbose, of, ln, rg, jrec)
         #we do not have this sort_field in BibSort tables -> do the old-fashioned sorting
         return sort_records_bibxxx(req, recIDs, tags, sort_field, sort_order, sort_pattern, verbose, of, ln, rg, jrec)
 
     return recIDs[index_min:]
 
 
 def sort_records_bibsort(req, recIDs, sort_method, sort_field='', sort_order='d', verbose=0, of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None, sort_or_rank = 's'):
     """This function orders the recIDs list, based on a sorting method(sort_field) using the BibSortDataCacher for speed"""
 
     _ = gettext_set_language(ln)
 
     #sanity check
     if sort_method not in sorting_methods:
         if sort_or_rank == 'r':
             return rank_records_bibrank(sort_method, 0, recIDs, None, verbose)
         else:
             return sort_records_bibxxx(req, recIDs, None, sort_field, sort_order, '', verbose, of, ln, rg, jrec)
     if verbose >= 3 and of.startswith('h'):
         write_warning("Sorting (using BibSort cache) by method %s (definition %s)." \
                       % (cgi.escape(repr(sort_method)), cgi.escape(repr(sorting_methods[sort_method]))), req=req)
     #we should return sorted records up to irec_max (exclusive)
     dummy, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
     solution = intbitset([])
     input_recids = intbitset(recIDs)
     cache_sorted_data[sort_method].recreate_cache_if_needed()
     sort_cache = cache_sorted_data[sort_method].cache
     bucket_numbers = sort_cache['bucket_data'].keys()
     #check if all buckets have been constructed
     if len(bucket_numbers) != CFG_BIBSORT_BUCKETS:
         if verbose > 3 and of.startswith('h'):
             write_warning("Not all buckets have been constructed.. switching to old fashion sorting.", req=req)
         if sort_or_rank == 'r':
             return rank_records_bibrank(sort_method, 0, recIDs, None, verbose)
         else:
             return sort_records_bibxxx(req, recIDs, None, sort_field, sort_order, '', verbose, of, ln, rg, jrec)
     if sort_order == 'd':
         bucket_numbers.reverse()
     for bucket_no in bucket_numbers:
         solution.union_update(input_recids & sort_cache['bucket_data'][bucket_no])
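         # once enough hits are gathered to cover the requested interval,
         # further buckets cannot change the result, so stop early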
         if len(solution) >= irec_max:
             break
     dict_solution = {}
     missing_records = []
     for recid in solution:
         try:
             dict_solution[recid] = sort_cache['data_dict_ordered'][recid]
         except KeyError:
             #recid is in buckets, but not in the bsrMETHODDATA,
             #maybe because the value has been deleted, but the change has not yet been propagated to the buckets
             missing_records.append(recid)
     #check if there are recids that are not in any bucket -> to be added at the end/top, ordered by insertion date
     if len(solution) < irec_max:
         #some records have not been yet inserted in the bibsort structures
         #or, some records have no value for the sort_method
         missing_records = sorted(missing_records + list(input_recids.difference(solution)))
     #the records need to be sorted in reverse order for the print_records function
     #the return statement should be equivalent to the following statements
     #(these are clearer, but less efficient, since they reverse the same list twice)
     #sorted_solution = (missing_records + sorted(dict_solution, key=dict_solution.__getitem__, reverse=sort_order=='d'))[:irec_max]
     #sorted_solution.reverse()
     #return sorted_solution
     if sort_method.strip().lower().startswith('latest') and sort_order == 'd':
         # if we want to sort the records by their insertion date, add the missing records at the top
         solution = sorted(dict_solution, key=dict_solution.__getitem__, reverse=sort_order=='a') + missing_records
     else:
         solution = missing_records + sorted(dict_solution, key=dict_solution.__getitem__, reverse=sort_order=='a')
     #calculate the min index on the reversed list
     index_min = max(len(solution) - irec_max, 0) #just to be sure that the min index is not negative
     #return all the records up to irec_max, but on the reversed list
     if sort_or_rank == 'r':
         # we need the recids, with values
         return (solution[index_min:], [dict_solution.get(record, 0) for record in solution[index_min:]])
     else:
         return solution[index_min:]
 
 
 def sort_records_bibxxx(req, recIDs, tags, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None):
     """OLD FASHION SORTING WITH NO CACHE, for sort fields that are not run in BibSort
        Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'.
        If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by
        'sort pattern', for example "sort by report number that starts by CERN-PS".
        Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly."""
 
     _ = gettext_set_language(ln)
 
     #we should return sorted records up to irec_max (exclusive)
     dummy, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
     #calculate the min index on the reversed list
     index_min = max(len(recIDs) - irec_max, 0) #just to be sure that the min index is not negative
 
     ## check arguments:
     if not sort_field:
         return recIDs[index_min:]
     if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT:
         if of.startswith('h'):
             write_warning(_("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning", req=req)
         return recIDs[index_min:]
     recIDs_dict = {}
     recIDs_out = []
 
     if not tags:
         # tags have not been computed yet
         sort_fields = string.split(sort_field, ",")
         tags, error_field = get_tags_from_sort_fields(sort_fields)
         if error_field:
             if of.startswith('h'):
                 write_warning(_("Sorry, %s does not seem to be a valid sort option. The records will not be sorted.") % cgi.escape(error_field), "Error", req=req)
             return recIDs[index_min:]
     if verbose >= 3 and of.startswith('h'):
         write_warning("Sorting by tags %s." % cgi.escape(repr(tags)), req=req)
         if sort_pattern:
             write_warning("Sorting preferentially by %s." % cgi.escape(sort_pattern), req=req)
     ## check if we have sorting tag defined:
     if tags:
         # fetch the necessary field values:
         for recID in recIDs:
             val = "" # will hold value for recID according to which sort
             vals = [] # will hold all values found in sorting tag for recID
             for tag in tags:
                 if CFG_CERN_SITE and tag == '773__c':
                     # CERN hack: journal sorting
                     # 773__c contains page numbers, e.g. 3-13, and we want to sort by 3, and numerically:
                     vals.extend(["%050s" % x.split("-", 1)[0] for x in get_fieldvalues(recID, tag)])
                 else:
                     vals.extend(get_fieldvalues(recID, tag))
             if sort_pattern:
                 # try to pick that tag value that corresponds to sort pattern
                 bingo = 0
                 for v in vals:
                     if v.lower().startswith(sort_pattern.lower()): # bingo!
                         bingo = 1
                         val = v
                         break
                 if not bingo: # sort_pattern not present, so add other vals after spaces
                     val = sort_pattern + "          " + string.join(vals)
             else:
                 # no sort pattern defined, so join them all together
                 val = string.join(vals)
             val = strip_accents(val.lower()) # sort values regardless of accents and case
             if val in recIDs_dict:
                 recIDs_dict[val].append(recID)
             else:
                 recIDs_dict[val] = [recID]
         # sort them:
         recIDs_dict_keys = recIDs_dict.keys()
         recIDs_dict_keys.sort()
         # now that keys are sorted, create output array:
         for k in recIDs_dict_keys:
             for s in recIDs_dict[k]:
                 recIDs_out.append(s)
         # ascending or descending?
         if sort_order == 'a':
             recIDs_out.reverse()
         # okay, we are done
         # return only up to the maximum that we need to sort
         if len(recIDs_out) != len(recIDs):
             dummy, irec_max = get_interval_for_records_to_sort(len(recIDs_out), jrec, rg)
             index_min = max(len(recIDs_out) - irec_max, 0) #just to be sure that the min index is not negative
         return recIDs_out[index_min:]
     else:
         # good, no sort needed
         return recIDs[index_min:]
 
 def get_interval_for_records_to_sort(nb_found, jrec=None, rg=None):
     """calculates in which interval should the sorted records be
     a value of 'rg=-9999' means to print all records: to be used with care."""
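     # (illustrative) get_interval_for_records_to_sort(100, jrec=11, rg=10) == (10, 20)
     # (illustrative) get_interval_for_records_to_sort(100, jrec=1, rg=-9999) == (0, 100)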
 
     if not jrec:
         jrec = 1
 
     if not rg:
         #return all
         return jrec-1, nb_found
 
     if rg == -9999: # print all records
         rg = nb_found
     else:
         rg = abs(rg)
     if jrec < 1: # sanity checks
         jrec = 1
     if jrec > nb_found:
         jrec = max(nb_found-rg+1, 1)
 
     # will sort records from irec_min to irec_max excluded
     irec_min = jrec - 1
     irec_max = irec_min + rg
     if irec_min < 0:
         irec_min = 0
     if irec_max > nb_found:
         irec_max = nb_found
 
     return irec_min, irec_max
 
 def print_records(req, recIDs, jrec=1, rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, format='hb', ot='', ln=CFG_SITE_LANG,
                   relevances=[], relevances_prologue="(", relevances_epilogue="%%)",
                   decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True,
                   print_records_epilogue_p=True, verbose=0, tab='', sf='', so='d', sp='',
                   rm='', em=''):
 
     """
     Prints list of records 'recIDs' formatted according to 'format' in
     groups of 'rg' starting from 'jrec'.
 
     Assumes that the input list 'recIDs' is sorted in reverse order,
     so it counts records from tail to head.
 
     A value of 'rg=-9999' means to print all records: to be used with care.
 
     Print also list of RELEVANCES for each record (if defined), in
     between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE.
 
     Print prologue and/or epilogue specific to 'format' if
     'print_records_prologue_p' and/or 'print_records_epilogue_p' are
     True.
 
     'sf' is sort field and 'rm' is ranking method that are passed here
     only for proper linking purposes: e.g. when a certain ranking
     method or a certain sort field was selected, keep it selected in
     any dynamic search links that may be printed.
     """
 
     if em != "" and EM_REPOSITORY["body"] not in em:
         return
     # load the right message language
     _ = gettext_set_language(ln)
 
     # sanity checking:
     if req is None:
         return
 
     # get user_info (for formatting based on user)
     if isinstance(req, cStringIO.OutputType):
         user_info = {}
     else:
         user_info = collect_user_info(req)
 
     if len(recIDs):
         nb_found = len(recIDs)
 
         if rg == -9999: # print all records
             rg = nb_found
         else:
             rg = abs(rg)
         if jrec < 1: # sanity checks
             jrec = 1
         if jrec > nb_found:
             jrec = max(nb_found-rg+1, 1)
 
         # will print records from irec_max to irec_min excluded:
         irec_max = nb_found - jrec
         irec_min = nb_found - jrec - rg
         if irec_min < 0:
             irec_min = -1
         if irec_max >= nb_found:
             irec_max = nb_found - 1
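         # (illustrative) nb_found=100, jrec=1, rg=10 gives irec_max=99 and
         # irec_min=89, i.e. indexes 99 down to 90 of the reverse-sorted recIDs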
 
         #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max))
 
         if format.startswith('x'):
 
             # print header if needed
             if print_records_prologue_p:
                 print_records_prologue(req, format)
 
             # print records
             recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)]
 
             if ot:
                 # asked to print some filtered fields only, so call print_record() on the fly:
                 for irec in range(irec_max, irec_min, -1):
                     x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                     user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)
                     req.write(x)
                     if x:
                         req.write('\n')
             else:
                 format_records(recIDs_to_print,
                                format,
                                ln=ln,
                                search_pattern=search_pattern,
                                record_separator="\n",
                                user_info=user_info,
                                req=req)
 
             # print footer if needed
             if print_records_epilogue_p:
                 print_records_epilogue(req, format)
 
         elif format.startswith('t') or str(format[0:3]).isdigit():
             # we are doing plain text output:
             for irec in range(irec_max, irec_min, -1):
                 x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)
                 req.write(x)
                 if x:
                     req.write('\n')
         elif format == 'excel':
             recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)]
             create_excel(recIDs=recIDs_to_print, req=req, ln=ln, ot=ot, user_info=user_info)
         else:
             # we are doing HTML output:
             if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"):
                 # portfolio and on-the-fly formats:
                 for irec in range(irec_max, irec_min, -1):
                     req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                            user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm))
             elif format.startswith("hb"):
                 # HTML brief format:
                 display_add_to_basket = True
                 if user_info:
                     if user_info['email'] == 'guest':
                         if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4:
                             display_add_to_basket = False
                     else:
                         if not user_info['precached_usebaskets']:
                             display_add_to_basket = False
                 if em != "" and EM_REPOSITORY["basket"] not in em:
                     display_add_to_basket = False
                 req.write(websearch_templates.tmpl_record_format_htmlbrief_header(
                     ln = ln))
                 for irec in range(irec_max, irec_min, -1):
                     row_number = jrec+irec_max-irec
                     recid = recIDs[irec]
                     if relevances and relevances[irec]:
                         relevance = relevances[irec]
                     else:
                         relevance = ''
                     record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                                   user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)
 
                     req.write(websearch_templates.tmpl_record_format_htmlbrief_body(
                         ln = ln,
                         recid = recid,
                         row_number = row_number,
                         relevance = relevance,
                         record = record,
                         relevances_prologue = relevances_prologue,
                         relevances_epilogue = relevances_epilogue,
                         display_add_to_basket = display_add_to_basket
                         ))
 
                 req.write(websearch_templates.tmpl_record_format_htmlbrief_footer(
                     ln = ln,
                     display_add_to_basket = display_add_to_basket))
 
             elif format.startswith("hd"):
                 # HTML detailed format:
                 for irec in range(irec_max, irec_min, -1):
                     if record_exists(recIDs[irec]) == -1:
                         write_warning(_("The record has been deleted."), req=req)
                         merged_recid = get_merged_recid(recIDs[irec])
                         if merged_recid:
                             write_warning(_("The record %d replaces it." % merged_recid), req=req)
                         continue
                     unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])),
                                                             recIDs[irec], ln=ln)
                     ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
                     ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
 
                     link_ln = ''
 
                     if ln != CFG_SITE_LANG:
                         link_ln = '?ln=%s' % ln
 
                     recid = recIDs[irec]
                     recid_to_display = recid  # Record ID used to build the URL.
                     if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                         try:
                             recid_to_display = get_fieldvalues(recid,
                                     CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG)[0]
                         except IndexError:
                             # No external sysno is available, keep using
                             # internal recid.
                             pass
 
                     tabs = [(unordered_tabs[tab_id]['label'], \
                              '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid_to_display, tab_id, link_ln), \
                              tab_id == tab,
                              unordered_tabs[tab_id]['enabled']) \
                             for (tab_id, order) in ordered_tabs_id
                             if unordered_tabs[tab_id]['visible']]
 
                     tabs_counts = get_detailed_page_tabs_counts(recid)
                     citedbynum = tabs_counts['Citations']
                     references = tabs_counts['References']
                     discussions = tabs_counts['Discussions']
 
                     # load content
                     if tab == 'usage':
                         req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                      tabs,
                                                      ln,
                                                      citationnum=citedbynum,
                                                      referencenum=references,
                                                      discussionnum=discussions))
                         r = calculate_reading_similarity_list(recIDs[irec], "downloads")
                         downloadsimilarity = None
                         downloadhistory = None
                         #if r:
                         #    downloadsimilarity = r
                         if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
                             downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln)
 
                         r = calculate_reading_similarity_list(recIDs[irec], "pageviews")
                         viewsimilarity = None
                         if r: viewsimilarity = r
                         content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec],
                                                                                       ln,
                                                                                       downloadsimilarity=downloadsimilarity,
                                                                                       downloadhistory=downloadhistory,
                                                                                       viewsimilarity=viewsimilarity)
                         req.write(content)
                         req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                       tabs,
                                                                                       ln))
                     elif tab == 'citations':
                         recid = recIDs[irec]
                         req.write(webstyle_templates.detailed_record_container_top(recid,
                                                      tabs,
                                                      ln,
                                                      citationnum=citedbynum,
                                                      referencenum=references,
                                                      discussionnum=discussions))
                         req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln))
 
                         # Citing
                         citinglist = calculate_cited_by_list(recid)
                         req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid,
                                                                                                  ln,
                                                                                                  citinglist,
                                                                                                  sf=sf,
                                                                                                  so=so,
                                                                                                  sp=sp,
                                                                                                  rm=rm))
                         # Self-cited
                         selfcited = get_self_cited_by(recid)
                         req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid,
                                   ln, selfcited=selfcited, citinglist=citinglist))
                         # Co-cited
                         s = calculate_co_cited_with_list(recid)
                         cociting = None
                         if s:
                             cociting = s
                         req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid,
                                                                                                ln,
                                                                                                cociting=cociting))
                         # Citation history, if needed
                         citationhistory = None
                         if citinglist:
                             citationhistory = create_citation_history_graph_and_box(recid, ln)
                         #debug
                         if verbose > 3:
-                            write_warning("Citation graph debug: " + \
+                            write_warning("Citation graph debug: " +
                                           str(len(citationhistory)), req=req)
-                        req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory))
+
+                        req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(ln, citationhistory))
+
+                        # Citation log
+                        entries = get_citers_log(recid)
+                        req.write(websearch_templates.tmpl_detailed_record_citations_citation_log(ln, entries))
+
                         req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln))
                         req.write(webstyle_templates.detailed_record_container_bottom(recid,
                                                                                       tabs,
                                                                                       ln))
                     elif tab == 'references':
                         req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                      tabs,
                                                      ln,
                                                      citationnum=citedbynum,
                                                      referencenum=references,
                                                      discussionnum=discussions))
 
                         req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose, force_2nd_pass=True))
                         req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                       tabs,
                                                                                       ln))
                     elif tab == 'keywords':
                         from invenio.bibclassify_webinterface import \
                             record_get_keywords, write_keywords_body, \
                             generate_keywords
                         from invenio.webinterface_handler import wash_urlargd
                         form = req.form
                         argd = wash_urlargd(form, {
                             'generate': (str, 'no'),
                             'sort': (str, 'occurrences'),
                             'type': (str, 'tagcloud'),
                             'numbering': (str, 'off'),
                             })
                         recid = recIDs[irec]
 
                         req.write(webstyle_templates.detailed_record_container_top(recid,
                             tabs, ln, citationnum=citedbynum, referencenum=references))
 
                         if argd['generate'] == 'yes':
                             # The user asked to generate the keywords.
                             keywords = generate_keywords(req, recid, argd)
                         else:
                             # Get the keywords contained in the MARC.
                             keywords = record_get_keywords(recid, argd)
 
                         if argd['sort'] == 'related' and not keywords:
                             req.write('You may want to run BibIndex.')
 
                         # Output the keywords or the generate button.
                         write_keywords_body(keywords, req, recid, argd)
 
                         req.write(webstyle_templates.detailed_record_container_bottom(recid,
                             tabs, ln))
                     elif tab == 'plots':
                         req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                                                    tabs,
                                                                                    ln))
                         content = websearch_templates.tmpl_record_plots(recID=recIDs[irec],
                                                                          ln=ln)
                         req.write(content)
                         req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                       tabs,
                                                                                       ln))
 
                     elif tab == 'hepdata':
                         req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                                                    tabs,
                                                                                    ln, include_jquery = True,
                                                                                    include_mathjax = True))
                         from invenio import hepdatautils
                         from invenio import hepdatadisplayutils
                         data = hepdatautils.retrieve_data_for_record(recIDs[irec])
                         heplink = hepdatadisplayutils.get_hepdata_link(recIDs[irec])
 
                         if data:
                             content = websearch_templates.tmpl_record_hepdata(data, recIDs[irec], True)
                         else:
                             content = websearch_templates.tmpl_record_no_hepdata()
 
                         req.write(content)
                         req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                       tabs,
                                                                                       ln))
                     else:
                         # Metadata tab
                         req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                      tabs,
                                                      ln,
                                                      show_short_rec_p=False,
                                                      citationnum=citedbynum, referencenum=references,
                                                      discussionnum=discussions))
 
                         creationdate = None
                         modificationdate = None
                         if record_exists(recIDs[irec]) == 1:
                             creationdate = get_creation_date(recIDs[irec])
                             modificationdate = get_modification_date(recIDs[irec])
 
                         content = print_record(recIDs[irec], format, ot, ln,
                                                search_pattern=search_pattern,
                                                user_info=user_info, verbose=verbose,
                                                sf=sf, so=so, sp=sp, rm=rm)
                         content = websearch_templates.tmpl_detailed_record_metadata(
                             recID = recIDs[irec],
                             ln = ln,
                             format = format,
                             creationdate = creationdate,
                             modificationdate = modificationdate,
                             content = content)
                         # display of the next-hit/previous-hit/back-to-search links
                         # on the detailed record pages
                         content += websearch_templates.tmpl_display_back_to_search(req,
                                                                                    recIDs[irec],
                                                                                    ln)
                         req.write(content)
                         req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                       tabs,
                                                                                       ln,
                                                                                       creationdate=creationdate,
                                                                                       modificationdate=modificationdate,
                                                                                       show_short_rec_p=False))
 
                         if len(tabs) > 0:
                             # Add the mini box at bottom of the page
                             if CFG_WEBCOMMENT_ALLOW_REVIEWS:
                                 from invenio.webcomment import get_mini_reviews
                                 reviews = get_mini_reviews(recid = recIDs[irec], ln=ln)
                             else:
                                 reviews = ''
                             actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose)
                             files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose)
                             req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec],
                                                                                     ln,
                                                                                     format,
                                                                                     files=files,
                                                                                     reviews=reviews,
                                                                                     actions=actions))
             else:
                 # Other formats
                 for irec in range(irec_max, irec_min, -1):
                     req.write(print_record(recIDs[irec], format, ot, ln,
                                            search_pattern=search_pattern,
                                            user_info=user_info, verbose=verbose,
                                            sf=sf, so=so, sp=sp, rm=rm))
     else:
         write_warning(_("Use different search terms."), req=req)
 
 def print_records_prologue(req, format, cc=None):
     """
     Print the appropriate prologue for list of records in the given
     format.
     """
     prologue = "" # no prologue needed for HTML or Text formats
     if format.startswith('xm'):
         prologue = websearch_templates.tmpl_xml_marc_prologue()
     elif format.startswith('xn'):
         prologue = websearch_templates.tmpl_xml_nlm_prologue()
     elif format.startswith('xw'):
         prologue = websearch_templates.tmpl_xml_refworks_prologue()
     elif format.startswith('xr'):
         prologue = websearch_templates.tmpl_xml_rss_prologue(cc=cc)
     elif format.startswith('xe8x'):
         prologue = websearch_templates.tmpl_xml_endnote_8x_prologue()
     elif format.startswith('xe'):
         prologue = websearch_templates.tmpl_xml_endnote_prologue()
     elif format.startswith('xo'):
         prologue = websearch_templates.tmpl_xml_mods_prologue()
     elif format.startswith('xp'):
         prologue = websearch_templates.tmpl_xml_podcast_prologue(cc=cc)
     elif format.startswith('x'):
         prologue = websearch_templates.tmpl_xml_default_prologue()
     req.write(prologue)
 
 def print_records_epilogue(req, format):
     """
     Print the appropriate epilogue for list of records in the given
     format.
     """
     epilogue = "" # no epilogue needed for HTML or Text formats
     if format.startswith('xm'):
         epilogue = websearch_templates.tmpl_xml_marc_epilogue()
     elif format.startswith('xn'):
         epilogue = websearch_templates.tmpl_xml_nlm_epilogue()
     elif format.startswith('xw'):
         epilogue = websearch_templates.tmpl_xml_refworks_epilogue()
     elif format.startswith('xr'):
         epilogue = websearch_templates.tmpl_xml_rss_epilogue()
     elif format.startswith('xe8x'):
         epilogue = websearch_templates.tmpl_xml_endnote_8x_epilogue()
     elif format.startswith('xe'):
         epilogue = websearch_templates.tmpl_xml_endnote_epilogue()
     elif format.startswith('xo'):
         epilogue = websearch_templates.tmpl_xml_mods_epilogue()
     elif format.startswith('xp'):
         epilogue = websearch_templates.tmpl_xml_podcast_epilogue()
     elif format.startswith('x'):
         epilogue = websearch_templates.tmpl_xml_default_epilogue()
     req.write(epilogue)
 
 def get_record(recid):
     """Directly the record object corresponding to the recid."""
     if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE:
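         # bibfmt may hold a marshal-serialized copy of the record under the
         # special 'recstruct' format; fall back to rebuilding from MARCXML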
         value = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT='recstruct'",  (recid, ))
         if value:
             try:
                 return deserialize_via_marshal(value[0][0])
             except Exception:
                 ### In case of corruption, let's rebuild it!
                 pass
     return create_record(print_record(recid, 'xm'))[0]
 
 def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress,
                  search_pattern=None, user_info=None, verbose=0, sf='', so='d', sp='', rm=''):
     """
     Prints record 'recID' formatted according to 'format'.
 
     'sf' is sort field and 'rm' is ranking method that are passed here
     only for proper linking purposes: e.g. when a certain ranking
     method or a certain sort field was selected, keep it selected in
     any dynamic search links that may be printed.
     """
     if format == 'recstruct':
         return get_record(recID)
 
     _ = gettext_set_language(ln)
 
     display_claim_this_paper = False
 
     try:
         display_claim_this_paper = user_info["precached_viewclaimlink"]
     except (KeyError, TypeError):
         display_claim_this_paper = False
     #check from user information if the user has the right to see hidden fields/tags in the
     #records as well
     can_see_hidden = False
     if user_info:
         can_see_hidden = user_info.get('precached_canseehiddenmarctags', False)
 
     out = ""
 
     # sanity check:
     record_exist_p = record_exists(recID)
     if record_exist_p == 0: # doesn't exist
         return out
 
     # New Python BibFormat procedure for formatting
     # Old procedure follows further below
     # We must still check some special formats, but these
     # should disappear when BibFormat improves.
     if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \
             or format.lower().startswith('t') \
             or format.lower().startswith('hm') \
             or str(format[0:3]).isdigit() \
             or ot):
 
         # Unspecified format is hd
         if format == '':
             format = 'hd'
 
         if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html':
             # HTML output displays a default value for deleted records.
             # Other format have to deal with it.
             out += _("The record has been deleted.")
             # was record deleted-but-merged ?
             merged_recid = get_merged_recid(recID)
             if merged_recid:
                 out += ' ' + _("The record %d replaces it." % merged_recid)
         else:
             out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                   user_info=user_info, verbose=verbose)
 
             # at the end of HTML brief mode, print the "Detailed record" functionality:
             if format.lower().startswith('hb') and \
                    format.lower() != 'hb_p':
                 out += websearch_templates.tmpl_print_record_brief_links(ln=ln,
                                                                          recID=recID,
                                                                          sf=sf,
                                                                          so=so,
                                                                          sp=sp,
                                                                          rm=rm,
                                                                          display_claim_link=display_claim_this_paper)
         return out
 
     # Old PHP BibFormat procedure for formatting
     # print record opening tags, if needed:
     if format == "marcxml" or format == "oai_dc":
         out += "  <record>\n"
         out += "   <header>\n"
         for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
             out += "    <identifier>%s</identifier>\n" % oai_id
         out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID)
         out += "   </header>\n"
         out += "   <metadata>\n"
 
     if format.startswith("xm") or format == "marcxml":
         # look for detailed format existence:
         query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
         res = run_sql(query, (recID, format), 1)
         if res and record_exist_p == 1 and not ot:
             # record 'recID' is formatted in 'format', and we are not
             # asking for field-filtered output; so print it:
             out += "%s" % decompress(res[0][0])
         elif ot:
             # field-filtered output was asked for; print only some fields
             if not can_see_hidden:
                 ot = list(set(ot) - set(CFG_BIBFORMAT_HIDDEN_TAGS))
             out += record_xml_output(get_record(recID), ot)
         else:
             # record 'recID' is not formatted in 'format' or we ask
             # for field-filtered output -- they are not in "bibfmt"
             # table; so fetch all the data from "bibXXx" tables:
             if format == "marcxml":
                 out += """    <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
                 out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
             elif format.startswith("xm"):
                 out += """    <record>\n"""
                 out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
             if record_exist_p == -1:
                 # deleted record, so display only OAI ID and 980:
                 oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                 if oai_ids:
                     out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                            (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0])
                 out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n"
             else:
                 # controlfields
                 query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                         "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\
                         "ORDER BY bb.field_number, b.tag ASC"
                 res = run_sql(query, (recID, ))
                 for row in res:
                     field, value = row[0], row[1]
                     value = encode_for_xml(value)
                     out += """        <controlfield tag="%s">%s</controlfield>\n""" % \
                            (encode_for_xml(field[0:3]), value)
                 # datafields
                 i = 1 # Do not process bib00x and bibrec_bib00x, as
                       # they are controlfields. So start at bib01x and
                       # bibrec_bib01x (and set i = 0 at the end of the
                       # first loop)
                 for digit1 in range(0, 10):
                     for digit2 in range(i, 10):
                         bx = "bib%d%dx" % (digit1, digit2)
                         bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                         query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                 "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\
                                 "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx)
                         res = run_sql(query, (recID, str(digit1)+str(digit2)+'%'))
                         field_number_old = -999
                         field_old = ""
                         for row in res:
                             field, value, field_number = row[0], row[1], row[2]
                             ind1, ind2 = field[3], field[4]
                             if ind1 == "_" or ind1 == "":
                                 ind1 = " "
                             if ind2 == "_" or ind2 == "":
                                 ind2 = " "
                             # print field tag, unless hidden
                             printme = True
                             if not can_see_hidden:
                                 for htag in CFG_BIBFORMAT_HIDDEN_TAGS:
                                     ltag = len(htag)
                                     samelenfield = field[0:ltag]
                                     if samelenfield == htag:
                                         printme = False
 
                             if printme:
                                 if field_number != field_number_old or field[:-1] != field_old[:-1]:
                                     if field_number_old != -999:
                                         out += """        </datafield>\n"""
                                     out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                                (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2))
                                     field_number_old = field_number
                                     field_old = field
                                 # print subfield value
                                 value = encode_for_xml(value)
                                 out += """            <subfield code="%s">%s</subfield>\n""" % \
                                    (encode_for_xml(field[-1:]), value)
 
                         # all fields/subfields printed in this run, so close the tag:
                         if field_number_old != -999:
                             out += """        </datafield>\n"""
                      i = 0 # Next loops should start looking at bibX0x/bibrec_bibX0x tables too
             # we are at the end of printing the record:
             out += "    </record>\n"
 
     elif format == "xd" or format == "oai_dc":
         # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
         out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                          xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                              http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
         if record_exist_p == -1:
             out += ""
         else:
             for f in get_fieldvalues(recID, "041__a"):
                 out += "        <language>%s</language>\n" % f
 
             for f in get_fieldvalues(recID, "100__a"):
                 out += "        <creator>%s</creator>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "700__a"):
                 out += "        <creator>%s</creator>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "245__a"):
                 out += "        <title>%s</title>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "65017a"):
                 out += "        <subject>%s</subject>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "8564_u"):
                  if f.split('.')[-1] == 'png':
                     continue
                 out += "        <identifier>%s</identifier>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "520__a"):
                 out += "        <description>%s</description>\n" % encode_for_xml(f)
 
             out += "        <date>%s</date>\n" % get_creation_date(recID)
         out += "    </dc>\n"
 
      elif str(format[0:3]).isdigit():
         # user has asked to print some fields only
         if format == "001":
             out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format)
         else:
             vals = get_fieldvalues(recID, format)
             for val in vals:
                 out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format)
 
     elif format.startswith('t'):
         ## user directly asked for some tags to be displayed only
         if record_exist_p == -1:
             out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)
         else:
             out += get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)
 
     elif format == "hm":
         if record_exist_p == -1:
             out += "\n<pre style=\"margin: 1em 0px;\">" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)) + "</pre>"
         else:
             out += "\n<pre style=\"margin: 1em 0px;\">" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)) + "</pre>"
 
     elif format.startswith("h") and ot:
         ## user directly asked for some tags to be displayed only
         if record_exist_p == -1:
             out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) + "</pre>"
         else:
             out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) + "</pre>"
 
     elif format == "hd":
         # HTML detailed format
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             # look for detailed format existence:
             query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
             res = run_sql(query, (recID, format), 1)
             if res:
                 # record 'recID' is formatted in 'format', so print it
                 out += "%s" % decompress(res[0][0])
             else:
                 # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format:
                 out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                       user_info=user_info, verbose=verbose)
                 if out_record_in_format:
                     out += out_record_in_format
                 else:
                     out += websearch_templates.tmpl_print_record_detailed(
                              ln = ln,
                              recID = recID,
                            )
 
     elif format.startswith("hb_") or format.startswith("hd_"):
         # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                   user_info=user_info, verbose=verbose)
 
     elif format.startswith("hx"):
         # BibTeX format, called on the fly:
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                   user_info=user_info, verbose=verbose)
 
     elif format.startswith("hs"):
         # for citation/download similarity navigation links:
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             out += '<a href="%s">' % websearch_templates.build_search_url(recid=recID, ln=ln)
             # firstly, title:
             titles = get_fieldvalues(recID, "245__a")
             if titles:
                 for title in titles:
                     out += "<strong>%s</strong>" % title
             else:
                 # usual title not found, try conference title:
                 titles = get_fieldvalues(recID, "111__a")
                 if titles:
                     for title in titles:
                         out += "<strong>%s</strong>" % title
                 else:
                     # just print record ID:
                     out += "<strong>%s %d</strong>" % (get_field_i18nname("record ID", ln, False), recID)
             out += "</a>"
             # secondly, authors:
             authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
             if authors:
                 out += " - %s" % authors[0]
                 if len(authors) > 1:
                     out += " <em>et al</em>"
              # thirdly publication info, trying the tags in order of preference:
              publinfos = []
              for tag in ("773__s", "909C4s", "037__a", "088__a"):
                  publinfos = get_fieldvalues(recID, tag)
                  if publinfos:
                      break
             if publinfos:
                 out += " - %s" % publinfos[0]
             else:
                 # fourthly publication year (if not publication info):
                  years = []
                  for tag in ("773__y", "909C4y", "260__c"):
                      years = get_fieldvalues(recID, tag)
                      if years:
                          break
                 if years:
                     out += " (%s)" % years[0]
     else:
         # HTML brief format by default
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
             res = run_sql(query, (recID, format))
             if res:
                 # record 'recID' is formatted in 'format', so print it
                 out += "%s" % decompress(res[0][0])
             else:
                  # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format:
                 if CFG_WEBSEARCH_CALL_BIBFORMAT:
                     out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                           user_info=user_info, verbose=verbose)
                     if out_record_in_format:
                         out += out_record_in_format
                     else:
                         out += websearch_templates.tmpl_print_record_brief(
                                  ln = ln,
                                  recID = recID,
                                )
                 else:
                     out += websearch_templates.tmpl_print_record_brief(
                              ln = ln,
                              recID = recID,
                            )
 
             # at the end of HTML brief mode, print the "Detailed record" functionality:
             if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"):
                 pass # do nothing for portfolio and on-the-fly formats
             else:
                 out += websearch_templates.tmpl_print_record_brief_links(ln=ln,
                                                                          recID=recID,
                                                                          sf=sf,
                                                                          so=so,
                                                                          sp=sp,
                                                                          rm=rm,
                                                                          display_claim_link=display_claim_this_paper)
 
     # print record closing tags, if needed:
     if format == "marcxml" or format == "oai_dc":
         out += "   </metadata>\n"
         out += "  </record>\n"
 
     return out
 
 def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0):
     """
     Calls BibFormat and returns formatted record.
 
     BibFormat will decide by itself if old or new BibFormat must be used.
     """
 
     from invenio.bibformat_utils import get_pdf_snippets
 
     keywords = []
     if search_pattern is not None:
         for unit in create_basic_search_units(None, str(search_pattern), None):
             bsu_o, bsu_p, bsu_f, bsu_m = unit[0], unit[1], unit[2], unit[3]
             if (bsu_o != '-' and bsu_f in [None, 'fulltext']):
                 if bsu_m == 'a' and bsu_p.startswith('%') and bsu_p.endswith('%'):
                      # remove leading and trailing `%' representing partial phrase search
                     keywords.append(bsu_p[1:-1])
                 else:
                     keywords.append(bsu_p)
 
     out = format_record(recID,
                          of=format,
                          ln=ln,
                          search_pattern=keywords,
                          user_info=user_info,
                          verbose=verbose)
 
     if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and user_info and \
            'fulltext' in user_info['uri'].lower():
         # check snippets only if URL contains fulltext
         # FIXME: make it work for CLI too, via new function arg
         if keywords:
             snippets = ''
             try:
                 snippets = get_pdf_snippets(recID, keywords, user_info)
             except:
                 register_exception()
             if snippets:
                 out += snippets
 
     return out
 
 def log_query(hostname, query_args, uid=-1):
     """
     Log query into the query and user_query tables.
     Return id_query or None in case of problems.
     """
     id_query = None
     if uid >= 0:
         # log the query only if uid is reasonable
         res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1)
         try:
             id_query = res[0][0]
          except IndexError:
              # query not yet in the table; insert it:
              id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,))
         if id_query:
             run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)",
                     (uid, id_query, hostname,
                      time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
     return id_query
 
 def log_query_info(action, p, f, colls, nb_records_found_total=-1):
     """Write some info to the log file for later analysis."""
     try:
         log = open(CFG_LOGDIR + "/search.log", "a")
         log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime()))
         log.write(action+"#")
         log.write(p+"#")
         log.write(f+"#")
         for coll in colls[:-1]:
             log.write("%s," % coll)
         log.write("%s#" % colls[-1])
         log.write("%d" % nb_records_found_total)
         log.write("\n")
         log.close()
     except:
         pass
     return
 
 def clean_dictionary(dictionary, list_of_items):
     """Returns a copy of the dictionary with all the items
        in the list_of_items as empty strings"""
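      # e.g. (sketch):
      #   clean_dictionary({'p1': 'ellis', 'f1': 'author'}, ['p1'])
      #   -> {'p1': '', 'f1': 'author'}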
     out_dictionary = dictionary.copy()
     out_dictionary.update((item, '') for item in list_of_items)
     return out_dictionary
 
 
 ### CALLABLES
 
 def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, sf="", so="d", sp="", rm="", of="id", ot="", aas=0,
                         p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0,
                         recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="",
                         d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab="",
                         wl=0, em=""):
     """Perform search or browse request, without checking for
        authentication.  Return list of recIDs found, if of=id.
        Otherwise create web page.
 
        The arguments are as follows:
 
          req - mod_python Request class instance.
 
           cc - current collection (e.g. "ATLAS").  The collection the
                user started to search/browse from.
 
            c - collection list (e.g. ["Theses", "Books"]).  The
                collections user may have selected/deselected when
                starting to search from 'cc'.
 
            p - pattern to search for (e.g. "ellis and muon or kaon").
 
            f - field to search within (e.g. "author").
 
           rg - records in groups of (e.g. "10").  Defines how many hits
                per collection in the search results page are
                displayed.  (Note that `rg' is ignored in case of `of=id'.)
 
           sf - sort field (e.g. "title").
 
           so - sort order ("a"=ascending, "d"=descending).
 
           sp - sort pattern (e.g. "CERN-") -- in case there are more
                values in a sort field, this argument tells which one
                to prefer
 
           rm - ranking method (e.g. "jif").  Defines whether results
                should be ranked by some known ranking method.
 
           of - output format (e.g. "hb").  Usually starting "h" means
                HTML output (and "hb" for HTML brief, "hd" for HTML
                detailed), "x" means XML output, "t" means plain text
                output, "id" means no output at all but to return list
                of recIDs found, "intbitset" means to return an intbitset
                representation of the recIDs found (no sorting or ranking
                will be performed).  (Suitable for high-level API.)
 
           ot - output only these MARC tags (e.g. "100,700,909C0b").
                Useful if only some fields are to be shown in the
                 output, e.g. for a library to control which fields appear.
 
           em - output only part of the page.
 
          aas - advanced search ("0" means no, "1" means yes).  Whether
                search was called from within the advanced search
                interface.
 
           p1 - first pattern to search for in the advanced search
                interface.  Much like 'p'.
 
           f1 - first field to search within in the advanced search
                interface.  Much like 'f'.
 
           m1 - first matching type in the advanced search interface.
                ("a" all of the words, "o" any of the words, "e" exact
                phrase, "p" partial phrase, "r" regular expression).
 
          op1 - first operator, to join the first and the second unit
                in the advanced search interface.  ("a" add, "o" or,
                "n" not).
 
           p2 - second pattern to search for in the advanced search
                interface.  Much like 'p'.
 
           f2 - second field to search within in the advanced search
                interface.  Much like 'f'.
 
           m2 - second matching type in the advanced search interface.
                ("a" all of the words, "o" any of the words, "e" exact
                phrase, "p" partial phrase, "r" regular expression).
 
          op2 - second operator, to join the second and the third unit
                in the advanced search interface.  ("a" add, "o" or,
                "n" not).
 
           p3 - third pattern to search for in the advanced search
                interface.  Much like 'p'.
 
           f3 - third field to search within in the advanced search
                interface.  Much like 'f'.
 
           m3 - third matching type in the advanced search interface.
                ("a" all of the words, "o" any of the words, "e" exact
                phrase, "p" partial phrase, "r" regular expression).
 
           sc - split by collection ("0" no, "1" yes).  Governs whether
                we want to present the results in a single huge list,
                 or split by collection.
 
         jrec - jump to record (e.g. "234").  Used for navigation
                inside the search results.  (Note that `jrec' is ignored
                in case of `of=id'.)
 
        recid - display record ID (e.g. "20000").  Do not
                search/browse but go straight away to the Detailed
                record page for the given recID.
 
       recidb - display record ID bis (e.g. "20010").  If greater than
                'recid', then display records from recid to recidb.
                Useful for example for dumping records from the
                database for reformatting.
 
        sysno - display old system SYS number (e.g. "").  If you
                migrate to Invenio from another system, and store your
                old SYS call numbers, you can use them instead of recid
                 if you wish.
 
           id - the same as recid, in case recid is not set.  For
                backwards compatibility.
 
          idb - the same as recid, in case recidb is not set.  For
                backwards compatibility.
 
        sysnb - the same as sysno, in case sysno is not set.  For
                backwards compatibility.
 
       action - action to do.  "SEARCH" for searching, "Browse" for
                browsing.  Default is to search.
 
            d1 - first datetime in full YYYY-mm-dd HH:MM:SS format
                (e.g. "1998-08-23 12:34:56"). Useful for search limits
                on creation/modification date (see 'dt' argument
                below).  Note that 'd1' takes precedence over d1y, d1m,
                d1d if these are defined.
 
          d1y - first date's year (e.g. "1998").  Useful for search
                limits on creation/modification date.
 
          d1m - first date's month (e.g. "08").  Useful for search
                limits on creation/modification date.
 
          d1d - first date's day (e.g. "23").  Useful for search
                limits on creation/modification date.
 
            d2 - second datetime in full YYYY-mm-dd HH:MM:SS format
                (e.g. "1998-09-02 12:34:56"). Useful for search limits
                on creation/modification date (see 'dt' argument
                below).  Note that 'd2' takes precedence over d2y, d2m,
                d2d if these are defined.
 
          d2y - second date's year (e.g. "1998").  Useful for search
                limits on creation/modification date.
 
          d2m - second date's month (e.g. "09").  Useful for search
                limits on creation/modification date.
 
          d2d - second date's day (e.g. "02").  Useful for search
                limits on creation/modification date.
 
           dt - first and second date's type (e.g. "c").  Specifies
                whether to search in creation dates ("c") or in
                modification dates ("m").  When dt is not set and d1*
                and d2* are set, the default is "c".
 
      verbose - verbose level (0=min, 9=max).  Useful to print some
                internal information on the searching process in case
                something goes wrong.
 
           ap - alternative patterns (0=no, 1=yes).  In case no exact
                match is found, the search engine can try alternative
                patterns e.g. to replace non-alphanumeric characters by
                a boolean query.  ap defines if this is wanted.
 
           ln - language of the search interface (e.g. "en").  Useful
                for internationalization.
 
           ec - list of external search engines to search as well
                (e.g. "SPIRES HEP").
 
            wl - wildcard limit (e.g. "100").  Wildcard queries will be
                 limited to this many results.
     """
     kwargs = prs_wash_arguments(req=req, cc=cc, c=c, p=p, f=f, rg=rg, sf=sf, so=so, sp=sp, rm=rm, of=of, ot=ot, aas=aas,
                                 p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, jrec=jrec,
                                 recid=recid, recidb=recidb, sysno=sysno, id=id, idb=idb, sysnb=sysnb, action=action, d1=d1,
                                 d1y=d1y, d1m=d1m, d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, verbose=verbose, ap=ap, ln=ln, ec=ec,
                                 tab=tab, wl=wl, em=em)
 
     return prs_perform_search(kwargs=kwargs, **kwargs)
 
 
 def prs_perform_search(kwargs=None, **dummy):
     """Internal call which does the search, it is calling standard Invenio;
     Unless you know what you are doing, don't use this call as an API
     """
     # separately because we can call it independently
     out = prs_wash_arguments_colls(kwargs=kwargs, **kwargs)
     if not out:
         return out
     return prs_search(kwargs=kwargs, **kwargs)
 
 
 def prs_wash_arguments_colls(kwargs=None, of=None, req=None, cc=None, c=None, sc=None, verbose=None,
                           aas=None, ln=None, em="", **dummy):
     """
      Check and wash the collection list argument before we start searching.
      If there is trouble, e.g. a collection is not defined, print a warning
      to the browser.
 
     @return: True if collection list is OK, and various False values
         (empty string, empty list) if there was an error.
     """
 
     # raise an exception when trying to print out html from the cli
     if of.startswith("h"):
         assert req
 
     # for every search engine request asking for an HTML output, we
     # first regenerate cache of collection and field I18N names if
     # needed; so that later we won't bother checking timestamps for
     # I18N names at all:
     if of.startswith("h"):
         collection_i18nname_cache.recreate_cache_if_needed()
         field_i18nname_cache.recreate_cache_if_needed()
 
     try:
         (cc, colls_to_display, colls_to_search, hosted_colls, wash_colls_debug) = wash_colls(cc, c, sc, verbose) # which colls to search and to display?
         kwargs['colls_to_display'] = colls_to_display
         kwargs['colls_to_search'] = colls_to_search
         kwargs['hosted_colls'] = hosted_colls
         kwargs['wash_colls_debug'] = wash_colls_debug
     except InvenioWebSearchUnknownCollectionError, exc:
         colname = exc.colname
         if of.startswith("h"):
             page_start(req, of, cc, aas, ln, getUid(req),
                        websearch_templates.tmpl_collection_not_found_page_title(colname, ln))
             req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln))
             page_end(req, of, ln, em)
             return ''
         elif of == "id":
             return []
         elif of == "intbitset":
             return intbitset()
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
             page_end(req, of, ln, em)
             return ''
         else:
             page_end(req, of, ln, em)
             return ''
     return True
 
 
 def prs_wash_arguments(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS,
                       sf="", so="d", sp="", rm="", of="id", ot="", aas=0,
                       p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="",
                       sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="",
                       d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG,
                       ec=None, tab="", uid=None, wl=0, em="", **dummy):
     """
      Sets the default values and washes/checks the arguments for the PRS call.
     """
 
     # wash output format:
     of = wash_output_format(of)
 
     # wash all arguments requiring special care
     p = wash_pattern(p)
     f = wash_field(f)
     p1 = wash_pattern(p1)
     f1 = wash_field(f1)
     p2 = wash_pattern(p2)
     f2 = wash_field(f2)
     p3 = wash_pattern(p3)
     f3 = wash_field(f3)
     (d1y, d1m, d1d, d2y, d2m, d2d) = map(int, (d1y, d1m, d1d, d2y, d2m, d2d))
     datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d)
 
     # wash ranking method:
     if not is_method_valid(None, rm):
         rm = ""
 
     # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable)
     if sysnb != "" and sysno == "":
         sysno = sysnb
     if id > 0 and recid == -1:
         recid = id
     if idb > 0 and recidb == -1:
         recidb = idb
      # deduce passed search limiting criteria (if applicable):
     pl, pl_in_url = "", "" # no limits by default
     if action != "browse" and req and not isinstance(req, cStringIO.OutputType) \
            and req.args: # we do not want to add options while browsing or while calling via command-line
         fieldargs = cgi.parse_qs(req.args)
         for fieldcode in get_fieldcodes():
             if fieldargs.has_key(fieldcode):
                 for val in fieldargs[fieldcode]:
                     pl += "+%s:\"%s\" " % (fieldcode, val)
                     pl_in_url += "&amp;%s=%s" % (urllib.quote(fieldcode), urllib.quote(val))
     # deduce recid from sysno argument (if applicable):
     if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record:
         recid = get_mysql_recid_from_aleph_sysno(sysno)
         if recid is None:
             recid = 0 # use recid 0 to indicate that this sysno does not exist
     # deduce collection we are in (if applicable):
     if recid > 0:
         referer = None
         if req:
             referer = req.headers_in.get('Referer')
         cc = guess_collection_of_a_record(recid, referer)
     # deduce user id (if applicable):
     if uid is None:
         try:
             uid = getUid(req)
         except:
             uid = 0
 
     _ = gettext_set_language(ln)
 
     kwargs = {'req':req,'cc':cc, 'c':c, 'p':p, 'f':f, 'rg':rg, 'sf':sf, 'so':so, 'sp':sp, 'rm':rm, 'of':of, 'ot':ot, 'aas':aas,
               'p1':p1, 'f1':f1, 'm1':m1, 'op1':op1, 'p2':p2, 'f2':f2, 'm2':m2, 'op2':op2, 'p3':p3, 'f3':f3, 'm3':m3, 'sc':sc, 'jrec':jrec,
               'recid':recid, 'recidb':recidb, 'sysno':sysno, 'id':id, 'idb':idb, 'sysnb':sysnb, 'action':action, 'd1':d1,
               'd1y':d1y, 'd1m':d1m, 'd1d':d1d, 'd2':d2, 'd2y':d2y, 'd2m':d2m, 'd2d':d2d, 'dt':dt, 'verbose':verbose, 'ap':ap, 'ln':ln, 'ec':ec,
               'tab':tab, 'wl':wl, 'em': em,
                'datetext1': datetext1, 'datetext2': datetext2, 'uid': uid, 'pl': pl, 'pl_in_url': pl_in_url, '_': _,
               'selected_external_collections_infos':None,
             }
 
     kwargs.update(**dummy)
     return kwargs
 
 
 def prs_search(kwargs=None, recid=0, req=None, cc=None, p=None, p1=None, p2=None, p3=None,
               f=None, ec=None, verbose=None, ln=None, selected_external_collections_infos=None,
               action=None,rm=None, of=None, em=None,
               **dummy):
     """
      This function writes various bits into the req object as the search
      proceeds (so that pieces of the page are rendered even before the
      search has ended).
     """
 
     ## 0 - start output
     if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist
         output = prs_detailed_record(kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
     elif action == "browse":
         ## 2 - browse needed
         of = 'hb'
         output = prs_browse(kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
     elif rm and p.startswith("recid:"):
         ## 3-ter - similarity search (or old-style citation search) needed
         output = prs_search_similar_records(kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
     elif p.startswith("cocitedwith:"):  #WAS EXPERIMENTAL
         ## 3-terter - cited by search needed
         output = prs_search_cocitedwith(kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
     else:
         ## 3 - common search needed
         output = prs_search_common(kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
     # External searches
     if of.startswith("h"):
          if of not in ['hcs', 'hcs2']:
             perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                        ln, selected_external_collections_infos, em=em)
     return page_end(req, of, ln, em)
 
 
 def prs_detailed_record(kwargs=None, req=None, of=None, cc=None, aas=None, ln=None, uid=None, recid=None, recidb=None,
                       p=None, verbose=None, tab=None, sf=None, so=None, sp=None, rm=None, ot=None, _=None, em=None,
                       **dummy):
     """Formats and prints one record"""
 
     ## 1 - detailed record display
     title, description, keywords = \
            websearch_templates.tmpl_record_page_header_content(req, recid, ln)
 
     if req is not None and not req.header_only:
         page_start(req, of, cc, aas, ln, uid, title, description, keywords, recid, tab, em)
 
     # Default format is hb but we are in detailed -> change 'of'
     if of == "hb":
         of = "hd"
     if record_exists(recid):
         if recidb <= recid: # sanity check
             recidb = recid + 1
         if of in ["id", "intbitset"]:
             result = [recidx for recidx in range(recid, recidb) if record_exists(recidx)]
             if of == "intbitset":
                 return intbitset(result)
             else:
                 return result
         else:
             print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose,
                           tab=tab, sf=sf, so=so, sp=sp, rm=rm, em=em)
         if req and of.startswith("h"): # register detailed record page view event
             client_ip_address = str(req.remote_ip)
             register_page_view_event(recid, uid, client_ip_address)
     else: # record does not exist
         if of == "id":
             return []
         elif of == "intbitset":
             return intbitset()
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
         elif of.startswith("h"):
             if req.header_only:
                 raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
             else:
                 write_warning(_("Requested record does not seem to exist."), req=req)
 
 
 def prs_browse(kwargs=None, req=None, of=None, cc=None, aas=None, ln=None, uid=None, _=None, p=None,
               p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
               so=None, sp=None, rm=None, ot=None, f1=None, m1=None, op1=None,
               f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
               d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
               dt=None, jrec=None, ec=None, action=None,
               colls_to_search=None, verbose=None, em=None, **dummy):
     page_start(req, of, cc, aas, ln, uid, _("Browse"), p=create_page_title_search_pattern_info(p, p1, p2, p3), em=em)
     req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                 p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                 em
                                 ))
     write_warning(create_exact_author_browse_help_link(p, p1, p2, p3, f, f1, f2, f3,
                                                 rm, cc, ln, jrec, rg, aas, action),
                                                 req=req)
     try:
         if aas == 1 or (p1 or p2 or p3):
             browse_pattern(req, colls_to_search, p1, f1, rg, ln)
             browse_pattern(req, colls_to_search, p2, f2, rg, ln)
             browse_pattern(req, colls_to_search, p3, f3, rg, ln)
         else:
             browse_pattern(req, colls_to_search, p, f, rg, ln)
     except:
         register_exception(req=req, alert_admin=True)
         if of.startswith("h"):
             req.write(create_error_box(req, verbose=verbose, ln=ln))
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
         return page_end(req, of, ln, em)
 
 
 def prs_search_similar_records(kwargs=None, req=None, of=None, cc=None, pl_in_url=None, ln=None, uid=None, _=None, p=None,
                     p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
                     so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                     f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
                     d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
                     dt=None, jrec=None, ec=None, action=None, em=None,
                     verbose=None, **dummy):
     if req and not req.header_only:
         page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3),
                    em=em)
     if of.startswith("h"):
         req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                     p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                     em
                                     ))
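      # the pattern reads "recid:NNN" here (see the dispatch in prs_search), hence p[6:]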
     if record_exists(p[6:]) != 1:
         # record does not exist
         if of.startswith("h"):
             if req.header_only:
                 raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
             else:
                 write_warning(_("Requested record does not seem to exist."), req=req)
         if of == "id":
             return []
         if of == "intbitset":
             return intbitset()
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
     else:
         # record well exists, so find similar ones to it
         t1 = os.times()[4]
         results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \
                                 rank_records_bibrank(rm, 0, get_collection_reclist(cc), string.split(p), verbose, f, rg, jrec)
         if results_similar_recIDs:
             t2 = os.times()[4]
             cpu_time = t2 - t1
             if of.startswith("h"):
                 req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs),
                                             jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                             sc, pl_in_url,
                                             d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
                 write_warning(results_similar_comments, req=req)
                 print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln,
                               results_similar_relevances, results_similar_relevances_prologue,
                               results_similar_relevances_epilogue,
                               search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm, em=em)
             elif of == "id":
                 return results_similar_recIDs
             elif of == "intbitset":
                 return intbitset(results_similar_recIDs)
             elif of.startswith("x"):
                 print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln,
                               results_similar_relevances, results_similar_relevances_prologue,
                               results_similar_relevances_epilogue, search_pattern=p, verbose=verbose,
                               sf=sf, so=so, sp=sp, rm=rm, em=em)
          else:
              # rank_records failed and returned some error message to display:
              if of.startswith("h"):
                  write_warning(results_similar_relevances_prologue, req=req)
                  write_warning(results_similar_relevances_epilogue, req=req)
                  write_warning(results_similar_comments, req=req)
              if of == "id":
                  return []
              elif of == "intbitset":
                  return intbitset()
              elif of.startswith("x"):
                  # Print empty, but valid XML
                  print_records_prologue(req, of)
                  print_records_epilogue(req, of)
 
 
 def prs_search_cocitedwith(kwargs=None, req=None, of=None, cc=None, pl_in_url=None, ln=None, uid=None, _=None, p=None,
                     p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
                     so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                     f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
                     d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
                     dt=None, jrec=None, ec=None, action=None,
                     verbose=None, em=None, **dummy):
     page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3),
                em=em)
     if of.startswith("h"):
         req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                     p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                     em
                                     ))
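      # the pattern reads "cocitedwith:NNN" here (see the dispatch in prs_search), hence p[12:]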
     recID = p[12:]
     if record_exists(recID) != 1:
         # record does not exist
         if of.startswith("h"):
             write_warning(_("Requested record does not seem to exist."), req=req)
         if of == "id":
             return []
         elif of == "intbitset":
             return intbitset()
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
     else:
         # record well exists, so find co-cited ones:
         t1 = os.times()[4]
         results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID)))
         if results_cocited_recIDs:
             t2 = os.times()[4]
             cpu_time = t2 - t1
             if of.startswith("h"):
                 req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs),
                                             jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                             sc, pl_in_url,
                                             d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
                 print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose,
                               sf=sf, so=so, sp=sp, rm=rm, em=em)
             elif of == "id":
                 return results_cocited_recIDs
             elif of == "intbitset":
                 return intbitset(results_cocited_recIDs)
             elif of.startswith("x"):
                 print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose,
                               sf=sf, so=so, sp=sp, rm=rm, em=em)
          else:
              # no co-cited records were found, so display a message:
              if of.startswith("h"):
                  write_warning("nothing found", req=req)
              if of == "id":
                  return []
              elif of == "intbitset":
                  return intbitset()
              elif of.startswith("x"):
                  # Print empty, but valid XML
                  print_records_prologue(req, of)
                  print_records_epilogue(req, of)
 
 
 def prs_search_hosted_collections(kwargs=None, req=None, of=None, ln=None, _=None, p=None,
                     p1=None, p2=None, p3=None, hosted_colls=None, f=None,
                     colls_to_search=None, hosted_colls_actual_or_potential_results_p=None,
                     verbose=None, **dummy):
     hosted_colls_results = hosted_colls_timeouts = hosted_colls_true_results = None
 
     # search into the hosted collections only if the output format is html or xml
     if hosted_colls and (of.startswith("h") or of.startswith("x")) and not p.startswith("recid:"):
 
         # hosted_colls_results : the hosted collections' searches that did not timeout
         # hosted_colls_timeouts : the hosted collections' searches that timed out and will be searched later on again
         (hosted_colls_results, hosted_colls_timeouts) = calculate_hosted_collections_results(req, [p, p1, p2, p3], f, hosted_colls, verbose, ln, CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH)
 
         # successful searches
         if hosted_colls_results:
             hosted_colls_true_results = []
             for result in hosted_colls_results:
                 # if the number of results is None or 0 (or False) then just do nothing
                  if result[1] == None or result[1] == False:
                      # these are the searches that returned no or zero results
                      if verbose:
                          write_warning("Hosted collections (perform_search_request): %s returned no results" % result[0][1].name, req=req)
                  else:
                      # these are the searches that actually returned results on time
                      hosted_colls_true_results.append(result)
                      if verbose:
                          write_warning("Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2]), req=req)
          else:
              if verbose:
                  write_warning("Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time", req=req)
          if hosted_colls_timeouts:
              if verbose:
                  for timeout in hosted_colls_timeouts:
                      write_warning("Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name, req=req)
      # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end
      elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")):
          (hosted_colls_results, hosted_colls_timeouts) = (None, None)
      else:
          if verbose:
              write_warning("Hosted collections (perform_search_request): there were no hosted collections to be searched", req=req)
      ## let's define some useful boolean variables:
      # True means there are actual or potential hosted collections results to be printed
     kwargs['hosted_colls_actual_or_potential_results_p'] = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts))
 
     # True means there are hosted collections timeouts to take care of later
     # (useful for more accurate printing of results later)
     kwargs['hosted_colls_potential_results_p'] = not (not hosted_colls or not hosted_colls_timeouts)
 
     # True means we only have hosted collections to deal with
      kwargs['only_hosted_colls_actual_or_potential_results_p'] = not colls_to_search and kwargs['hosted_colls_actual_or_potential_results_p']
 
     kwargs['hosted_colls_results'] = hosted_colls_results
     kwargs['hosted_colls_timeouts'] = hosted_colls_timeouts
     kwargs['hosted_colls_true_results'] = hosted_colls_true_results
 
 
 def prs_advanced_search(results_in_any_collection, kwargs=None, req=None, of=None,
                         cc=None, ln=None, _=None, p=None, p1=None, p2=None, p3=None,
                         f=None, f1=None, m1=None, op1=None, f2=None, m2=None,
                         op2=None, f3=None, m3=None, ap=None, ec=None,
                         selected_external_collections_infos=None, verbose=None,
                         wl=None, em=None, **dummy):
     len_results_p1 = 0
     len_results_p2 = 0
     len_results_p3 = 0
     try:
         results_in_any_collection.union_update(search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl))
         len_results_p1 = len(results_in_any_collection)
         if len_results_p1 == 0:
             if of.startswith("h"):
                 perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec,
                                                            verbose, ln, selected_external_collections_infos, em=em)
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
             return page_end(req, of, ln, em)
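          # op1/op2 combine the per-pattern hitsets, e.g. (sketch):
          #   p1="ellis", op1="n", p2="muon"  ->  hits(p1) minus hits(p2)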
         if p2:
             results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl)
             len_results_p2 = len(results_tmp)
             if op1 == "a": # add
                 results_in_any_collection.intersection_update(results_tmp)
             elif op1 == "o": # or
                 results_in_any_collection.union_update(results_tmp)
             elif op1 == "n": # not
                 results_in_any_collection.difference_update(results_tmp)
             else:
                 if of.startswith("h"):
                     write_warning("Invalid set operation %s." % cgi.escape(op1), "Error", req=req)
             if len(results_in_any_collection) == 0:
                 if of.startswith("h"):
                     if len_results_p2:
                          # each individual query returned results, but the boolean operation did not
                         nearestterms = []
                         nearest_search_args = req.argd.copy()
                         if p1:
                             nearestterms.append((p1, len_results_p1, clean_dictionary(nearest_search_args, ['p2', 'f2', 'm2', 'p3', 'f3', 'm3'])))
                         nearestterms.append((p2, len_results_p2, clean_dictionary(nearest_search_args, ['p1', 'f1', 'm1', 'p3', 'f3', 'm3'])))
                         write_warning(websearch_templates.tmpl_search_no_boolean_hits(ln=ln, nearestterms=nearestterms), req=req)
                     perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                                ln, selected_external_collections_infos, em=em)
                 elif of.startswith("x"):
                     # Print empty, but valid XML
                     print_records_prologue(req, of)
                     print_records_epilogue(req, of)
         if p3:
             results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl)
             len_results_p3 = len(results_tmp)
             if op2 == "a": # add
                 results_in_any_collection.intersection_update(results_tmp)
             elif op2 == "o": # or
                 results_in_any_collection.union_update(results_tmp)
             elif op2 == "n": # not
                 results_in_any_collection.difference_update(results_tmp)
             else:
                 if of.startswith("h"):
                     write_warning("Invalid set operation %s." % cgi.escape(op2), "Error", req=req)
             if len(results_in_any_collection) == 0 and len_results_p3 and of.startswith("h"):
                  # each individual query returned results, but the boolean operation did not
                 nearestterms = []
                 nearest_search_args = req.argd.copy()
                 if p1:
                     nearestterms.append((p1, len_results_p1, clean_dictionary(nearest_search_args, ['p2', 'f2', 'm2', 'p3', 'f3', 'm3'])))
                 if p2:
                     nearestterms.append((p2, len_results_p2, clean_dictionary(nearest_search_args, ['p1', 'f1', 'm1', 'p3', 'f3', 'm3'])))
                 nearestterms.append((p3, len_results_p3, clean_dictionary(nearest_search_args, ['p1', 'f1', 'm1', 'p2', 'f2', 'm2'])))
                 write_warning(websearch_templates.tmpl_search_no_boolean_hits(ln=ln,  nearestterms=nearestterms), req=req)
     except:
         register_exception(req=req, alert_admin=True)
         if of.startswith("h"):
             req.write(create_error_box(req, verbose=verbose, ln=ln))
             perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                ln, selected_external_collections_infos, em=em)
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
 
         return page_end(req, of, ln, em)
 
 
 def prs_simple_search(results_in_any_collection, kwargs=None, req=None, of=None, cc=None, ln=None, p=None, f=None,
                     p1=None, p2=None, p3=None, ec=None, verbose=None, selected_external_collections_infos=None,
                     only_hosted_colls_actual_or_potential_results_p=None, query_representation_in_cache=None,
                     ap=None, hosted_colls_actual_or_potential_results_p=None, wl=None, em=None,
                     **dummy):
     if search_results_cache.cache.has_key(query_representation_in_cache):
          # query is already in the cache, so reuse the cached results:
         results_in_any_collection.union_update(search_results_cache.cache[query_representation_in_cache])
         if verbose and of.startswith("h"):
             write_warning("Search stage 0: query found in cache, reusing cached results.", req=req)
     else:
         try:
             # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection"
             # recommendations when there are results only in the hosted collections. Also added the if clause to avoid
             # searching in case we know we only have actual or potential hosted collections results
             if not only_hosted_colls_actual_or_potential_results_p:
                 results_in_any_collection.union_update(search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln,
                                                                                     display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p,
                                                                                     wl=wl))
         except:
             register_exception(req=req, alert_admin=True)
             if of.startswith("h"):
                 req.write(create_error_box(req, verbose=verbose, ln=ln))
                 perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                            ln, selected_external_collections_infos, em=em)
             return page_end(req, of, ln, em)
 
 
 def prs_intersect_results_with_collrecs(results_final, results_in_any_collection, kwargs=None, colls_to_search=None,
                                     req=None, ap=None, of=None, ln=None,
                                     cc=None, p=None, p1=None, p2=None, p3=None, f=None,
                                     ec=None, verbose=None, selected_external_collections_infos=None, em=None,
                                     **dummy):
      display_nearest_terms_box = not kwargs['hosted_colls_actual_or_potential_results_p']
      try:
          # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection"
          # recommendations when there are results only in the hosted collections. Also added the if clause to avoid
          # searching in case we know since the last stage that we have no results in any collection
         if len(results_in_any_collection) != 0:
             results_final.update(intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of,
                                                                  verbose, ln, display_nearest_terms_box=display_nearest_terms_box))
     except:
         register_exception(req=req, alert_admin=True)
         if of.startswith("h"):
             req.write(create_error_box(req, verbose=verbose, ln=ln))
             perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                ln, selected_external_collections_infos, em=em)
         return page_end(req, of, ln, em)
 
 
 def prs_store_results_in_cache(query_representation_in_cache, results_in_any_collection, req=None, verbose=None, of=None, **dummy):
     if CFG_WEBSEARCH_SEARCH_CACHE_SIZE and not search_results_cache.cache.has_key(query_representation_in_cache):
         if len(search_results_cache.cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE:
             search_results_cache.clear()
         search_results_cache.cache[query_representation_in_cache] = results_in_any_collection
         if verbose and of.startswith("h"):
             write_warning("Search stage 3: storing query results in cache.", req=req)
 
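 # Note on the cache policy above: once more than CFG_WEBSEARCH_SEARCH_CACHE_SIZE
 # queries are cached, the whole cache is flushed and rebuilt rather than
 # evicting individual entries.  A hedged sketch of the same policy on a
 # plain dict (names here are illustrative only):
 #
 #   cache = {}
 #   def store(key, value, max_size=CFG_WEBSEARCH_SEARCH_CACHE_SIZE):
 #       if len(cache) > max_size:
 #           cache.clear()      # wholesale flush, as above
 #       cache[key] = value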
 
 def prs_apply_search_limits(results_final, kwargs=None, req=None, of=None, cc=None, ln=None, _=None,
                             p=None, p1=None, p2=None, p3=None, f=None, pl=None, ap=None, dt=None,
                             ec=None, selected_external_collections_infos=None,
                             hosted_colls_actual_or_potential_results_p=None,
                             datetext1=None, datetext2=None, verbose=None, wl=None, em=None,
                             **dummy):
 
     if datetext1 != "" and results_final != {}:
         if verbose and of.startswith("h"):
             write_warning("Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2), req=req)
         try:
             results_final = intersect_results_with_hitset(req,
                                                           results_final,
                                                           search_unit_in_bibrec(datetext1, datetext2, dt),
                                                           ap,
                                                           aptext= _("No match within your time limits, "
                                                                     "discarding this condition..."),
                                                           of=of)
         except:
             register_exception(req=req, alert_admin=True)
             if of.startswith("h"):
                 req.write(create_error_box(req, verbose=verbose, ln=ln))
                 perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                            ln, selected_external_collections_infos, em=em)
             return page_end(req, of, ln, em)
         if results_final == {} and not hosted_colls_actual_or_potential_results_p:
             if of.startswith("h"):
                 perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                    ln, selected_external_collections_infos, em=em)
             #if of.startswith("x"):
             #    # Print empty, but valid XML
             #    print_records_prologue(req, of)
             #    print_records_epilogue(req, of)
             return page_end(req, of, ln, em)
 
     if pl and results_final != {}:
         pl = wash_pattern(pl)
         if verbose and of.startswith("h"):
             write_warning("Search stage 5: applying search pattern limit %s..." % cgi.escape(pl), req=req)
         try:
             results_final = intersect_results_with_hitset(req,
                                                           results_final,
                                                           search_pattern_parenthesised(req, pl, ap=0, ln=ln, wl=wl),
                                                           ap,
                                                           aptext=_("No match within your search limits, "
                                                                    "discarding this condition..."),
                                                           of=of)
         except:
             register_exception(req=req, alert_admin=True)
             if of.startswith("h"):
                 req.write(create_error_box(req, verbose=verbose, ln=ln))
                 perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                            ln, selected_external_collections_infos, em=em)
             return page_end(req, of, ln, em)
 
         if results_final == {} and not hosted_colls_actual_or_potential_results_p:
             if of.startswith("h"):
                 perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                            ln, selected_external_collections_infos, em=em)
             if of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
             return page_end(req, of, ln, em)
 
 
 def prs_split_into_collections(kwargs=None, results_final=None, colls_to_search=None, hosted_colls_results=None,
                        cpu_time=0, results_final_nb_total=None, hosted_colls_actual_or_potential_results_p=None,
                        hosted_colls_true_results=None, hosted_colls_timeouts=None, **dummy):
     results_final_nb_total = 0
     results_final_nb = {} # will hold number of records found in each collection
                           # (in simple dict to display overview more easily)
     for coll in results_final.keys():
         results_final_nb[coll] = len(results_final[coll])
         #results_final_nb_total += results_final_nb[coll]
 
     # Now let us calculate results_final_nb_total more precisely,
     # in order to get the total number of "distinct" hits across
     # searched collections; this is useful because a record might
     # have been attributed to more than one primary collection; so
     # we have to avoid counting it multiple times.  The price to
     # pay for this accuracy of results_final_nb_total is somewhat
     # increased CPU time.
     if len(results_final) == 1:
         # only one collection; no need to union them
         results_final_for_all_selected_colls = results_final.values()[0]
         results_final_nb_total = results_final_nb.values()[0]
     else:
         # okay, some work ahead to union hits across collections:
         results_final_for_all_selected_colls = intbitset()
         for coll in results_final.keys():
             results_final_for_all_selected_colls.union_update(results_final[coll])
         results_final_nb_total = len(results_final_for_all_selected_colls)
 
     #if hosted_colls and (of.startswith("h") or of.startswith("x")):
     if hosted_colls_actual_or_potential_results_p:
         if hosted_colls_results:
             for result in hosted_colls_true_results:
                 colls_to_search.append(result[0][1].name)
                 results_final_nb[result[0][1].name] = result[1]
                 results_final_nb_total += result[1]
                 cpu_time += result[2]
         if hosted_colls_timeouts:
             for timeout in hosted_colls_timeouts:
                 colls_to_search.append(timeout[1].name)
                 # use -963 as a special number to identify the collections that timed out
                 results_final_nb[timeout[1].name] = -963
 
     kwargs['results_final_nb'] = results_final_nb
     kwargs['results_final_nb_total'] = results_final_nb_total
     kwargs['results_final_for_all_selected_colls'] = results_final_for_all_selected_colls
     kwargs['cpu_time'] = cpu_time  # TODO (rca): check where cpu_time is used; this line was missing
     return (results_final_nb, results_final_nb_total, results_final_for_all_selected_colls)
 
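 # Worked example (hypothetical collections) of the distinct-hit counting
 # performed above: a record attributed to two primary collections is
 # counted once in results_final_nb_total thanks to the union.
 #
 #   >>> from invenio.intbitset import intbitset
 #   >>> per_coll = {'Articles': intbitset([1, 2, 3]),
 #   ...             'Preprints': intbitset([3, 4])}
 #   >>> union = intbitset()
 #   >>> for hits in per_coll.values():
 #   ...     union.union_update(hits)
 #   >>> len(union)    # record 3 is counted only once
 #   4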
 
 def prs_summarize_records(kwargs=None, req=None, p=None, f=None, aas=None,
                        p1=None, p2=None, p3=None, f1=None, f2=None, f3=None, op1=None, op2=None,
                        ln=None, results_final_for_all_selected_colls=None, of='hcs', **dummy):
     # feed the current search to be summarized:
     from invenio.search_engine_summarizer import summarize_records
     search_p = p
     search_f = f
     if not p and (aas == 1 or p1 or p2 or p3):
         op_d = {'n': ' and not ', 'a': ' and ', 'o': ' or ', '': ''}
         triples = ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, ''])
         triples_len = len(triples)
         for i in range(triples_len):
             fi, pi, oi = triples[i]                       # e.g.:
             if i < triples_len-1 and not triples[i+1][1]: # if p2 empty
                 triples[i+1][0] = ''                      #   f2 must be too
                 oi = ''                                   #   and o1
             if ' ' in pi:
                 pi = '"'+pi+'"'
             if fi:
                 fi = fi + ':'
             search_p += fi + pi + op_d[oi]
         search_f = ''
     summarize_records(results_final_for_all_selected_colls, of, ln, search_p, search_f, req)
 
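 # Worked example (hypothetical inputs) of the pattern reconstruction in
 # prs_summarize_records() above: with p empty, p1='ellis', f1='author',
 # op1='a', p2='muon physics', f2='title', the loop builds
 # search_p == 'author:ellis and title:"muon physics"', quoting the
 # multi-word pattern and joining the triples with the op_d connectors.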
 
 def prs_print_records(kwargs=None, results_final=None, req=None, of=None, cc=None, pl_in_url=None,
                     ln=None, _=None, p=None, p1=None, p2=None, p3=None, f=None, rg=None, sf=None,
                     so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                     f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, d1y=None, d1m=None,
                     d1d=None, d2y=None, d2m=None, d2d=None, dt=None, jrec=None, colls_to_search=None,
                     hosted_colls_actual_or_potential_results_p=None, hosted_colls_results=None,
                     hosted_colls_true_results=None, hosted_colls_timeouts=None, results_final_nb=None,
                     cpu_time=None, verbose=None, em=None, **dummy):
 
     if len(colls_to_search) > 1:
         cpu_time = -1  # we do not want to have search time printed on each collection
 
     print_records_prologue(req, of, cc=cc)
     results_final_colls = []
     wlqh_results_overlimit = 0
     for coll in colls_to_search:
         if results_final.has_key(coll) and len(results_final[coll]):
             if of.startswith("h"):
                 req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll],
                                             jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                             sc, pl_in_url,
                                             d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
             results_final_recIDs = list(results_final[coll])
             results_final_relevances = []
             results_final_relevances_prologue = ""
             results_final_relevances_epilogue = ""
             if rm: # do we have to rank?
                 results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \
                                              rank_records(req, rm, 0, results_final[coll],
                                                           string.split(p) + string.split(p1) +
                                                           string.split(p2) + string.split(p3), verbose, so, of, ln, rg, jrec, kwargs['f'])
                 if of.startswith("h"):
                     write_warning(results_final_comments, req=req)
                 if results_final_recIDs_ranked:
                     results_final_recIDs = results_final_recIDs_ranked
                 else:
                     # rank_records failed and returned some error message to display:
                     write_warning(results_final_relevances_prologue, req=req)
                     write_warning(results_final_relevances_epilogue, req=req)
             elif sf or (CFG_BIBSORT_BUCKETS and sorting_methods): # do we have to sort?
                 results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of, ln, rg, jrec)
 
             if len(results_final_recIDs) < CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT:
                 results_final_colls.append(results_final_recIDs)
             else:
                 wlqh_results_overlimit = 1
 
             print_records(req, results_final_recIDs, jrec, rg, of, ot, ln,
                           results_final_relevances,
                           results_final_relevances_prologue,
                           results_final_relevances_epilogue,
                           search_pattern=p,
                           print_records_prologue_p=False,
                           print_records_epilogue_p=False,
                           verbose=verbose,
                           sf=sf,
                           so=so,
                           sp=sp,
                           rm=rm,
                           em=em)
 
             if of.startswith("h"):
                 req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll],
                                             jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                             sc, pl_in_url,
                                             d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1, em=em))
 
     if req and not isinstance(req, cStringIO.OutputType):
         # store the last search results page
         session_param_set(req, 'websearch-last-query', req.unparsed_uri)
         if wlqh_results_overlimit:
             results_final_colls = None
         # store list of results if user wants to display hits
         # in a single list, or store list of collections of records
         # if user displays hits split by collections:
         session_param_set(req, 'websearch-last-query-hits', results_final_colls)
 
     #if hosted_colls and (of.startswith("h") or of.startswith("x")):
     if hosted_colls_actual_or_potential_results_p:
         if hosted_colls_results:
             # TODO: add a verbose message here
             for result in hosted_colls_true_results:
                 if of.startswith("h"):
                     req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name],
                                                 jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                 sc, pl_in_url,
                                                 d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
                 req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg, em=em))
                 if of.startswith("h"):
                     req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name],
                                                 jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                 sc, pl_in_url,
                                                 d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
         if hosted_colls_timeouts:
             # TODO: add a verbose message here
             # TODO: check if verbose messages still work when dealing with (re)calculations of timeouts
             (hosted_colls_timeouts_results, hosted_colls_timeouts_timeouts) = do_calculate_hosted_collections_results(req, ln, None, verbose, None, hosted_colls_timeouts, CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH)
             if hosted_colls_timeouts_results:
                 for result in hosted_colls_timeouts_results:
                     if not result[1]:
                         ## these are the searches that returned no or zero results
                         ## also print a nearest terms box, in case this is the only
                         ## collection being searched and it returns no results?
                         if of.startswith("h"):
                             req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963,
                                                         jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                         sc, pl_in_url,
                                                         d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                             req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, no_records_found=True, limit=rg, em=em))
                             req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963,
                                                         jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                         sc, pl_in_url,
                                                         d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
                     else:
                         # these are the searches that actually returned results on time
                         if of.startswith("h"):
                             req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1],
                                                         jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                         sc, pl_in_url,
                                                         d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                         req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg, em=em))
                         if of.startswith("h"):
                             req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1],
                                                         jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                         sc, pl_in_url,
                                                         d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
             if hosted_colls_timeouts_timeouts:
                 for timeout in hosted_colls_timeouts_timeouts:
                     if of.startswith("h"):
                         req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963,
                                                     jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                     sc, pl_in_url,
                                                     d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                         req.write(print_hosted_results(url_and_engine=timeout[0], ln=ln, of=of, req=req, search_timed_out=True, limit=rg, em=em))
                         req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963,
                                                     jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                     sc, pl_in_url,
                                                     d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
 
     print_records_epilogue(req, of)
     if f == "author" and of.startswith("h"):
         req.write(create_similarly_named_authors_link_box(p, ln))
 
 
 def prs_log_query(kwargs=None, req=None, uid=None, of=None, ln=None, p=None, f=None,
                colls_to_search=None, results_final_nb_total=None, em=None, **dummy):
     # log query:
     try:
         id_query = log_query(req.remote_host, req.args, uid)
         if of.startswith("h") and id_query and (em == '' or EM_REPOSITORY["alert"] in em):
             if of not in ['hcs', 'hcs2']:
                 # display alert/RSS teaser for non-summary formats:
                 user_info = collect_user_info(req)
                 display_email_alert_part = True
                 if user_info:
                     if user_info['email'] == 'guest':
                         if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4:
                             display_email_alert_part = False
                     else:
                         if not user_info['precached_usealerts']:
                             display_email_alert_part = False
                 req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, \
                                      ln=ln, display_email_alert_part=display_email_alert_part))
     except:
         # do not log query if req is None (used by CLI interface)
         pass
     log_query_info("ss", p, f, colls_to_search, results_final_nb_total)
 
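 # Pattern used throughout this module: an empty 'em' value means "print
 # every part of the page"; otherwise a part is printed only when its
 # EM_REPOSITORY entry is contained in 'em'.  That is why the alert/RSS
 # teaser above is gated on (em == '' or EM_REPOSITORY["alert"] in em).
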
 # Load the WebSearch services only once per process: referencing the name
 # raises NameError until it has been initialised.
 try:
     loaded_websearch_services
 except NameError:
     loaded_websearch_services = get_search_services()
 
 def prs_search_common(kwargs=None, req=None, of=None, cc=None, ln=None, uid=None, _=None, p=None,
                     p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
                     so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                     f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
                     d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
                     dt=None, jrec=None, ec=None, action=None, colls_to_search=None, wash_colls_debug=None,
                     verbose=None, wl=None, em=None, **dummy):
 
     query_representation_in_cache = repr((p, f, colls_to_search, wl))
     page_start(req, of, cc, aas, ln, uid, p=create_page_title_search_pattern_info(p, p1, p2, p3), em=em)
 
     if of.startswith("h") and verbose and wash_colls_debug:
         write_warning("wash_colls debugging info : %s" % wash_colls_debug, req=req)
 
     prs_search_hosted_collections(kwargs=kwargs, **kwargs)
 
 
     if of.startswith("h"):
         req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                     p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                     em
                                     ))
 
         # WebSearch services
         if jrec <= 1 and \
                (em == "" or EM_REPOSITORY["search_services"] in em):
             user_info = collect_user_info(req)
             # display services only on the first search results page,
             # and only if wanted when the 'em' parameter is set.
             if p:
                 search_units = create_basic_search_units(req, p, f)
             else:
                 search_units = []
             search_service_answers = [search_service.answer(req, user_info, of, cc, colls_to_search, p, f, search_units, ln) \
                                       for search_service in loaded_websearch_services]
             search_service_answers.sort(reverse=True)
             nb_answers = 0
             best_relevance = None
 
             for answer_relevance, answer_html in search_service_answers:
                 nb_answers += 1
                 if best_relevance is None:
                     best_relevance = answer_relevance
                 if best_relevance <= CFG_WEBSEARCH_SERVICE_MIN_RELEVANCE_TO_DISPLAY:
                     # The answer is not relevant enough
                     if verbose > 8:
                         write_warning("Service relevance too low (%i). Answer would be: %s" % (answer_relevance, answer_html), req=req)
                     break
                 if nb_answers > CFG_WEBSEARCH_SERVICE_MAX_NB_SERVICE_DISPLAY:
                     # We have reached the max number of services to display
                     if verbose > 8:
                         write_warning("Max number of services (%i) reached." % CFG_WEBSEARCH_SERVICE_MAX_NB_SERVICE_DISPLAY, req=req)
                     break
                 if best_relevance - answer_relevance > CFG_WEBSEARCH_SERVICE_MAX_RELEVANCE_DIFFERENCE:
                     # The service gave an answer that is significantly worse than the previous ones.
                     if verbose > 8:
                         write_warning("Service relevance too low (%i) compared to best one (%i). Answer would be: %s" % (answer_relevance, best_relevance, answer_html), req=req)
                     break
                 req.write('<div class="searchservicebox">')
                 req.write(answer_html)
                 if verbose > 8:
                     write_warning("Service relevance: %i" % answer_relevance, req=req)
 
                 req.write('</div>')
                 if answer_relevance == CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE:
                     # The service assumes it has given the definitive answer
                     if verbose > 8:
                         write_warning("There cannot be a better answer. Leaving", req=req)
                     break
 
     t1 = os.times()[4]
     results_in_any_collection = intbitset()
     if aas == 1 or (p1 or p2 or p3):
         ## 3A - advanced search
         output = prs_advanced_search(results_in_any_collection, kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
     else:
         ## 3B - simple search
         output = prs_simple_search(results_in_any_collection, kwargs=kwargs, **kwargs)
         if output is not None:
             return output
 
 
     if len(results_in_any_collection) == 0 and not kwargs['hosted_colls_actual_or_potential_results_p']:
         if of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
         return None
 
     # store this search query results into search results cache if needed:
     prs_store_results_in_cache(query_representation_in_cache, results_in_any_collection, **kwargs)
 
     # search stage 4 and 5: intersection with collection universe and sorting/limiting
     try:
         output = prs_intersect_with_colls_and_apply_search_limits(results_in_any_collection, kwargs=kwargs, **kwargs)
         if output is not None:
             return output
     except Exception: # no results to display
         return None
 
     t2 = os.times()[4]
     cpu_time = t2 - t1
     kwargs['cpu_time'] = cpu_time
 
     ## search stage 6: display results:
     return prs_display_results(kwargs=kwargs, **kwargs)
 
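 # Overview of the prs_* pipeline driven by prs_search_common() above
 # (stage numbers follow the write_warning() messages in this module):
 #   stage 0: reuse cached results, or search in any collection
 #   stage 3: store the query results in the search cache
 #   stage 4: intersect with the collection universe
 #   stage 5: apply time/pattern search limits
 #   stage 6: split per collection and display/summarize/rank the results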
 
 def prs_intersect_with_colls_and_apply_search_limits(results_in_any_collection,
                                                kwargs=None, req=None, of=None, ln=None, _=None,
                                                p=None, p1=None, p2=None, p3=None, f=None, cc=None, ec=None,
                                                verbose=None, em=None, **dummy):
     # search stage 4: intersection with collection universe:
     results_final = {}
     output = prs_intersect_results_with_collrecs(results_final, results_in_any_collection, kwargs, **kwargs)
     if output is not None:
         return output
 
     # no results in any collection and no hosted collections to wait for:
     # print empty, but valid, XML output if needed and abort the search
     if results_final == {} and not kwargs['hosted_colls_actual_or_potential_results_p']:
         if of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
         kwargs['results_final'] = results_final
         raise Exception  # caught in prs_search_common()
 
     # search stage 5: apply search option limits and restrictions:
     output = prs_apply_search_limits(results_final, kwargs=kwargs, **kwargs)
     kwargs['results_final'] = results_final
     if output is not None:
         return output
 
 
 def prs_display_results(kwargs=None, results_final=None, req=None, of=None, sf=None,
                         so=None, sp=None, verbose=None, p=None, p1=None, p2=None, p3=None,
                         cc=None, ln=None, _=None, ec=None, colls_to_search=None, rm=None, cpu_time=None,
                         f=None, em=None, **dummy
                      ):
 
     ## search stage 6: display results:
 
     # split result set into collections
     (results_final_nb, results_final_nb_total, results_final_for_all_selected_colls) = prs_split_into_collections(kwargs=kwargs, **kwargs)
 
     # we continue past this point only if there is a hosted collection that has timed out and might offer potential results
     if results_final_nb_total == 0 and not kwargs['hosted_colls_potential_results_p']:
         if of.startswith("h"):
             write_warning("No match found, please enter different search terms.", req=req)
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
     else:
         # yes, some hits found: good!
         # collection list may have changed due to not-exact-match-found policy so check it out:
         for coll in results_final.keys():
             if coll not in colls_to_search:
                 colls_to_search.append(coll)
         # print results overview:
         if of == "intbitset":
             #return the result as an intbitset
             return results_final_for_all_selected_colls
         elif of == "id":
             # we have been asked to return list of recIDs
             recIDs = list(results_final_for_all_selected_colls)
             if rm: # do we have to rank?
                 results_final_for_all_colls_rank_records_output = rank_records(req, rm, 0, results_final_for_all_selected_colls,
                                                                                string.split(p) + string.split(p1) +
                                                                                string.split(p2) + string.split(p3), verbose, so, of, ln, kwargs['rg'], kwargs['jrec'], kwargs['f'])
                 if results_final_for_all_colls_rank_records_output[0]:
                     recIDs = results_final_for_all_colls_rank_records_output[0]
             elif sf or (CFG_BIBSORT_BUCKETS and sorting_methods): # do we have to sort?
                 recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of, ln)
             return recIDs
 
         elif of.startswith("h"):
             if of not in ['hcs', 'hcs2', 'hcv', 'htcv', 'tlcv']:
                 # added the hosted_colls_potential_results_p parameter to help print out the overview more accurately
                 req.write(print_results_overview(colls_to_search, results_final_nb_total, results_final_nb, cpu_time,
                             ln, ec, hosted_colls_potential_results_p=kwargs['hosted_colls_potential_results_p'], em=em))
                 kwargs['selected_external_collections_infos'] = print_external_results_overview(req, cc, [p, p1, p2, p3],
                                         f, ec, verbose, ln, print_overview=em == "" or EM_REPOSITORY["overview"] in em)
         # print number of hits found for XML outputs:
         if of.startswith("x") or of == 'mobb':
             req.write("<!-- Search-Engine-Total-Number-Of-Results: %s -->\n" % kwargs['results_final_nb_total'])
         # print records:
         if of in ['hcs', 'hcs2']:
             prs_summarize_records(kwargs=kwargs, **kwargs)
         elif of in ['hcv', 'htcv', 'tlcv'] and CFG_INSPIRE_SITE:
             from invenio.search_engine_cvifier import cvify_records
             cvify_records(results_final_for_all_selected_colls, of, req, so)
         else:
             prs_print_records(kwargs=kwargs, **kwargs)
 
 
         prs_log_query(kwargs=kwargs, **kwargs)
 
 
 # This is a copy of prs_display_results() with the output parts removed; it is needed by external modules.
 def prs_rank_results(kwargs=None, results_final=None, req=None, colls_to_search=None,
                      sf=None, so=None, sp=None, of=None, rm=None, p=None, p1=None, p2=None, p3=None,
                      verbose=None, **dummy
                      ):
 
     ## search stage 6: display results:
 
     # split result set into collections
     (results_final_nb, results_final_nb_total, results_final_for_all_selected_colls) = prs_split_into_collections(kwargs=kwargs, **kwargs)
 
 
     # yes, some hits found: good!
     # collection list may have changed due to not-exact-match-found policy so check it out:
     for coll in results_final.keys():
         if coll not in colls_to_search:
             colls_to_search.append(coll)
 
     # we have been asked to return list of recIDs
     recIDs = list(results_final_for_all_selected_colls)
     if rm: # do we have to rank?
         results_final_for_all_colls_rank_records_output = rank_records(req, rm, 0, results_final_for_all_selected_colls,
                                                                        string.split(p) + string.split(p1) +
                                                                        string.split(p2) + string.split(p3), verbose, so, of, field=kwargs['f'])
         if results_final_for_all_colls_rank_records_output[0]:
             recIDs = results_final_for_all_colls_rank_records_output[0]
     elif sf or (CFG_BIBSORT_BUCKETS and sorting_methods): # do we have to sort?
         recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of)
     return recIDs
 
 
 def perform_request_cache(req, action="show"):
     """Manipulates the search engine cache."""
     req.content_type = "text/html"
     req.send_http_header()
     req.write("<html>")
     out = ""
     out += "<h1>Search Cache</h1>"
     # clear cache if requested:
     if action == "clear":
         search_results_cache.clear()
     req.write(out)
     # show collection reclist cache:
     out = "<h3>Collection reclist cache</h3>"
     out += "- collection table last updated: %s" % get_table_update_time('collection')
     out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache.timestamp
     out += "<br />- reclist cache contents:"
     out += "<blockquote>"
     for coll in collection_reclist_cache.cache.keys():
         if collection_reclist_cache.cache[coll]:
             out += "%s (%d)<br />" % (coll, len(collection_reclist_cache.cache[coll]))
     out += "</blockquote>"
     req.write(out)
     # show search results cache:
     out = "<h3>Search Cache</h3>"
     out += "- search cache usage: %d queries cached (max. ~%d)" % \
            (len(search_results_cache.cache), CFG_WEBSEARCH_SEARCH_CACHE_SIZE)
     if len(search_results_cache.cache):
         out += "<br />- search cache contents:"
         out += "<blockquote>"
         for query, hitset in search_results_cache.cache.items():
             out += "<br />%s ... %s" % (query, hitset)
         out += """<p><a href="%s/search/cache?action=clear">clear search results cache</a>""" % CFG_SITE_URL
         out += "</blockquote>"
     req.write(out)
     # show field i18nname cache:
     out = "<h3>Field I18N names cache</h3>"
     out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
     out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache.timestamp
     out += "<br />- i18nname cache contents:"
     out += "<blockquote>"
     for field in field_i18nname_cache.cache.keys():
         for ln in field_i18nname_cache.cache[field].keys():
             out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache.cache[field][ln])
     out += "</blockquote>"
     req.write(out)
     # show collection i18nname cache:
     out = "<h3>Collection I18N names cache</h3>"
     out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
     out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache.timestamp
     out += "<br />- i18nname cache contents:"
     out += "<blockquote>"
     for coll in collection_i18nname_cache.cache.keys():
         for ln in collection_i18nname_cache.cache[coll].keys():
             out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache.cache[coll][ln])
     out += "</blockquote>"
     req.write(out)
     req.write("</html>")
     return "\n"
 
 def perform_request_log(req, date=""):
     """Display search log information for given date."""
     req.content_type = "text/html"
     req.send_http_header()
     req.write("<html>")
     req.write("<h1>Search Log</h1>")
     if date: # case A: display stats for a day
         yyyymmdd = string.atoi(date)
         req.write("<p><big><strong>Date: %d</strong></big><p>" % yyyymmdd)
         req.write("""<table border="1">""")
         req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
         # read file:
         p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
         lines = p.readlines()
         p.close()
         # process lines:
         i = 0
         for line in lines:
             try:
                 datetime, dummy_aas, p, f, c, nbhits = string.split(line,"#")
                 i += 1
                 req.write("<tr><td align=\"right\">#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                           % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
             except:
                 pass # ignore any malformed log lines
         req.write("</table>")
     else: # case B: display summary stats per day
         yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
         yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
         req.write("""<table border="1">""")
         req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></tr>" % ("Day", "Number of Queries"))
         for day in range(yyyymm01, yyyymmdd + 1):
             p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
             for line in p.readlines():
                 req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                           (day, CFG_SITE_URL, day, line))
             p.close()
         req.write("</table>")
     req.write("</html>")
     return "\n"
 
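 # The search.log lines parsed above are '#'-separated; a hypothetical line
 #
 #   20130101123045#0#ellis#author#Articles#42
 #
 # unpacks via string.split(line, "#") into (datetime, aas, p, f, c, nbhits),
 # and datetime[8:10], datetime[10:12], datetime[12:] yield hh, mm, ss.
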
 def get_all_field_values(tag):
     """
     Return all existing values stored for a given tag.
     @param tag: the full tag, e.g. 909C0b
     @type tag: string
     @return: the list of values
     @rtype: list of strings
     """
     table = 'bib%02dx' % int(tag[:2])
     return [row[0] for row in run_sql("SELECT DISTINCT(value) FROM %s WHERE tag=%%s" % table, (tag, ))]
 
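 # Example (hypothetical data): for tag '909C0b' the table name resolves to
 # 'bib90x', so the function issues
 # SELECT DISTINCT(value) FROM bib90x WHERE tag='909C0b' and might return:
 #
 #   >>> get_all_field_values('909C0b')
 #   ['ARTICLE', 'PREPRINT', 'THESIS']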
 
 def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True, split_by=0):
     """
     Analyze RECIDS and look for TAGS and return most popular values
     and the frequency with which they occur sorted according to
     descending frequency.
 
     If a value is found in EXCLUDE_VALUES, then do not count it.
 
     If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
     of value in the tags.  If False, then we count the value only once
     regardless of the number of times it may appear in a record.
     (But, if the same value occurs in another record, we count it, of
     course.)
 
     @return: list of tuples containing the value and its frequency
 
     Example:
      >>> get_most_popular_field_values(range(11,20), '980__a')
      [('PREPRINT', 10), ('THESIS', 7), ...]
      >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
      [('Ellis, J', 10), ('Ellis, N', 7), ...]
      >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J',))
      [('Ellis, N', 7), ...]
     """
 
     def _get_most_popular_field_values_helper_sorter(val1, val2):
         """Compare VAL1 and VAL2 according to, firstly, frequency, then
         secondly, alphabetically."""
         compared_via_frequencies = cmp(valuefreqdict[val2],
                                        valuefreqdict[val1])
         if compared_via_frequencies == 0:
             return cmp(val1.lower(), val2.lower())
         else:
             return compared_via_frequencies
 
     valuefreqdict = {}
     ## sanity check:
     if not exclude_values:
         exclude_values = []
     if isinstance(tags, str):
         tags = (tags,)
     ## find values to count:
     vals_to_count = []
     displaytmp = {}
     if count_repetitive_values:
         # counting technique A: can look up many records at once: (very fast)
         for tag in tags:
             vals_to_count.extend(get_fieldvalues(recids, tag, sort=False,
                                                  split_by=split_by))
     else:
         # counting technique B: must count record-by-record: (slow)
         for recid in recids:
             vals_in_rec = []
             for tag in tags:
                 for val in get_fieldvalues(recid, tag, False):
                     vals_in_rec.append(val)
             # do not count repetitive values within this record
             # (even across various tags, so need to unify again):
             dtmp = {}
             for val in vals_in_rec:
                 dtmp[val.lower()] = 1
                 displaytmp[val.lower()] = val
             vals_in_rec = dtmp.keys()
             vals_to_count.extend(vals_in_rec)
     ## are we to exclude some of found values?
     for val in vals_to_count:
         if val not in exclude_values:
             if val in valuefreqdict:
                 valuefreqdict[val] += 1
             else:
                 valuefreqdict[val] = 1
     ## sort by descending frequency of values:
     if not CFG_NUMPY_IMPORTABLE:
         ## original version
         out = []
         vals = valuefreqdict.keys()
         vals.sort(_get_most_popular_field_values_helper_sorter)
         for val in vals:
             tmpdisplv = ''
             if val in displaytmp:
                 tmpdisplv = displaytmp[val]
             else:
                 tmpdisplv = val
             out.append((tmpdisplv, valuefreqdict[val]))
         return out
     else:
         f = []   # frequencies
         n = []   # original names
         ln = []  # lowercased names
         ## build lists within one iteration
         for (val, freq) in valuefreqdict.iteritems():
             f.append(-1 * freq)
             if val in displaytmp:
                 n.append(displaytmp[val])
             else:
                 n.append(val)
             ln.append(val.lower())
         ## sort by frequency (desc) and then by lowercased name.
         return [(n[i], -1 * f[i]) for i in numpy.lexsort([ln, f])]
 
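 # Sketch of the numpy.lexsort() trick used above (assuming numpy is
 # importable): frequencies are negated so that the ascending lexsort
 # yields descending frequency, with the lowercased name as tie-breaker.
 # Note that lexsort() takes its *primary* key last.
 #
 #   >>> import numpy
 #   >>> f = [-10, -7, -10]        # negated frequencies
 #   >>> ln = ['b', 'c', 'a']      # lowercased names
 #   >>> [int(i) for i in numpy.lexsort([ln, f])]
 #   [2, 0, 1]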
 
 def profile(p="", f="", c=CFG_SITE_NAME):
     """Profile search time."""
     import profile
     import pstats
     profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile")
     p = pstats.Stats("perform_request_search_profile")
     p.strip_dirs().sort_stats("cumulative").print_stats()
     return 0
 
 
 def perform_external_collection_search_with_em(req, current_collection, pattern_list, field,
         external_collection, verbosity_level=0, lang=CFG_SITE_LANG,
         selected_external_collections_infos=None, em=""):
     perform_external_collection_search(req, current_collection, pattern_list, field, external_collection,
                             verbosity_level, lang, selected_external_collections_infos,
                             print_overview=em == "" or EM_REPOSITORY["overview"] in em,
                             print_search_info=em == "" or EM_REPOSITORY["search_info"] in em,
                             print_see_also_box=em == "" or EM_REPOSITORY["see_also_box"] in em,
                             print_body=em == "" or EM_REPOSITORY["body"] in em)
diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py
index 67348c758..9e6473b63 100644
--- a/modules/websearch/lib/websearch_templates.py
+++ b/modules/websearch/lib/websearch_templates.py
@@ -1,4802 +1,4832 @@
 # -*- coding: utf-8 -*-
 
 ## This file is part of Invenio.
 ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 # pylint: disable=C0301
 
 __revision__ = "$Id$"
 
 import time
 import cgi
 import string
 import re
 import locale
 from urllib import quote, urlencode
 from xml.sax.saxutils import escape as xml_escape
 
 from invenio.config import \
      CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, \
      CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, \
      CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, \
      CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD, \
      CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \
      CFG_WEBSEARCH_SPLIT_BY_COLLECTION, \
      CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \
      CFG_BIBRANK_SHOW_READING_STATS, \
      CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \
      CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \
      CFG_BIBRANK_SHOW_CITATION_LINKS, \
      CFG_BIBRANK_SHOW_CITATION_STATS, \
      CFG_BIBRANK_SHOW_CITATION_GRAPHS, \
      CFG_WEBSEARCH_RSS_TTL, \
      CFG_SITE_LANG, \
      CFG_SITE_NAME, \
      CFG_SITE_NAME_INTL, \
      CFG_VERSION, \
      CFG_SITE_URL, \
      CFG_SITE_SUPPORT_EMAIL, \
      CFG_SITE_ADMIN_EMAIL, \
      CFG_CERN_SITE, \
      CFG_INSPIRE_SITE, \
      CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \
      CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \
      CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \
      CFG_BIBINDEX_CHARS_PUNCTUATION, \
      CFG_WEBCOMMENT_ALLOW_COMMENTS, \
      CFG_WEBCOMMENT_ALLOW_REVIEWS, \
      CFG_WEBSEARCH_WILDCARD_LIMIT, \
      CFG_WEBSEARCH_SHOW_COMMENT_COUNT, \
      CFG_WEBSEARCH_SHOW_REVIEW_COUNT, \
      CFG_SITE_RECORD, \
      CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT, \
      CFG_HEPDATA_URL, \
      CFG_HEPDATA_PLOTSIZE
 
 from invenio.search_engine_config import CFG_WEBSEARCH_RESULTS_OVERVIEW_MAX_COLLS_TO_PRINT
 from invenio.websearch_services import \
      CFG_WEBSEARCH_MAX_SEARCH_COLL_RESULTS_TO_PRINT
 
 from invenio.dbquery import run_sql
 from invenio.messages import gettext_set_language
 from invenio.urlutils import make_canonical_urlargd, drop_default_urlargd, create_html_link, create_url
 from invenio.htmlutils import nmtoken_from_string
 from invenio.webinterface_handler import wash_urlargd
 from invenio.bibrank_citation_searcher import get_cited_by_count
 from invenio.webuser import session_param_get
 
 from invenio.intbitset import intbitset
 
 from invenio.websearch_external_collections import external_collection_get_state, get_external_collection_engine
 from invenio.websearch_external_collections_utils import get_collection_id
 from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS
 from invenio.search_engine_utils import get_fieldvalues
-
-import sys
+from invenio.bibformat import format_record
 
 from invenio import hepdatadisplayutils
 _RE_PUNCTUATION = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION)
 _RE_SPACES = re.compile(r"\s+")
 
 class Template:
 
     # This dictionary maps Invenio language code to locale codes (ISO 639)
     tmpl_localemap = {
         'bg': 'bg_BG',
         'ar': 'ar_AR',
         'ca': 'ca_ES',
         'de': 'de_DE',
         'el': 'el_GR',
         'en': 'en_US',
         'es': 'es_ES',
         'pt': 'pt_BR',
         'fa': 'fa_IR',
         'fr': 'fr_FR',
         'it': 'it_IT',
         'ka': 'ka_GE',
         'lt': 'lt_LT',
         'ro': 'ro_RO',
         'ru': 'ru_RU',
         'rw': 'rw_RW',
         'sk': 'sk_SK',
         'cs': 'cs_CZ',
         'no': 'no_NO',
         'sv': 'sv_SE',
         'uk': 'uk_UA',
         'ja': 'ja_JA',
         'pl': 'pl_PL',
         'hr': 'hr_HR',
         'zh_CN': 'zh_CN',
         'zh_TW': 'zh_TW',
         'hu': 'hu_HU',
         'af': 'af_ZA',
         'gl': 'gl_ES'
         }
     tmpl_default_locale = "en_US" # which locale to use by default, useful in case of failure
 
     # Type of the allowed parameters for the web interface for search results
     search_results_default_urlargd = {
         'cc': (str, CFG_SITE_NAME),
         'c': (list, []),
         'p': (str, ""), 'f': (str, ""),
         'rg': (int, CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS),
         'sf': (str, ""),
         'so': (str, "d"),
         'sp': (str, ""),
         'rm': (str, ""),
         'of': (str, "hb"),
         'ot': (list, []),
         'em': (str,""),
         'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
         'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
         'p1': (str, ""), 'f1': (str, ""), 'm1': (str, ""), 'op1':(str, ""),
         'p2': (str, ""), 'f2': (str, ""), 'm2': (str, ""), 'op2':(str, ""),
         'p3': (str, ""), 'f3': (str, ""), 'm3': (str, ""),
         'sc': (int, 0),
         'jrec': (int, 0),
         'recid': (int, -1), 'recidb': (int, -1), 'sysno': (str, ""),
         'id': (int, -1), 'idb': (int, -1), 'sysnb': (str, ""),
         'action': (str, "search"),
         'action_search': (str, ""),
         'action_browse': (str, ""),
         'd1': (str, ""),
         'd1y': (int, 0), 'd1m': (int, 0), 'd1d': (int, 0),
         'd2': (str, ""),
         'd2y': (int, 0), 'd2m': (int, 0), 'd2d': (int, 0),
         'dt': (str, ""),
         'ap': (int, 1),
         'verbose': (int, 0),
         'ec': (list, []),
         'wl': (int, CFG_WEBSEARCH_WILDCARD_LIMIT),
         }
 
     # ...and for search interfaces
     search_interface_default_urlargd = {
         'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
         'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
         'verbose': (int, 0),
         'em' : (str, "")}
 
     # ...and for RSS feeds
     rss_default_urlargd = {'c'  : (list, []),
                            'cc' : (str, ""),
                            'p'  : (str, ""),
                            'f'  : (str, ""),
                            'p1' : (str, ""),
                            'f1' : (str, ""),
                            'm1' : (str, ""),
                            'op1': (str, ""),
                            'p2' : (str, ""),
                            'f2' : (str, ""),
                            'm2' : (str, ""),
                            'op2': (str, ""),
                            'p3' : (str, ""),
                            'f3' : (str, ""),
                            'm3' : (str, ""),
                            'wl' : (int, CFG_WEBSEARCH_WILDCARD_LIMIT)}
 
     tmpl_openurl_accepted_args = {
             'id' : (list, []),
             'genre' : (str, ''),
             'aulast' : (str, ''),
             'aufirst' : (str, ''),
             'auinit' : (str, ''),
             'auinit1' : (str, ''),
             'auinitm' : (str, ''),
             'issn' : (str, ''),
             'eissn' : (str, ''),
             'coden' : (str, ''),
             'isbn' : (str, ''),
             'sici' : (str, ''),
             'bici' : (str, ''),
             'title' : (str, ''),
             'stitle' : (str, ''),
             'atitle' : (str, ''),
             'volume' : (str, ''),
             'part' : (str, ''),
             'issue' : (str, ''),
             'spage' : (str, ''),
             'epage' : (str, ''),
             'pages' : (str, ''),
             'artnum' : (str, ''),
             'date' : (str, ''),
             'ssn' : (str, ''),
             'quarter' : (str, ''),
             'url_ver' : (str, ''),
             'ctx_ver' : (str, ''),
             'rft_val_fmt' : (str, ''),
             'rft_id' : (list, []),
             'rft.atitle' : (str, ''),
             'rft.title' : (str, ''),
             'rft.jtitle' : (str, ''),
             'rft.stitle' : (str, ''),
             'rft.date' : (str, ''),
             'rft.volume' : (str, ''),
             'rft.issue' : (str, ''),
             'rft.spage' : (str, ''),
             'rft.epage' : (str, ''),
             'rft.pages' : (str, ''),
             'rft.artnumber' : (str, ''),
             'rft.issn' : (str, ''),
             'rft.eissn' : (str, ''),
             'rft.aulast' : (str, ''),
             'rft.aufirst' : (str, ''),
             'rft.auinit' : (str, ''),
             'rft.auinit1' : (str, ''),
             'rft.auinitm' : (str, ''),
             'rft.ausuffix' : (str, ''),
             'rft.au' : (list, []),
             'rft.aucorp' : (str, ''),
             'rft.isbn' : (str, ''),
             'rft.coden' : (str, ''),
             'rft.sici' : (str, ''),
             'rft.genre' : (str, 'unknown'),
             'rft.chron' : (str, ''),
             'rft.ssn' : (str, ''),
             'rft.quarter' : (str, ''),
             'rft.part' : (str, ''),
             'rft.btitle' : (str, ''),
             'rft.place' : (str, ''),
             'rft.pub' : (str, ''),
             'rft.edition' : (str, ''),
             'rft.tpages' : (str, ''),
             'rft.series' : (str, ''),
     }
 
     tmpl_opensearch_rss_url_syntax = "%(CFG_SITE_URL)s/rss?p={searchTerms}&amp;jrec={startIndex}&amp;rg={count}&amp;ln={language}" % {'CFG_SITE_URL': CFG_SITE_URL}
     tmpl_opensearch_html_url_syntax = "%(CFG_SITE_URL)s/search?p={searchTerms}&amp;jrec={startIndex}&amp;rg={count}&amp;ln={language}" % {'CFG_SITE_URL': CFG_SITE_URL}
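     # For example (hypothetical values), an OpenSearch client substituting
     # {searchTerms}='ellis', {startIndex}=1, {count}=10, {language}='en'
     # into tmpl_opensearch_html_url_syntax above obtains
     # <CFG_SITE_URL>/search?p=ellis&amp;jrec=1&amp;rg=10&amp;ln=en.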
 
     def tmpl_openurl2invenio(self, openurl_data):
         """ Return an Invenio url corresponding to a search with the data
         included in the openurl form map.
         """
         def isbn_to_isbn13_isbn10(isbn):
             isbn = isbn.replace(' ', '').replace('-', '')
             if len(isbn) == 10 and isbn.isdigit():
                 ## We already have isbn10
                 return ('', isbn)
             if len(isbn) != 13 or not isbn.isdigit():
                 return ('', '')
             isbn13, isbn10 = isbn, isbn[3:-1]
             checksum = 0
             weight = 10
             for char in isbn10:
                 checksum += int(char) * weight
                 weight -= 1
             checksum = 11 - (checksum % 11)
             if checksum == 10:
                 isbn10 += 'X'
             elif checksum == 11:
                 isbn10 += '0'
             else:
                 isbn10 += str(checksum)
             return (isbn13, isbn10)
 
         from invenio.search_engine import perform_request_search
         doi = ''
         pmid = ''
         bibcode = ''
         oai = ''
         issn = ''
         isbn = ''
         for elem in openurl_data['id']:
             if elem.startswith('doi:'):
                 doi = elem[len('doi:'):]
             elif elem.startswith('pmid:'):
                 pmid = elem[len('pmid:'):]
             elif elem.startswith('bibcode:'):
                 bibcode = elem[len('bibcode:'):]
             elif elem.startswith('oai:'):
                 oai = elem[len('oai:'):]
         for elem in openurl_data['rft_id']:
             if elem.startswith('info:doi/'):
                 doi = elem[len('info:doi/'):]
             elif elem.startswith('info:pmid/'):
                 pmid = elem[len('info:pmid/'):]
             elif elem.startswith('info:bibcode/'):
                 bibcode = elem[len('info:bibcode/'):]
             elif elem.startswith('info:oai/'):
                 oai = elem[len('info:oai/'):]
             elif elem.startswith('urn:ISBN:'):
                 isbn = elem[len('urn:ISBN:'):]
             elif elem.startswith('urn:ISSN:'):
                 issn = elem[len('urn:ISSN:'):]
 
         ## Building author query
         aulast = openurl_data['rft.aulast'] or openurl_data['aulast']
         aufirst = openurl_data['rft.aufirst'] or openurl_data['aufirst']
         auinit = openurl_data['rft.auinit'] or \
                  openurl_data['auinit'] or \
                  (openurl_data['rft.auinit1'] + ' ' + openurl_data['rft.auinitm']).strip() or \
                  (openurl_data['auinit1'] + ' ' + openurl_data['auinitm']).strip() or aufirst[:1]
         auinit = auinit.upper()
         if aulast and aufirst:
             author_query = 'author:"%s, %s" or author:"%s, %s"' % (aulast, aufirst, aulast, auinit)
         elif aulast and auinit:
             author_query = 'author:"%s, %s"' % (aulast, auinit)
         else:
             author_query = ''
 
         ## Building title query
         title = openurl_data['rft.atitle'] or \
                 openurl_data['atitle'] or \
                 openurl_data['rft.btitle'] or \
                 openurl_data['rft.title'] or \
                 openurl_data['title']
         if title:
             title_query = 'title:"%s"' % title
             title_query_cleaned = 'title:"%s"' % _RE_SPACES.sub(' ', _RE_PUNCTUATION.sub(' ', title))
         else:
             title_query = ''
 
         ## Building journal query
         jtitle = openurl_data['rft.stitle'] or \
                  openurl_data['stitle'] or \
                  openurl_data['rft.jtitle'] or \
                  openurl_data['title']
         if jtitle:
             journal_query = 'journal:"%s"' % jtitle
         else:
             journal_query = ''
 
         ## Building isbn query
         isbn = isbn or openurl_data['rft.isbn'] or \
                openurl_data['isbn']
         isbn13, isbn10 = isbn_to_isbn13_isbn10(isbn)
         if isbn13:
             isbn_query = 'isbn:"%s" or isbn:"%s"' % (isbn13, isbn10)
         elif isbn10:
             isbn_query = 'isbn:"%s"' % isbn10
         else:
             isbn_query = ''
 
         ## Building issn query
         issn = issn or openurl_data['rft.eissn'] or \
                openurl_data['eissn'] or \
                openurl_data['rft.issn'] or \
                openurl_data['issn']
         if issn:
             issn_query = 'issn:"%s"' % issn
         else:
             issn_query = ''
 
         ## Building coden query
         coden = openurl_data['rft.coden'] or openurl_data['coden']
         if coden:
             coden_query = 'coden:"%s"' % coden
         else:
             coden_query = ''
 
         ## Building doi query
         if False: #doi: #FIXME Temporarily disabled until the doi field is properly set up
             doi_query = 'doi:"%s"' % doi
         else:
             doi_query = ''
 
         ## Trying possible searches
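         ## (tried in decreasing order of precision: DOI, then ISBN, then
         ## CODEN, then title+author, then title alone)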
         if doi_query:
             if perform_request_search(p=doi_query):
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : doi_query,
                     'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                     'of' : 'hd'}))
         if isbn_query:
             if perform_request_search(p=isbn_query):
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : isbn_query,
                     'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                     'of' : 'hd'}))
         if coden_query:
             if perform_request_search(p=coden_query):
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : coden_query,
                     'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                     'of' : 'hd'}))
         if author_query and title_query:
             if perform_request_search(p='%s and %s' % (title_query, author_query)):
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : '%s and %s' % (title_query, author_query),
                     'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                     'of' : 'hd'}))
         if title_query:
             result = len(perform_request_search(p=title_query))
             if result == 1:
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : title_query,
                     'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                     'of' : 'hd'}))
             elif result > 1:
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : title_query,
                     'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                     'of' : 'hb'}))
 
         ## Nothing worked, let's return a search that the user can improve
         if author_query and title_query:
             return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                 'p' : '%s and %s' % (title_query_cleaned, author_query),
                 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                 'of' : 'hb'}, {}))
         elif title_query:
             return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                 'p' : title_query_cleaned,
                 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                 'of' : 'hb'}, {}))
         else:
             ## Mmh. Too little information provided.
             return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                         'p' : 'recid:-1',
                         'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION,
                         'of' : 'hb'}, {}))
 
     def tmpl_opensearch_description(self, ln):
         """ Returns the OpenSearch description file of this site.
         """
         _ = gettext_set_language(ln)
         return """<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
                        xmlns:moz="http://www.mozilla.org/2006/browser/search/">
 <ShortName>%(short_name)s</ShortName>
 <LongName>%(long_name)s</LongName>
 <Description>%(description)s</Description>
 <InputEncoding>UTF-8</InputEncoding>
 <OutputEncoding>UTF-8</OutputEncoding>
 <Language>*</Language>
 <Contact>%(CFG_SITE_ADMIN_EMAIL)s</Contact>
 <Query role="example" searchTerms="a" />
 <Developer>Powered by Invenio</Developer>
 <Url type="text/html" indexOffset="1" rel="results" template="%(html_search_syntax)s" />
 <Url type="application/rss+xml" indexOffset="1" rel="results" template="%(rss_search_syntax)s" />
 <Url type="application/opensearchdescription+xml" rel="self" template="%(CFG_SITE_URL)s/opensearchdescription" />
 <moz:SearchForm>%(CFG_SITE_URL)s</moz:SearchForm>
 </OpenSearchDescription>""" % \
   {'CFG_SITE_URL': CFG_SITE_URL,
    'short_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)[:16],
    'long_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME),
    'description': (_("Search on %(x_CFG_SITE_NAME_INTL)s") % \
    {'x_CFG_SITE_NAME_INTL': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)})[:1024],
    'CFG_SITE_ADMIN_EMAIL': CFG_SITE_ADMIN_EMAIL,
    'rss_search_syntax': self.tmpl_opensearch_rss_url_syntax,
    'html_search_syntax': self.tmpl_opensearch_html_url_syntax
    }
 
     def build_search_url(self, known_parameters={}, **kargs):
         """ Helper for generating a canonical search
         URL. 'known_parameters' is the dictionary of query parameters you
         inherit from your current query. You can then pass keyword
         arguments to modify this query.
 
            build_search_url(known_parameters, of="xm")
 
         The generated URL is absolute.
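
         Illustrative example (host and argument escaping depend on
         CFG_SITE_URL and make_canonical_urlargd):

            build_search_url(p='ellis', of='xm')
            -> '<CFG_SITE_URL>/search?p=ellis&of=xm'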
         """
 
         parameters = {}
         parameters.update(known_parameters)
         parameters.update(kargs)
 
         # Keep only the arguments that differ from their default value
         parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd)
 
         # Treat `as' argument specially:
         if 'aas' in parameters:
             parameters['as'] = parameters['aas']
             del parameters['aas']
 
         # Asking for a recid? Return a /CFG_SITE_RECORD/<recid> URL
         if 'recid' in parameters:
             target = "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, parameters['recid'])
             del parameters['recid']
             target += make_canonical_urlargd(parameters, self.search_results_default_urlargd)
             return target
 
         return "%s/search%s" % (CFG_SITE_URL, make_canonical_urlargd(parameters, self.search_results_default_urlargd))
 
     def build_search_interface_url(self, known_parameters={}, **kargs):
         """ Helper for generating a canonical search interface URL."""
 
         parameters = {}
         parameters.update(known_parameters)
         parameters.update(kargs)
 
         c = parameters.pop('c', None)
 
         # Keep only the arguments that differ from their default value
         parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd)
 
         # Treat `as' argument specially:
         if 'aas' in parameters:
             parameters['as'] = parameters['aas']
             del parameters['aas']
 
         if c and c != CFG_SITE_NAME:
             base = CFG_SITE_URL + '/collection/' + quote(c)
         else:
             base = CFG_SITE_URL
         return create_url(base, parameters)
 
     def build_rss_url(self, known_parameters, **kargs):
         """Helper for generating a canonical RSS URL"""
 
         parameters = {}
         parameters.update(known_parameters)
         parameters.update(kargs)
 
         # Keep only interesting parameters
         argd = wash_urlargd(parameters, self.rss_default_urlargd)
 
         args = ''
         if argd:
             # Handle 'c' separately since it is a list
             c = argd.pop('c', [])
             # Create the query, dropping empty parameters
             args = make_canonical_urlargd(argd, self.rss_default_urlargd)
             if c:
                 # Add collections
                 c = [quote(coll) for coll in c]
                 if args == '':
                     args += '?'
                 else:
                     args += '&amp;'
                 args += 'c=' + '&amp;c='.join(c)

         return CFG_SITE_URL + '/rss' + args
 
     def tmpl_record_page_header_content(self, req, recid, ln):
         """
         Provide extra information in the header of /CFG_SITE_RECORD pages
 
         Return (title, description, keywords), not escaped for HTML
         """
 
         _ = gettext_set_language(ln)
 
         title = get_fieldvalues(recid, "245__a") or \
                 get_fieldvalues(recid, "111__a")
 
         if title:
             title = title[0]
         else:
             title = _("Record") + ' #%d' % recid
 
         keywords = ', '.join(get_fieldvalues(recid, "6531_a"))
         description = ' '.join(get_fieldvalues(recid, "520__a"))
         description += "\n"
         description += '; '.join(get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a"))
 
         return (title, description, keywords)
 
 
     def tmpl_exact_author_browse_help_link(self, p, p1, p2, p3, f, f1, f2, f3, rm, cc, ln, jrec, rg, aas, action, link_name):
         """
         Creates the 'exact author' help link for browsing.
 
         """
         _ = gettext_set_language(ln)
         url = create_html_link(self.build_search_url(p=p,
                                                      p1=p1,
                                                      p2=p2,
                                                      p3=p3,
                                                      f=f,
                                                      f1=f1,
                                                      f2=f2,
                                                      f3=f3,
                                                      rm=rm,
                                                      cc=cc,
                                                      ln=ln,
                                                      jrec=jrec,
                                                      rg=rg,
                                                      aas=aas,
                                                      action=action),
                                {}, _(link_name), {'class': 'nearestterms'})
         return "Did you mean to browse in %s index?" % url
 
 
     def tmpl_navtrail_links(self, aas, ln, dads):
         """
         Creates the navigation bar at top of each search page (*Home > Root collection > subcollection > ...*)
 
         Parameters:
 
           - 'aas' *int* - Should we display an advanced search box?
 
           - 'ln' *string* - The language to display
 
           - 'dads' *list* - A list of parent collections, each one being a tuple ('name', 'longname')
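
           Example (illustrative): dads=[('Preprints', 'Preprints'),
           ('Theses', 'PhD Theses')] renders two '&gt;'-separated links.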
         """
         out = []
         for url, name in dads:
             args = {'c': url, 'as': aas, 'ln': ln}
             out.append(create_html_link(self.build_search_interface_url(**args), {}, cgi.escape(name), {'class': 'navtrail'}))
 
         return ' &gt; '.join(out)
 
     def tmpl_webcoll_body(self, ln, collection, te_portalbox,
                           searchfor, np_portalbox, narrowsearch,
                           focuson, instantbrowse, ne_portalbox, show_body=True):
 
         """ Creates the body of the main search page.
 
         Parameters:
 
           - 'ln' *string* - language of the page being generated
 
           - 'collection' - collection id of the page being generated
 
           - 'te_portalbox' *string* - The HTML code for the portalbox on top of search
 
           - 'searchfor' *string* - The HTML code for the search for box
 
           - 'np_portalbox' *string* - The HTML code for the portalbox on bottom of search
 
           - 'narrowsearch' *string* - The HTML code for the search categories (left bottom of page)
 
           - 'focuson' *string* - The HTML code for the "focuson" categories (right bottom of page)
 
           - 'ne_portalbox' *string* - The HTML code for the bottom of the page

           - 'instantbrowse' *string* - HTML code for the latest additions box, used when 'narrowsearch' is empty

           - 'show_body' *bool* - whether to display the narrow search/focus on part of the page
         """
 
         if not narrowsearch:
             narrowsearch = instantbrowse
 
         body = '''
                 <form name="search" action="%(siteurl)s/search" method="get">
                 %(searchfor)s
                 %(np_portalbox)s''' % {
                  'siteurl' : CFG_SITE_URL,
                  'searchfor' : searchfor,
                  'np_portalbox' : np_portalbox
                  }
         if show_body:
             body += '''
                     <table cellspacing="0" cellpadding="0" border="0" class="narrowandfocusonsearchbox">
                       <tr>
                         <td valign="top">%(narrowsearch)s</td>
                    ''' % { 'narrowsearch' : narrowsearch }
             if focuson:
                 body += """<td valign="top">""" + focuson + """</td>"""
             body += """</tr></table>"""
         elif focuson:
             body += focuson
         body += """%(ne_portalbox)s
                </form>""" % {'ne_portalbox' : ne_portalbox}
         return body
 
     def tmpl_portalbox(self, title, body):
         """Creates portalboxes based on the parameters
         Parameters:
 
           - 'title' *string* - The title of the box
 
           - 'body' *string* - The HTML code for the body of the box
 
         """
         out = """<div class="portalbox">
                     <div class="portalboxheader">%(title)s</div>
                     <div class="portalboxbody">%(body)s</div>
                  </div>""" % {'title' : cgi.escape(title), 'body' : body}
 
         return out
 
     def tmpl_searchfor_light(self, ln, collection_id, collection_name, record_count,
                              example_search_queries): # EXPERIMENTAL
         """Produces light *Search for* box for the current collection.
 
         Parameters:
 
           - 'ln' *string* - The language to display

           - 'collection_id' *string* - The collection id

           - 'collection_name' *string* - The collection name in the current language

           - 'record_count' *int* - Number of records in this collection

           - 'example_search_queries' *list* - List of search queries given as examples for this collection
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''
         <!--create_searchfor_light()-->
         '''
 
         argd = drop_default_urlargd({'ln': ln, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION},
                                     self.search_results_default_urlargd)
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
 
         header = _("Search %s records for:") % \
                  self.tmpl_nbrecs_info(record_count, "", "")
         asearchurl = self.build_search_interface_url(c=collection_id,
                                                      aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES),
                                                      ln=ln)
 
         # Build example of queries for this collection
         example_search_queries_links = [create_html_link(self.build_search_url(p=example_query,
                                                                                ln=ln,
                                                                                aas= -1,
                                                                                c=collection_id),
                                                          {},
                                                          cgi.escape(example_query),
                                                          {'class': 'examplequery'}) \
                                         for example_query in example_search_queries]
         example_query_html = ''
         if len(example_search_queries) > 0:
             example_query_link = example_search_queries_links[0]
 
             # offers more examples if possible
             more = ''
             if len(example_search_queries_links) > 1:
                 more = '''
                 <script type="text/javascript">
                 function toggle_more_example_queries_visibility(){
                     var more = document.getElementById('more_example_queries');
                     var link = document.getElementById('link_example_queries');
                     var sep = document.getElementById('more_example_sep');
                     if (more.style.display=='none'){
                         more.style.display = '';
                         link.innerHTML = "%(show_less)s"
                         link.style.color = "rgb(204,0,0)";
                         sep.style.display = 'none';
                     } else {
                         more.style.display = 'none';
                         link.innerHTML = "%(show_more)s"
                         link.style.color = "rgb(0,0,204)";
                         sep.style.display = '';
                     }
                     return false;
                 }
                 </script>
                 <span id="more_example_queries" style="display:none;text-align:right"><br/>%(more_example_queries)s<br/></span>
                 <a id="link_example_queries" href="#" onclick="toggle_more_example_queries_visibility()" style="display:none"></a>
                 <script type="text/javascript">
                     var link = document.getElementById('link_example_queries');
                     var sep = document.getElementById('more_example_sep');
                     link.style.display = '';
                     link.innerHTML = "%(show_more)s";
                     sep.style.display = '';
                 </script>
                 ''' % {'more_example_queries': '<br/>'.join(example_search_queries_links[1:]),
                        'show_less':_("less"),
                        'show_more':_("more")}
 
             example_query_html += '''<p style="text-align:right;margin:0px;">
             %(example)s<span id="more_example_sep" style="display:none;">&nbsp;&nbsp;::&nbsp;</span>%(more)s
             </p>
             ''' % {'example': _("Example: %(x_sample_search_query)s") % \
                    {'x_sample_search_query': example_query_link},
                    'more': more}
 
         # display options to search in current collection or everywhere
         search_in = ''
         if collection_name != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME):
             search_in += '''
            <input type="radio" name="cc" value="%(collection_id)s" id="searchCollection" checked="checked"/>
            <label for="searchCollection">%(search_in_collection_name)s</label>
            <input type="radio" name="cc" value="%(root_collection_name)s" id="searchEverywhere" />
            <label for="searchEverywhere">%(search_everywhere)s</label>
            ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \
                   {'x_collection_name': collection_name},
                   'collection_id': collection_id,
                   'root_collection_name': CFG_SITE_NAME,
                   'search_everywhere': _("Search everywhere")}
 
         # print commentary start:
         out += '''
         <table class="searchbox lightsearch">
          <tbody>
           <tr valign="baseline">
            <td class="searchboxbody" align="right"><input type="text" name="p" size="%(sizepattern)d" value="" class="lightsearchfield"/><br/>
              <small><small>%(example_query_html)s</small></small>
            </td>
            <td class="searchboxbody" align="left">
              <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" />
            </td>
            <td class="searchboxbody" align="left" rowspan="2" valign="top">
              <small><small>
              <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a><br/>
              %(asearch)s
              </small></small>
            </td>
           </tr></table>
           <!--<tr valign="baseline">
            <td class="searchboxbody" colspan="2" align="left">
              <small>
                --><small>%(search_in)s</small><!--
              </small>
            </td>
           </tr>
          </tbody>
         </table>-->
         <!--/create_searchfor_light()-->
         ''' % {'ln' : ln,
                'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH,
                'langlink': '?ln=' + ln,
                'siteurl' : CFG_SITE_URL,
                'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')),
                'header' : header,
                'msg_search' : _('Search'),
                'msg_browse' : _('Browse'),
                'msg_search_tips' : _('Search Tips'),
                'search_in': search_in,
                'example_query_html': example_query_html}
 
         return out
 
     def tmpl_searchfor_simple(self, ln, collection_id, collection_name, record_count, middle_option):
         """Produces simple *Search for* box for the current collection.
 
         Parameters:
 
           - 'ln' *string* - The language to display

           - 'collection_id' *string* - The collection id

           - 'collection_name' *string* - The collection name in the current language

           - 'record_count' *int* - Number of records in this collection
 
           - 'middle_option' *string* - HTML code for the options (any field, specific fields ...)
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''
         <!--create_searchfor_simple()-->
         '''
 
         argd = drop_default_urlargd({'ln': ln, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION},
                                     self.search_results_default_urlargd)
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
 
         header = _("Search %s records for:") % \
                  self.tmpl_nbrecs_info(record_count, "", "")
         asearchurl = self.build_search_interface_url(c=collection_id,
                                                      aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES),
                                                      ln=ln)
         # print commentary start:
         out += '''
         <table class="searchbox simplesearch">
          <thead>
           <tr align="left">
            <th colspan="3" class="searchboxheader">%(header)s</th>
           </tr>
          </thead>
          <tbody>
           <tr valign="baseline">
            <td class="searchboxbody" align="left"><input type="text" name="p" size="%(sizepattern)d" value="" class="simplesearchfield"/></td>
            <td class="searchboxbody" align="left">%(middle_option)s</td>
            <td class="searchboxbody" align="left">
              <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" />
              <input class="formbutton" type="submit" name="action_browse" value="%(msg_browse)s" /></td>
           </tr>
           <tr valign="baseline">
            <td class="searchboxbody" colspan="3" align="right">
              <small>
                <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a> ::
                %(asearch)s
              </small>
            </td>
           </tr>
          </tbody>
         </table>
         <!--/create_searchfor_simple()-->
         ''' % {'ln' : ln,
                'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH,
                'langlink': '?ln=' + ln,
                'siteurl' : CFG_SITE_URL,
                'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')),
                'header' : header,
                'middle_option' : middle_option,
                'msg_search' : _('Search'),
                'msg_browse' : _('Browse'),
                'msg_search_tips' : _('Search Tips')}
 
         return out
 
     def tmpl_searchfor_advanced(self,
                                 ln, # current language
                                 collection_id,
                                 collection_name,
                                 record_count,
                                 middle_option_1, middle_option_2, middle_option_3,
                                 searchoptions,
                                 sortoptions,
                                 rankoptions,
                                 displayoptions,
                                 formatoptions
                                 ):
         """
           Produces advanced *Search for* box for the current collection.
 
           Parameters:
 
             - 'ln' *string* - The language to display
 
             - 'middle_option_1' *string* - HTML code for the first row of options (any field, specific fields ...)
 
             - 'middle_option_2' *string* - HTML code for the second row of options (any field, specific fields ...)
 
             - 'middle_option_3' *string* - HTML code for the third row of options (any field, specific fields ...)
 
             - 'searchoptions' *string* - HTML code for the search options
 
             - 'sortoptions' *string* - HTML code for the sort options
 
             - 'rankoptions' *string* - HTML code for the rank options
 
             - 'displayoptions' *string* - HTML code for the display options
 
             - 'formatoptions' *string* - HTML code for the format options
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''
         <!--create_searchfor_advanced()-->
         '''
 
         argd = drop_default_urlargd({'ln': ln, 'aas': 1, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION},
                                     self.search_results_default_urlargd)
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
 
         header = _("Search %s records for") % \
                  self.tmpl_nbrecs_info(record_count, "", "")
         header += ':'
         ssearchurl = self.build_search_interface_url(c=collection_id, aas=min(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln)
 
         out += '''
         <table class="searchbox advancedsearch">
          <thead>
           <tr>
            <th class="searchboxheader" colspan="3">%(header)s</th>
           </tr>
          </thead>
          <tbody>
           <tr valign="bottom">
             <td class="searchboxbody" style="white-space: nowrap;">
                 %(matchbox_m1)s<input type="text" name="p1" size="%(sizepattern)d" value="" class="advancedsearchfield"/>
             </td>
             <td class="searchboxbody" style="white-space: nowrap;">%(middle_option_1)s</td>
             <td class="searchboxbody">%(andornot_op1)s</td>
           </tr>
           <tr valign="bottom">
             <td class="searchboxbody" style="white-space: nowrap;">
                 %(matchbox_m2)s<input type="text" name="p2" size="%(sizepattern)d" value="" class="advancedsearchfield"/>
             </td>
             <td class="searchboxbody">%(middle_option_2)s</td>
             <td class="searchboxbody">%(andornot_op2)s</td>
           </tr>
           <tr valign="bottom">
             <td class="searchboxbody" style="white-space: nowrap;">
                 %(matchbox_m3)s<input type="text" name="p3" size="%(sizepattern)d" value="" class="advancedsearchfield"/>
             </td>
             <td class="searchboxbody">%(middle_option_3)s</td>
             <td class="searchboxbody" style="white-space: nowrap;">
               <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" />
               <input class="formbutton" type="submit" name="action_browse" value="%(msg_browse)s" /></td>
           </tr>
           <tr valign="bottom">
             <td colspan="3" class="searchboxbody" align="right">
               <small>
                 <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a> ::
                 %(ssearch)s
               </small>
             </td>
           </tr>
          </tbody>
         </table>
         <!-- @todo - more imports -->
         ''' % {'ln' : ln,
                'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH,
                'langlink': '?ln=' + ln,
                'siteurl' : CFG_SITE_URL,
                'ssearch' : create_html_link(ssearchurl, {}, _("Simple Search")),
                'header' : header,
 
                'matchbox_m1' : self.tmpl_matchtype_box('m1', ln=ln),
                'middle_option_1' : middle_option_1,
                'andornot_op1' : self.tmpl_andornot_box('op1', ln=ln),
 
                'matchbox_m2' : self.tmpl_matchtype_box('m2', ln=ln),
                'middle_option_2' : middle_option_2,
                'andornot_op2' : self.tmpl_andornot_box('op2', ln=ln),
 
                'matchbox_m3' : self.tmpl_matchtype_box('m3', ln=ln),
                'middle_option_3' : middle_option_3,
 
                'msg_search' : _("Search"),
                'msg_browse' : _("Browse"),
                'msg_search_tips' : _("Search Tips")}
 
         if searchoptions:
             out += """<table class="searchbox">
                       <thead>
                        <tr>
                          <th class="searchboxheader">
                            %(searchheader)s
                          </th>
                        </tr>
                       </thead>
                       <tbody>
                        <tr valign="bottom">
                         <td class="searchboxbody">%(searchoptions)s</td>
                        </tr>
                       </tbody>
                      </table>""" % {
                        'searchheader' : _("Search options:"),
                        'searchoptions' : searchoptions
                      }
 
         out += """<table class="searchbox">
                    <thead>
                     <tr>
                       <th class="searchboxheader">
                         %(added)s
                       </th>
                       <th class="searchboxheader">
                         %(until)s
                       </th>
                     </tr>
                    </thead>
                    <tbody>
                     <tr valign="bottom">
                       <td class="searchboxbody">%(added_or_modified)s %(date_added)s</td>
                       <td class="searchboxbody">%(date_until)s</td>
                     </tr>
                    </tbody>
                   </table>
                   <table class="searchbox">
                    <thead>
                     <tr>
                       <th class="searchboxheader">
                         %(msg_sort)s
                       </th>
                       <th class="searchboxheader">
                         %(msg_display)s
                       </th>
                       <th class="searchboxheader">
                         %(msg_format)s
                       </th>
                     </tr>
                    </thead>
                    <tbody>
                     <tr valign="bottom">
                       <td class="searchboxbody">%(sortoptions)s %(rankoptions)s</td>
                       <td class="searchboxbody">%(displayoptions)s</td>
                       <td class="searchboxbody">%(formatoptions)s</td>
                     </tr>
                    </tbody>
                   </table>
                   <!--/create_searchfor_advanced()-->
               """ % {
 
                     'added' : _("Added/modified since:"),
                     'until' : _("until:"),
                     'added_or_modified': self.tmpl_inputdatetype(ln=ln),
                     'date_added' : self.tmpl_inputdate("d1", ln=ln),
                     'date_until' : self.tmpl_inputdate("d2", ln=ln),
 
                     'msg_sort' : _("Sort by:"),
                     'msg_display' : _("Display results:"),
                     'msg_format' : _("Output format:"),
                     'sortoptions' : sortoptions,
                     'rankoptions' : rankoptions,
                     'displayoptions' : displayoptions,
                     'formatoptions' : formatoptions
                   }
         return out
 
     def tmpl_matchtype_box(self, name='m', value='', ln='en'):
         """Returns HTML code for the 'match type' selection box.
 
           Parameters:
 
             - 'name' *string* - The name of the produced select
 
             - 'value' *string* - The selected value (if any value is already selected)
 
             - 'ln' *string* - the language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = """
         <select name="%(name)s">
         <option value="a"%(sela)s>%(opta)s</option>
         <option value="o"%(selo)s>%(opto)s</option>
         <option value="e"%(sele)s>%(opte)s</option>
         <option value="p"%(selp)s>%(optp)s</option>
         <option value="r"%(selr)s>%(optr)s</option>
         </select>
         """ % {'name' : name,
                'sela' : self.tmpl_is_selected('a', value),
                                                            'opta' : _("All of the words:"),
                'selo' : self.tmpl_is_selected('o', value),
                                                            'opto' : _("Any of the words:"),
                'sele' : self.tmpl_is_selected('e', value),
                                                            'opte' : _("Exact phrase:"),
                'selp' : self.tmpl_is_selected('p', value),
                                                            'optp' : _("Partial phrase:"),
                'selr' : self.tmpl_is_selected('r', value),
                                                            'optr' : _("Regular expression:")
               }
         return out
 
     def tmpl_is_selected(self, var, fld):
         """
           Checks if *var* and *fld* are equal, and if yes, returns ' selected="selected"'.  Useful for select boxes.
 
           Parameters:
 
           - 'var' *string* - First value to compare
 
           - 'fld' *string* - Second value to compare
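
           Example: tmpl_is_selected('a', 'a') returns ' selected="selected"',
           while tmpl_is_selected('a', 'o') returns ''.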
         """
         if var == fld:
             return ' selected="selected"'
         else:
             return ""
 
     def tmpl_andornot_box(self, name='op', value='', ln='en'):
         """
           Returns HTML code for the AND/OR/NOT selection box.
 
           Parameters:
 
             - 'name' *string* - The name of the produced select
 
             - 'value' *string* - The selected value (if any value is already selected)
 
             - 'ln' *string* - the language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = """
         <select name="%(name)s">
         <option value="a"%(sela)s>%(opta)s</option>
         <option value="o"%(selo)s>%(opto)s</option>
         <option value="n"%(seln)s>%(optn)s</option>
         </select>
         """ % {'name' : name,
                'sela' : self.tmpl_is_selected('a', value), 'opta' : _("AND"),
                'selo' : self.tmpl_is_selected('o', value), 'opto' : _("OR"),
                'seln' : self.tmpl_is_selected('n', value), 'optn' : _("AND NOT")
               }
         return out
 
     def tmpl_inputdate(self, name, ln, sy=0, sm=0, sd=0):
         """
           Produces *From Date*, *Until Date* kind of selection box. Suitable for search options.
 
           Parameters:
 
             - 'name' *string* - The base name of the produced selects

             - 'ln' *string* - the language to display

             - 'sy', 'sm', 'sd' *int* - preselected year, month and day (0 means not preselected)
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         box = """
                <select name="%(name)sd">
                  <option value=""%(sel)s>%(any)s</option>
               """ % {
                 'name' : name,
                 'any' : _("any day"),
                 'sel' : self.tmpl_is_selected(sd, 0)
               }
         for day in range(1, 32):
             box += """<option value="%02d"%s>%02d</option>""" % (day, self.tmpl_is_selected(sd, day), day)
         box += """</select>"""
         # month
         box += """
                 <select name="%(name)sm">
                   <option value=""%(sel)s>%(any)s</option>
                """ % {
                  'name' : name,
                  'any' : _("any month"),
                  'sel' : self.tmpl_is_selected(sm, 0)
                }
         # trailing space in May distinguishes short/long form of the month name
         for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \
                           (5, _("May ")), (6, _("June")), (7, _("July")), (8, _("August")), \
                           (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]:
             box += """<option value="%02d"%s>%s</option>""" % (mm, self.tmpl_is_selected(sm, mm), month.strip())
         box += """</select>"""
         # year
         box += """
                 <select name="%(name)sy">
                   <option value=""%(sel)s>%(any)s</option>
                """ % {
                  'name' : name,
                  'any' : _("any year"),
                  'sel' : self.tmpl_is_selected(sy, 0)
                }
         this_year = int(time.strftime("%Y", time.localtime()))
         for year in range(this_year - 20, this_year + 1):
             box += """<option value="%d"%s>%d</option>""" % (year, self.tmpl_is_selected(sy, year), year)
         box += """</select>"""
         return box
 
     def tmpl_inputdatetype(self, dt='', ln=CFG_SITE_LANG):
         """
           Produces input date type selection box to choose
           added-or-modified date search option.
 
           Parameters:
 
             - 'dt' *string* - date type (c=created, m=modified)
 
             - 'ln' *string* - the language to display
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         box = """<select name="dt">
                   <option value="">%(added)s </option>
                   <option value="m"%(sel)s>%(modified)s </option>
                  </select>
               """ % { 'added': _("Added since:"),
                       'modified': _("Modified since:"),
                       'sel': self.tmpl_is_selected(dt, 'm'),
                     }
         return box
 
     def tmpl_narrowsearch(self, aas, ln, type, father,
                           has_grandchildren, sons, display_grandsons,
                           grandsons):
 
         """
         Creates list of collection descendants of type *type* under title *title*.
         If aas==1, then links to Advanced Search interfaces; otherwise Simple Search.
         Suitable for 'Narrow search' and 'Focus on' boxes.
 
         Parameters:
 
           - 'aas' *int* - Should we display an advanced search box?
 
           - 'ln' *string* - The language to display
 
           - 'type' *string* - The type of the produced box (virtual collections or normal collections)
 
           - 'father' *collection* - The current collection
 
           - 'has_grandchildren' *bool* - If the current collection has grand children
 
           - 'sons' *list* - The list of the sub-collections (first level)
 
           - 'display_grandsons' *bool* - If the grand children collections should be displayed (2 level deep display)
 
           - 'grandsons' *list* - The list of sub-collections (second level)
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         title = father.get_collectionbox_name(ln, type)
 
         if has_grandchildren:
             style_prolog = "<strong>"
             style_epilog = "</strong>"
         else:
             style_prolog = ""
             style_epilog = ""
 
         out = """<table class="%(narrowsearchbox)s">
                    <thead>
                     <tr>
                      <th colspan="2" align="left" class="%(narrowsearchbox)sheader">
                       %(title)s
                      </th>
                     </tr>
                    </thead>
                    <tbody>""" % {'title' : title,
                                  'narrowsearchbox': {'r': 'narrowsearchbox',
                                                      'v': 'focusonsearchbox'}[type]}
         # iterate through sons:
         i = 0
         for son in sons:
             out += """<tr><td class="%(narrowsearchbox)sbody" valign="top">""" % \
                    { 'narrowsearchbox': {'r': 'narrowsearchbox',
                                          'v': 'focusonsearchbox'}[type]}
 
             if type == 'r':
                 if son.restricted_p() and son.restricted_p() != father.restricted_p():
                     out += """<input type="checkbox" name="c" value="%(name)s" /></td>""" % {'name' : cgi.escape(son.name) }
                 # hosted collections are checked by default only when configured so
                 elif str(son.dbquery).startswith("hostedcollection:"):
                     external_collection_engine = get_external_collection_engine(str(son.name))
                     if external_collection_engine and external_collection_engine.selected_by_default:
                         out += """<input type="checkbox" name="c" value="%(name)s" checked="checked" /></td>""" % {'name' : cgi.escape(son.name) }
                     else:
                         # the engine is either configured not to be selected by
                         # default or, strangely, was never found at all (in which
                         # case, why is the hosted collection here?); leave the
                         # checkbox unchecked in both cases
                         out += """<input type="checkbox" name="c" value="%(name)s" /></td>""" % {'name' : cgi.escape(son.name) }
                 else:
                     out += """<input type="checkbox" name="c" value="%(name)s" checked="checked" /></td>""" % {'name' : cgi.escape(son.name) }
             else:
                 out += '</td>'
             out += """<td valign="top">%(link)s%(recs)s """ % {
                 'link': create_html_link(self.build_search_interface_url(c=son.name, ln=ln, aas=aas),
                                          {}, style_prolog + cgi.escape(son.get_name(ln)) + style_epilog),
                 'recs' : self.tmpl_nbrecs_info(son.nbrecs, ln=ln)}
 
             # The following prints the "external collection" arrow just after
             # the name and number of records of the hosted collection.
             # 1) We might want to make the arrow work as an anchor to the hosted
             #    collection as well; that would probably require a new separate
             #    function under invenio.urlutils.
             # 2) We might want to place the arrow between the name and the number
             #    of records; that would require editing/separating the above
             #    out += ...
             if type == 'r':
                 if str(son.dbquery).startswith("hostedcollection:"):
                     out += """<img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/>""" % \
                            { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(son.name), }
 
             if son.restricted_p():
                 out += """ <small class="warning">[%(msg)s]</small> """ % { 'msg' : _("restricted") }
             if display_grandsons and len(grandsons[i]):
                 # iterate through grandsons:
                 out += """<br />"""
                 for grandson in grandsons[i]:
                     out += """ <small>%(link)s%(nbrec)s</small> """ % {
                         'link': create_html_link(self.build_search_interface_url(c=grandson.name, ln=ln, aas=aas),
                                                  {},
                                                  cgi.escape(grandson.get_name(ln))),
                         'nbrec' : self.tmpl_nbrecs_info(grandson.nbrecs, ln=ln)}
                     # the following prints the "external collection" arrow just
                     # after the name and number of records of the hosted
                     # collection; see the related comments just above
                     if type == 'r':
                         if str(grandson.dbquery).startswith("hostedcollection:"):
                             out += """<img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/>""" % \
                                     { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(grandson.name), }
 
             out += """</td></tr>"""
             i += 1
         out += "</tbody></table>"
 
         return out
 
     def tmpl_searchalso(self, ln, engines_list, collection_id):
         _ = gettext_set_language(ln)
 
         box_name = _("Search also:")
 
         html = """<table cellspacing="0" cellpadding="0" border="0">
             <tr><td valign="top"><table class="searchalsosearchbox">
             <thead><tr><th colspan="2" align="left" class="searchalsosearchboxheader">%(box_name)s
             </th></tr></thead><tbody>
         """ % locals()
 
         for engine in engines_list:
             internal_name = engine.name
             name = _(internal_name)
             base_url = engine.base_url
             if external_collection_get_state(engine, collection_id) == 3:
                 checked = ' checked="checked"'
             else:
                 checked = ''
 
             html += """<tr><td class="searchalsosearchboxbody" valign="top">
                 <input type="checkbox" name="ec" id="%(id)s" value="%(internal_name)s" %(checked)s /></td>
                 <td valign="top" class="searchalsosearchboxbody">
                 <div style="white-space: nowrap"><label for="%(id)s">%(name)s</label>
                 <a href="%(base_url)s">
                 <img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/></a>
                 </div></td></tr>""" % \
                                  { 'checked': checked,
                                    'base_url': base_url,
                                    'internal_name': internal_name,
                                    'name': cgi.escape(name),
                                    'id': "extSearch" + nmtoken_from_string(name),
                                    'siteurl': CFG_SITE_URL, }
 
         html += """</tbody></table></td></tr></table>"""
         return html
 
     def tmpl_nbrecs_info(self, number, prolog=None, epilog=None, ln=CFG_SITE_LANG):
         """
         Return information on the number of records.
 
         Parameters:
 
         - 'number' *int* - The number of records
 
         - 'prolog' *string* (optional) - An HTML code to prefix the number (if **None**, will be
         '<small class="nbdoccoll">(')
 
         - 'epilog' *string* (optional) - An HTML code to append to the number (if **None**, will be
         ')</small>')
         """
 
         if number is None:
             number = 0
         if prolog is None:
             prolog = '''&nbsp;<small class="nbdoccoll">('''
         if epilog is None:
             epilog = ''')</small>'''
 
         return prolog + self.tmpl_nice_number(number, ln) + epilog
 
     def tmpl_box_restricted_content(self, ln):
         """
           Displays a box containing a *restricted content* message
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         return _("This collection is restricted.  If you are authorized to access it, please click on the Search button.")
 
     def tmpl_box_hosted_collection(self, ln):
         """
           Displays a box containing a *hosted collection* message
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         return _("This is a hosted external collection. Please click on the Search button to see its content.")
 
     def tmpl_box_no_records(self, ln):
         """
           Displays a box containing a *no content* message
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         return _("This collection does not contain any document yet.")
 
 
     def tmpl_instant_browse(self, aas, ln, recids, more_link=None, grid_layout=False, father=None):
         """
           Formats a list of records (given in the recids list) from the database.
 
         Parameters:
 
           - 'aas' *int* - Advanced Search interface or not (0 or 1)
 
           - 'ln' *string* - The language to display
 
           - 'recids' *list* - the list of records from the database
 
           - 'more_link' *string* - the "more..." link for the box; if not given, it will not be displayed

           - 'grid_layout' *bool* - whether to lay the records out in a grid rather than in a dated list

           - 'father' *collection* - The current collection
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         body = '''<table class="latestadditionsbox">'''
         if grid_layout:
             body += '<tr><td><div>'
         for recid in recids:
             if grid_layout:
                 body += '''
                 <abbr class="unapi-id" title="%(recid)s"></abbr>
                 %(body)s
             ''' % {
                 'recid': recid['id'],
                 'body': recid['body']}
             else:
                 body += '''
                 <tr>
                   <td class="latestadditionsboxtimebody">%(date)s</td>
                   <td class="latestadditionsboxrecordbody">
                     <abbr class="unapi-id" title="%(recid)s"></abbr>
                     %(body)s
                   </td>
                 </tr>''' % {
                         'recid': recid['id'],
                         'date': recid['date'],
                         'body': recid['body']
                       }
         if grid_layout:
             body += '''<div style="clear:both"></div>'''
             body += '''</div></td></tr>'''
         body += "</table>"
         if more_link:
             body += '<div align="right"><small>' + \
                     create_html_link(more_link, {}, '[&gt;&gt; %s]' % _("more")) + \
                     '</small></div>'
 
         return '''
         <table class="narrowsearchbox">
           <thead>
             <tr>
               <th class="narrowsearchboxheader">%(header)s</th>
             </tr>
           </thead>
           <tbody>
             <tr>
             <td class="narrowsearchboxbody">%(body)s</td>
             </tr>
           </tbody>
         </table>''' % {'header' : father.get_collectionbox_name(ln, 'l') ,
                        'body' : body,
                        }
 
 
     def tmpl_searchwithin_select(self, ln, fieldname, selected, values):
         """
           Produces 'search within' selection box for the current collection.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'fieldname' *string* - the name of the select box produced
 
           - 'selected' *string* - which of the values is selected
 
           - 'values' *list* - the list of values in the select
         """
 
         out = '<select name="%(fieldname)s">' % {'fieldname': fieldname}
 
         if values:
             for pair in values:
                 out += """<option value="%(value)s"%(selected)s>%(text)s</option>""" % {
                          'value'    : cgi.escape(pair['value']),
                          'selected' : self.tmpl_is_selected(pair['value'], selected),
                          'text'     : cgi.escape(pair['text'])
                        }
         out += """</select>"""
         return out
 
     def tmpl_select(self, fieldname, values, selected=None, css_class=''):
         """
           Produces a generic select box
 
         Parameters:
 
           - 'css_class' *string* - optional, a css class to display this select with
 
           - 'fieldname' *list* - the name of the select box produced
 
           - 'selected' *string* - which of the values is selected
 
           - 'values' *list* - the list of values in the select
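
           Example value list (illustrative):
           [{'value': 'title', 'text': 'Title'},
            {'value': 'year', 'text': 'Year', 'selected': True}]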
         """
         if css_class != '':
             class_field = ' class="%s"' % css_class
         else:
             class_field = ''
         out = '<select name="%(fieldname)s"%(class)s>' % {
             'fieldname' : fieldname,
             'class' : class_field
             }
 
         for pair in values:
             if pair.get('selected', False) or pair['value'] == selected:
                 flag = ' selected="selected"'
             else:
                 flag = ''
 
             out += '<option value="%(value)s"%(selected)s>%(text)s</option>' % {
                      'value'    : cgi.escape(str(pair['value'])),
                      'selected' : flag,
                      'text'     : cgi.escape(pair['text'])
                    }
 
         out += """</select>"""
         return out
 
     def tmpl_record_links(self, recid, ln, sf='', so='d', sp='', rm=''):
         """
           Displays the *More info* and *Find similar* links for a record
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recid' *string* - the id of the displayed record

           - 'sf', 'so', 'sp', 'rm' *string* - sort field, sort order, sort pattern and ranking method, propagated to the "Cited by" search link
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''<br /><span class="moreinfo">%(detailed)s - %(similar)s</span>''' % {
             'detailed': create_html_link(self.build_search_url(recid=recid, ln=ln),
                                          {},
                                          _("Detailed record"), {'class': "moreinfo"}),
             'similar': create_html_link(self.build_search_url(p="recid:%d" % recid, rm='wrd', ln=ln),
                                         {},
                                         _("Similar records"),
                                         {'class': "moreinfo"})}
 
         if CFG_BIBRANK_SHOW_CITATION_LINKS:
             num_timescited = get_cited_by_count(recid)
             if num_timescited:
                 out += '''<span class="moreinfo"> - %s </span>''' % \
                        create_html_link(self.build_search_url(p='refersto:recid:%d' % recid,
                                                               sf=sf,
                                                               so=so,
                                                               sp=sp,
                                                               rm=rm,
                                                               ln=ln),
                                         {}, _("Cited by %i records") % num_timescited, {'class': "moreinfo"})
 
         return out
 
     def tmpl_record_body(self, titles, authors, dates, rns, abstracts, urls_u, urls_z, ln):
         """
           Displays the "HTML basic" format of a record
 
         Parameters:
 
           - 'titles' *list* - the titles of the record

           - 'authors' *list* - the authors (as strings)

           - 'dates' *list* - the dates of publication

           - 'rns' *list* - the report numbers (shown as quicknotes)

           - 'abstracts' *list* - the abstracts for the record

           - 'urls_u' *list* - URLs to the original versions of the record

           - 'urls_z' *list* - Not used

           - 'ln' *string* - The language to display
         """
         out = ""
         for title in titles:
             out += "<strong>%(title)s</strong> " % {
                      'title' : cgi.escape(title)
                    }
         if authors:
             out += " / "
             for author in authors[:CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD]:
                 out += '%s ' % \
                        create_html_link(self.build_search_url(p=author, f='author', ln=ln),
                                         {}, cgi.escape(author))
 
             if len(authors) > CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD:
                 out += "<em>et al</em>"
         for date in dates:
             out += " %s." % cgi.escape(date)
         for rn in rns:
             out += """ <small class="quicknote">[%(rn)s]</small>""" % {'rn' : cgi.escape(rn)}
         for abstract in abstracts:
             out += "<br /><small>%(abstract)s [...]</small>" % {'abstract' : cgi.escape(abstract[:1 + string.find(abstract, '.')]) }
         for idx in range(0, len(urls_u)):
             out += """<br /><small class="note"><a class="note" href="%(url)s">%(name)s</a></small>""" % {
                      'url' : urls_u[idx],
                      'name' : urls_u[idx]
                    }
         return out
 
     def tmpl_search_in_bibwords(self, p, f, ln, nearest_box):
         """
           Displays the *Words like current ones* links for a search
 
         Parameters:
 
           - 'p' *string* - Current search words
 
            - 'f' *string* - the fields in which the search was done

            - 'ln' *string* - The language to display

            - 'nearest_box' *string* - the HTML code for the "nearest_terms" box - most probably from a create_nearest_terms_box call
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
         out = '<p>'
         if f:
             out += _("Words nearest to %(x_word)s inside %(x_field)s in any collection are:") % {'x_word': '<em>' + cgi.escape(p) + '</em>',
                                                                                                  'x_field': '<em>' + cgi.escape(f) + '</em>'}
         else:
             out += _("Words nearest to %(x_word)s in any collection are:") % {'x_word': '<em>' + cgi.escape(p) + '</em>'}
         out += '<br />' + nearest_box + '</p>'
         return out
 
     def tmpl_nearest_term_box(self, p, ln, f, terminfo, intro):
         """
           Displays the *Nearest search terms* box
 
         Parameters:
 
           - 'p' *string* - Current search words
 
            - 'f' *string* - the field in which the search was done
 
           - 'ln' *string* - The language to display
 
            - 'terminfo' *list* - one tuple (term, hits, argd) for each near term
 
           - 'intro' *string* - the intro HTML to prefix the box with
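
          For example (illustrative), 'terminfo' might look like::

              [('ellis', 12, {'p': 'ellis', 'f': 'author', 'ln': 'en'}),
               ('ellison', 0, {'p': 'ellison', 'f': 'author', 'ln': 'en'})]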
         """
 
         out = '''<table class="nearesttermsbox" cellpadding="0" cellspacing="0" border="0">'''
 
         for term, hits, argd in terminfo:
 
             if hits:
                 hitsinfo = str(hits)
             else:
                 hitsinfo = '-'
 
             term = cgi.escape(term)
 
             if term == p: # print search word for orientation:
                 nearesttermsboxbody_class = "nearesttermsboxbodyselected"
                 if hits > 0:
                     term = create_html_link(self.build_search_url(argd), {},
                                             term, {'class': "nearesttermsselected"})
             else:
                 nearesttermsboxbody_class = "nearesttermsboxbody"
                 term = create_html_link(self.build_search_url(argd), {},
                                         term, {'class': "nearestterms"})
 
             out += '''\
             <tr>
               <td class="%(nearesttermsboxbody_class)s" align="right">%(hits)s</td>
               <td class="%(nearesttermsboxbody_class)s" width="15">&nbsp;</td>
               <td class="%(nearesttermsboxbody_class)s" align="left">%(term)s</td>
             </tr>
             ''' % {'hits': hitsinfo,
                    'nearesttermsboxbody_class': nearesttermsboxbody_class,
                    'term': term}
 
         out += "</table>"
         return intro + "<blockquote>" + out + "</blockquote>"
 
     def tmpl_browse_pattern(self, f, fn, ln, browsed_phrases_in_colls, colls, rg):
         """
            Displays the results of browsing by pattern: the list of phrases and their hit counts
 
         Parameters:
 
           - 'f' *string* - field (*not* i18nized)
 
           - 'fn' *string* - field name (i18nized)
 
           - 'ln' *string* - The language to display
 
           - 'browsed_phrases_in_colls' *array* - the phrases to display
 
           - 'colls' *array* - the list of collection parameters of the search (c's)
 
            - 'rg' *int* - the number of records to display per page
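
          For example (illustrative), 'browsed_phrases_in_colls' might be
          [('quark', 12), ('quasar', 3)].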
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = """<table class="searchresultsbox">
               <thead>
                <tr>
                 <th class="searchresultsboxheader" style="text-align: right;" width="15">
                   %(hits)s
                 </th>
                 <th class="searchresultsboxheader" width="15">
                   &nbsp;
                 </th>
                 <th class="searchresultsboxheader" style="text-align: left;">
                   %(fn)s
                 </th>
                </tr>
               </thead>
               <tbody>""" % {
                 'hits' : _("Hits"),
                 'fn' : cgi.escape(fn)
               }
 
         if len(browsed_phrases_in_colls) == 1:
             # one hit only found:
             phrase, nbhits = browsed_phrases_in_colls[0][0], browsed_phrases_in_colls[0][1]
 
             query = {'c': colls,
                      'ln': ln,
                      'p': '"%s"' % phrase.replace('"', '\\"'),
                      'f': f,
                      'rg' : rg}
 
             out += """<tr>
                        <td class="searchresultsboxbody" style="text-align: right;">
                         %(nbhits)s
                        </td>
                        <td class="searchresultsboxbody" width="15">
                         &nbsp;
                        </td>
                        <td class="searchresultsboxbody" style="text-align: left;">
                         %(link)s
                        </td>
                       </tr>""" % {'nbhits': nbhits,
                                   'link': create_html_link(self.build_search_url(query),
                                                            {}, cgi.escape(phrase))}
 
         elif len(browsed_phrases_in_colls) > 1:
            # first display all phrases found except the last one:
             for phrase, nbhits in browsed_phrases_in_colls[:-1]:
                 query = {'c': colls,
                          'ln': ln,
                          'p': '"%s"' % phrase.replace('"', '\\"'),
                          'f': f,
                          'rg' : rg}
 
                 out += """<tr>
                            <td class="searchresultsboxbody" style="text-align: right;">
                             %(nbhits)s
                            </td>
                            <td class="searchresultsboxbody" width="15">
                             &nbsp;
                            </td>
                            <td class="searchresultsboxbody" style="text-align: left;">
                             %(link)s
                            </td>
                           </tr>""" % {'nbhits' : nbhits,
                                       'link': create_html_link(self.build_search_url(query),
                                                                {},
                                                                cgi.escape(phrase))}
 
             # now display last hit as "previous term":
             phrase, nbhits = browsed_phrases_in_colls[0]
             query_previous = {'c': colls,
                      'ln': ln,
                      'p': '"%s"' % phrase.replace('"', '\\"'),
                      'f': f,
                      'rg' : rg}
 
             # now display last hit as "next term":
             phrase, nbhits = browsed_phrases_in_colls[-1]
             query_next = {'c': colls,
                      'ln': ln,
                      'p': '"%s"' % phrase.replace('"', '\\"'),
                      'f': f,
                      'rg' : rg}
 
             out += """<tr><td colspan="2" class="normal">
                             &nbsp;
                           </td>
                           <td class="normal">
                             %(link_previous)s
                             <img src="%(siteurl)s/img/sp.gif" alt="" border="0" />
                             <img src="%(siteurl)s/img/sn.gif" alt="" border="0" />
                             %(link_next)s
                           </td>
                       </tr>""" % {'link_previous': create_html_link(self.build_search_url(query_previous, action='browse'), {}, _("Previous")),
                       'link_next': create_html_link(self.build_search_url(query_next, action='browse'),
                                                            {}, _("next")),
                                   'siteurl' : CFG_SITE_URL}
         out += """</tbody>
             </table>"""
         return out
 
     def tmpl_search_box(self, ln, aas, cc, cc_intl, ot, sp,
                         action, fieldslist, f1, f2, f3, m1, m2, m3,
                         p1, p2, p3, op1, op2, rm, p, f, coll_selects,
                         d1y, d2y, d1m, d2m, d1d, d2d, dt, sort_fields,
                         sf, so, ranks, sc, rg, formats, of, pl, jrec, ec,
                         show_colls=True, show_title=True):
 
         """
            Displays the search box (light, simple, or advanced interface)
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
            - 'aas' *int* - which search interface to display: -1 (light) to 1 (advanced), from simplest to most advanced
 
           - 'cc_intl' *string* - the i18nized current collection name, used for display
 
           - 'cc' *string* - the internal current collection name
 
           - 'ot', 'sp' *string* - hidden values
 
           - 'action' *string* - the action demanded by the user
 
           - 'fieldslist' *list* - the list of all fields available, for use in select within boxes in advanced search
 
            - 'p, f, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, rm' *strings* - the search parameters
 
           - 'coll_selects' *array* - a list of lists, each containing the collections selects to display
 
            - 'd1y, d1m, d1d, d2y, d2m, d2d' *int* - the year/month/day components of the from/until search dates
 
           - 'dt' *string* - the dates' types (creation dates, modification dates)
 
           - 'sort_fields' *array* - the select information for the sort fields
 
           - 'sf' *string* - the currently selected sort field
 
           - 'so' *string* - the currently selected sort order ("a" or "d")
 
           - 'ranks' *array* - ranking methods
 
           - 'rm' *string* - selected ranking method
 
           - 'sc' *string* - split by collection or not
 
           - 'rg' *string* - selected results/page
 
           - 'formats' *array* - available output formats
 
           - 'of' *string* - the selected output format
 
            - 'pl' *string* - 'limit to' search pattern

            - 'show_colls' *bool* - propose the collection selection box?

            - 'show_title' *bool* - show cc_intl in the page title?
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
 
         # These are hidden fields the user does not manipulate
         # directly
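          # (in the light search interface, aas == -1, the collection is
          # chosen via the form's own 'cc' radio buttons, so it must not be
          # frozen as a hidden field here)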
         if aas == -1:
             argd = drop_default_urlargd({
                 'ln': ln, 'aas': aas,
                 'ot': ot, 'sp': sp, 'ec': ec,
                 }, self.search_results_default_urlargd)
         else:
             argd = drop_default_urlargd({
                 'cc': cc, 'ln': ln, 'aas': aas,
                 'ot': ot, 'sp': sp, 'ec': ec,
                 }, self.search_results_default_urlargd)
 
         out = ""
         if show_title:
             # display cc name if asked for
             out += '''
             <h1 class="headline">%(ccname)s</h1>''' % {'ccname' : cgi.escape(cc_intl), }
 
         out += '''
         <form name="search" action="%(siteurl)s/search" method="get">
         ''' % {'siteurl' : CFG_SITE_URL}
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
         leadingtext = _("Search")
 
         if action == 'browse':
             leadingtext = _("Browse")
 
         if aas == 1:
             # print Advanced Search form:
 
             # define search box elements:
             out += '''
             <table class="searchbox advancedsearch">
              <thead>
               <tr>
                <th colspan="3" class="searchboxheader">
                 %(leading)s:
                </th>
               </tr>
              </thead>
              <tbody>
               <tr valign="top" style="white-space:nowrap;">
                 <td class="searchboxbody">%(matchbox1)s
                   <input type="text" name="p1" size="%(sizepattern)d" value="%(p1)s" class="advancedsearchfield"/>
                 </td>
                 <td class="searchboxbody">%(searchwithin1)s</td>
                 <td class="searchboxbody">%(andornot1)s</td>
               </tr>
               <tr valign="top">
                 <td class="searchboxbody">%(matchbox2)s
                   <input type="text" name="p2" size="%(sizepattern)d" value="%(p2)s" class="advancedsearchfield"/>
                 </td>
                 <td class="searchboxbody">%(searchwithin2)s</td>
                 <td class="searchboxbody">%(andornot2)s</td>
               </tr>
               <tr valign="top">
                 <td class="searchboxbody">%(matchbox3)s
                   <input type="text" name="p3" size="%(sizepattern)d" value="%(p3)s" class="advancedsearchfield"/>
                 </td>
                 <td class="searchboxbody">%(searchwithin3)s</td>
                 <td class="searchboxbody"  style="white-space:nowrap;">
                   <input class="formbutton" type="submit" name="action_search" value="%(search)s" />
                   <input class="formbutton" type="submit" name="action_browse" value="%(browse)s" />&nbsp;
                 </td>
               </tr>
               <tr valign="bottom">
                 <td colspan="3" align="right" class="searchboxbody">
                   <small>
                     <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a> ::
                     %(simple_search)s
                   </small>
                 </td>
               </tr>
              </tbody>
             </table>
             ''' % {
                 'simple_search': create_html_link(self.build_search_url(p=p1, f=f1, rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg),
                                                   {}, _("Simple Search")),
 
                 'leading' : leadingtext,
                 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH,
                 'matchbox1' : self.tmpl_matchtype_box('m1', m1, ln=ln),
                 'p1' : cgi.escape(p1, 1),
                 'searchwithin1' : self.tmpl_searchwithin_select(
                                   ln=ln,
                                   fieldname='f1',
                                   selected=f1,
                                   values=self._add_mark_to_field(value=f1, fields=fieldslist, ln=ln)
                                 ),
                  'andornot1' : self.tmpl_andornot_box(
                                    name='op1',
                                    value=op1,
                                    ln=ln
                                  ),
                  'matchbox2' : self.tmpl_matchtype_box('m2', m2, ln=ln),
                  'p2' : cgi.escape(p2, 1),
                  'searchwithin2' : self.tmpl_searchwithin_select(
                                    ln=ln,
                                    fieldname='f2',
                                    selected=f2,
                                    values=self._add_mark_to_field(value=f2, fields=fieldslist, ln=ln)
                                  ),
                  'andornot2' : self.tmpl_andornot_box(
                                    name='op2',
                                    value=op2,
                                    ln=ln
                                  ),
                  'matchbox3' : self.tmpl_matchtype_box('m3', m3, ln=ln),
                  'p3' : cgi.escape(p3, 1),
                  'searchwithin3' : self.tmpl_searchwithin_select(
                                    ln=ln,
                                    fieldname='f3',
                                    selected=f3,
                                    values=self._add_mark_to_field(value=f3, fields=fieldslist, ln=ln)
                                  ),
                  'search' : _("Search"),
                  'browse' : _("Browse"),
                  'siteurl' : CFG_SITE_URL,
                  'ln' : ln,
                  'langlink': '?ln=' + ln,
                  'search_tips': _("Search Tips")
             }
         elif aas == 0:
             # print Simple Search form:
             out += '''
             <table class="searchbox simplesearch">
              <thead>
               <tr>
                <th colspan="3" class="searchboxheader">
                 %(leading)s:
                </th>
               </tr>
              </thead>
              <tbody>
               <tr valign="top">
                 <td class="searchboxbody"><input type="text" name="p" size="%(sizepattern)d" value="%(p)s" class="simplesearchfield"/></td>
                 <td class="searchboxbody">%(searchwithin)s</td>
                 <td class="searchboxbody">
                   <input class="formbutton" type="submit" name="action_search" value="%(search)s" />
                   <input class="formbutton" type="submit" name="action_browse" value="%(browse)s" />&nbsp;
                 </td>
               </tr>
               <tr valign="bottom">
                 <td colspan="3" align="right" class="searchboxbody">
                   <small>
                     <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a> ::
                     %(advanced_search)s
                   </small>
                 </td>
               </tr>
              </tbody>
             </table>
             ''' % {
               'advanced_search': create_html_link(self.build_search_url(p1=p,
                                                                         f1=f,
                                                                         rm=rm,
                                                                         aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES),
                                                                         cc=cc,
                                                                         jrec=jrec,
                                                                         ln=ln,
                                                                         rg=rg),
                                                   {}, _("Advanced Search")),
 
               'leading' : leadingtext,
               'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH,
               'p' : cgi.escape(p, 1),
               'searchwithin' : self.tmpl_searchwithin_select(
                                   ln=ln,
                                   fieldname='f',
                                   selected=f,
                                   values=self._add_mark_to_field(value=f, fields=fieldslist, ln=ln)
                                 ),
               'search' : _("Search"),
               'browse' : _("Browse"),
               'siteurl' : CFG_SITE_URL,
               'ln' : ln,
               'langlink': '?ln=' + ln,
               'search_tips': _("Search Tips")
             }
         else:
             # EXPERIMENTAL
             # print light search form:
             search_in = ''
             if cc_intl != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME):
                 search_in = '''
             <input type="radio" name="cc" value="%(collection_id)s" id="searchCollection" checked="checked"/>
             <label for="searchCollection">%(search_in_collection_name)s</label>
             <input type="radio" name="cc" value="%(root_collection_name)s" id="searchEverywhere" />
             <label for="searchEverywhere">%(search_everywhere)s</label>
             ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \
                   {'x_collection_name': cgi.escape(cc_intl)},
                   'collection_id': cc,
                   'root_collection_name': CFG_SITE_NAME,
                   'search_everywhere': _("Search everywhere")}
             out += '''
             <table class="searchbox lightsearch">
               <tr valign="top">
                 <td class="searchboxbody"><input type="text" name="p" size="%(sizepattern)d" value="%(p)s" class="lightsearchfield"/></td>
                 <td class="searchboxbody">
                   <input class="formbutton" type="submit" name="action_search" value="%(search)s" />
                 </td>
                 <td class="searchboxbody" align="left" rowspan="2" valign="top">
                  <small><small>
                  <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a><br/>
                  %(advanced_search)s
                  </small></small>
                </td>
               </tr>
             </table>
             <small>%(search_in)s</small>
             ''' % {
               'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH,
               'advanced_search': create_html_link(self.build_search_url(p1=p,
                                                                         f1=f,
                                                                         rm=rm,
                                                                         aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES),
                                                                         cc=cc,
                                                                         jrec=jrec,
                                                                         ln=ln,
                                                                         rg=rg),
                                                   {}, _("Advanced Search")),
 
               'leading' : leadingtext,
               'p' : cgi.escape(p, 1),
               'searchwithin' : self.tmpl_searchwithin_select(
                                   ln=ln,
                                   fieldname='f',
                                   selected=f,
                                   values=self._add_mark_to_field(value=f, fields=fieldslist, ln=ln)
                                 ),
               'search' : _("Search"),
               'browse' : _("Browse"),
               'siteurl' : CFG_SITE_URL,
               'ln' : ln,
               'langlink': '?ln=' + ln,
               'search_tips': _("Search Tips"),
               'search_in': search_in
             }
         ## secondly, print Collection(s) box:
 
        if show_colls and aas > -1:
            # display the collection selection box, if requested (not shown
            # in the light search interface)
             selects = ''
             for sel in coll_selects:
                 selects += self.tmpl_select(fieldname='c', values=sel)
 
             out += """
                 <table class="searchbox">
                  <thead>
                   <tr>
                    <th colspan="3" class="searchboxheader">
                     %(leading)s %(msg_coll)s:
                    </th>
                   </tr>
                  </thead>
                  <tbody>
                   <tr valign="bottom">
                    <td valign="top" class="searchboxbody">
                      %(colls)s
                    </td>
                   </tr>
                  </tbody>
                 </table>
                  """ % {
                    'leading' : leadingtext,
                    'msg_coll' : _("collections"),
                    'colls' : selects,
                  }
 
         ## thirdly, print search limits, if applicable:
         if action != _("Browse") and pl:
             out += """<table class="searchbox">
                        <thead>
                         <tr>
                           <th class="searchboxheader">
                             %(limitto)s
                           </th>
                         </tr>
                        </thead>
                        <tbody>
                         <tr valign="bottom">
                           <td class="searchboxbody">
                            <input type="text" name="pl" size="%(sizepattern)d" value="%(pl)s" />
                           </td>
                         </tr>
                        </tbody>
                       </table>""" % {
                         'limitto' : _("Limit to:"),
                         'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH,
                         'pl' : cgi.escape(pl, 1),
                       }
 
        ## fourthly, print from/until date boxes, if applicable:
         if action == _("Browse") or (d1y == 0 and d1m == 0 and d1d == 0 and d2y == 0 and d2m == 0 and d2d == 0):
             pass # do not need it
        else:
             out += """<table class="searchbox">
                        <thead>
                         <tr>
                           <th class="searchboxheader">
                             %(added)s
                           </th>
                           <th class="searchboxheader">
                             %(until)s
                           </th>
                         </tr>
                        </thead>
                        <tbody>
                         <tr valign="bottom">
                           <td class="searchboxbody">%(added_or_modified)s %(date1)s</td>
                           <td class="searchboxbody">%(date2)s</td>
                         </tr>
                        </tbody>
                       </table>""" % {
                         'added' : _("Added/modified since:"),
                         'until' : _("until:"),
                         'added_or_modified': self.tmpl_inputdatetype(dt, ln),
                         'date1' : self.tmpl_inputdate("d1", ln, d1y, d1m, d1d),
                         'date2' : self.tmpl_inputdate("d2", ln, d2y, d2m, d2d),
                       }
 
         ## fifthly, print Display results box, including sort/rank, formats, etc:
         if action != _("Browse") and aas > -1:
 
             rgs = []
             for i in [10, 25, 50, 100, 250, 500]:
                 if i <= CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS:
                     rgs.append({ 'value' : i, 'text' : "%d %s" % (i, _("results"))})
             # enrich sort fields list if we are sorting by some MARC tag:
             sort_fields = self._add_mark_to_field(value=sf, fields=sort_fields, ln=ln)
             # create sort by HTML box:
             out += """<table class="searchbox">
                  <thead>
                   <tr>
                    <th class="searchboxheader">
                     %(sort_by)s
                    </th>
                    <th class="searchboxheader">
                     %(display_res)s
                    </th>
                    <th class="searchboxheader">
                     %(out_format)s
                    </th>
                   </tr>
                  </thead>
                  <tbody>
                   <tr valign="bottom">
                    <td class="searchboxbody">
                      %(select_sf)s %(select_so)s %(select_rm)s
                    </td>
                    <td class="searchboxbody">
                      %(select_rg)s %(select_sc)s
                    </td>
                    <td class="searchboxbody">%(select_of)s</td>
                   </tr>
                  </tbody>
                 </table>""" % {
                   'sort_by' : _("Sort by:"),
                   'display_res' : _("Display results:"),
                   'out_format' : _("Output format:"),
                   'select_sf' : self.tmpl_select(fieldname='sf', values=sort_fields, selected=sf, css_class='address'),
                   'select_so' : self.tmpl_select(fieldname='so', values=[{
                                     'value' : 'a',
                                     'text' : _("asc.")
                                   }, {
                                     'value' : 'd',
                                     'text' : _("desc.")
                                   }], selected=so, css_class='address'),
                   'select_rm' : self.tmpl_select(fieldname='rm', values=ranks, selected=rm, css_class='address'),
                   'select_rg' : self.tmpl_select(fieldname='rg', values=rgs, selected=rg, css_class='address'),
                   'select_sc' : self.tmpl_select(fieldname='sc', values=[{
                                     'value' : 0,
                                     'text' : _("single list")
                                   }, {
                                     'value' : 1,
                                     'text' : _("split by collection")
                                   }], selected=sc, css_class='address'),
                   'select_of' : self.tmpl_select(
                                   fieldname='of',
                                   selected=of,
                                   values=self._add_mark_to_field(value=of, fields=formats, chars=3, ln=ln),
                                   css_class='address'),
                 }
 
         ## last but not least, print end of search box:
         out += """</form>"""
         return out
 
     def tmpl_input_hidden(self, name, value):
         "Produces the HTML code for a hidden field "
         if isinstance(value, list):
             list_input = [self.tmpl_input_hidden(name, val) for val in value]
             return "\n".join(list_input)
 
        # Treat the 'aas' argument specially: the URL parameter is called
        # 'as' ('as' being a reserved word in Python, 'aas' is used
        # internally)
         if name == 'aas':
             name = 'as'
 
         return """<input type="hidden" name="%(name)s" value="%(value)s" />""" % {
                  'name' : cgi.escape(str(name), 1),
                  'value' : cgi.escape(str(value), 1),
                }
 
     def _add_mark_to_field(self, value, fields, ln, chars=1):
         """Adds the current value as a MARC tag in the fields array
         Useful for advanced search"""
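        # For example (illustrative): _add_mark_to_field('100__a', fields, 'en')
        # appends {'value': '100__a', 'text': '100__a MARC tag'} to fields.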
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = fields
         if value and str(value[0:chars]).isdigit():
             out.append({'value' : value,
                         'text' : str(value) + " " + _("MARC tag")
                         })
         return out
 
    def tmpl_search_pagestart(self, ln):
        "Page start for the search page; displayed after the page header."
        return """<div class="pagebody"><div class="pagebodystripemiddle">"""
 
    def tmpl_search_pageend(self, ln):
        "Page end for the search page; displayed just before the page footer."
        return """</div></div>"""
 
     def tmpl_print_search_info(self, ln, middle_only,
                                collection, collection_name, collection_id,
                                aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1,
                                f2, f3, m1, m2, m3, op1, op2, p1, p2,
                                p3, d1y, d1m, d1d, d2y, d2m, d2d, dt,
                                all_fieldcodes, cpu_time, pl_in_url,
                                jrec, sc, sp):
 
         """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
            Also, prints navigation links (beg/next/prev/end) inside the results set.
           If middle_only is set to 1, it will only print the middle box information (beg/prev/next/end, etc.) links.
            This is suitable for displaying navigation links at the bottom of the search results page.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'middle_only' *bool* - Only display parts of the interface
 
           - 'collection' *string* - the collection name
 
           - 'collection_name' *string* - the i18nized current collection name
 
            - 'aas' *int* - whether the advanced search interface is displayed
 
            - 'sf' *string* - the currently selected sort field
 
           - 'so' *string* - the currently selected sort order ("a" or "d")
 
           - 'rm' *string* - selected ranking method
 
           - 'rg' *int* - selected results/page
 
           - 'nb_found' *int* - number of results found
 
           - 'of' *string* - the selected output format
 
           - 'ot' *string* - hidden values
 
           - 'p' *string* - Current search words
 
           - 'f' *string* - the fields in which the search was done
 
           - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters
 
           - 'jrec' *int* - number of first record on this page
 
            - 'd1y, d1m, d1d, d2y, d2m, d2d' *int* - the year/month/day components of the from/until search dates

            - 'dt' *string* - the dates' type (creation date, modification date)
 
           - 'all_fieldcodes' *array* - all the available fields
 
           - 'cpu_time' *float* - the time of the query in seconds
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ""
         # left table cells: print collection name
         if not middle_only:
             out += '''
                   <a name="%(collection_id)s"></a>
                   <form action="%(siteurl)s/search" method="get">
                   <table class="searchresultsbox"><tr><td class="searchresultsboxheader" align="left">
                   <strong><big>%(collection_link)s</big></strong></td>
                   ''' % {
                     'collection_id': collection_id,
                     'siteurl' : CFG_SITE_URL,
                     'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln),
                                                         {}, cgi.escape(collection_name))
                   }
         else:
             out += """
                   <div style="clear:both"></div>
                   <form action="%(siteurl)s/search" method="get"><div align="center">
                   """ % { 'siteurl' : CFG_SITE_URL }
 
         # middle table cell: print beg/next/prev/end arrows:
         if not middle_only:
             out += """<td class="searchresultsboxheader" align="center">
                       %(recs_found)s &nbsp;""" % {
                      'recs_found' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>')
                    }
         else:
             out += "<small>"
             if nb_found > rg:
                 out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') + " &nbsp; "
 
         if nb_found > rg: # navig.arrows are needed, since we have many hits
 
             query = {'p': p, 'f': f,
                      'cc': collection,
                      'sf': sf, 'so': so,
                      'sp': sp, 'rm': rm,
                      'of': of, 'ot': ot,
                      'aas': aas, 'ln': ln,
                      'p1': p1, 'p2': p2, 'p3': p3,
                      'f1': f1, 'f2': f2, 'f3': f3,
                      'm1': m1, 'm2': m2, 'm3': m3,
                      'op1': op1, 'op2': op2,
                      'sc': 0,
                      'd1y': d1y, 'd1m': d1m, 'd1d': d1d,
                      'd2y': d2y, 'd2m': d2m, 'd2d': d2d,
                      'dt': dt,
                 }
 
             # @todo here
             def img(gif, txt):
                 return '<img src="%(siteurl)s/img/%(gif)s.gif" alt="%(txt)s" border="0" />' % {
                     'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL}
 
             if jrec - rg > 1:
                 out += create_html_link(self.build_search_url(query, jrec=1, rg=rg),
                                         {}, img('sb', _("begin")),
                                         {'class': 'img'})
 
             if jrec > 1:
                 out += create_html_link(self.build_search_url(query, jrec=max(jrec - rg, 1), rg=rg),
                                         {}, img('sp', _("previous")),
                                         {'class': 'img'})
 
             if jrec + rg - 1 < nb_found:
                 out += "%d - %d" % (jrec, jrec + rg - 1)
             else:
                 out += "%d - %d" % (jrec, nb_found)
 
             if nb_found >= jrec + rg:
                 out += create_html_link(self.build_search_url(query,
                                                               jrec=jrec + rg,
                                                               rg=rg),
                                         {}, img('sn', _("next")),
                                         {'class':'img'})
 
             if nb_found >= jrec + rg + rg:
                 out += create_html_link(self.build_search_url(query,
                                                             jrec=nb_found - rg + 1,
                                                             rg=rg),
                                         {}, img('se', _("end")),
                                         {'class': 'img'})
 
 
             # still in the navigation part
             cc = collection
             sc = 0
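            # re-emit the current search parameters as hidden inputs of the
            # "jump to record" form; vars() looks the values up by name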
             for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']:
                 out += self.tmpl_input_hidden(name=var, value=vars()[var])
             for var in ['ot', 'sp', 'rm']:
                 if vars()[var]:
                     out += self.tmpl_input_hidden(name=var, value=vars()[var])
             if pl_in_url:
                 fieldargs = cgi.parse_qs(pl_in_url)
                 for fieldcode in all_fieldcodes:
                     # get_fieldcodes():
                     if fieldargs.has_key(fieldcode):
                         for val in fieldargs[fieldcode]:
                             out += self.tmpl_input_hidden(name=fieldcode, value=val)
             out += """&nbsp; %(jump)s <input type="text" name="jrec" size="4" value="%(jrec)d" />""" % {
                      'jump' : _("jump to record:"),
                      'jrec' : jrec,
                    }
 
         if not middle_only:
             out += "</td>"
         else:
             out += "</small>"
 
         # right table cell: cpu time info
         if not middle_only:
             if cpu_time > -1:
                 out += """<td class="searchresultsboxheader" align="right"><small>%(time)s</small>&nbsp;</td>""" % {
                          'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time),
                        }
             out += "</tr></table>"
         else:
             out += "</div>"
         out += "</form>"
         return out
 
     def tmpl_print_hosted_search_info(self, ln, middle_only,
                                collection, collection_name, collection_id,
                                aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1,
                                f2, f3, m1, m2, m3, op1, op2, p1, p2,
                                p3, d1y, d1m, d1d, d2y, d2m, d2d, dt,
                                all_fieldcodes, cpu_time, pl_in_url,
                                jrec, sc, sp):
 
         """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
            Also, prints navigation links (beg/next/prev/end) inside the results set.
           If middle_only is set to 1, it will only print the middle box information (beg/prev/next/end, etc.) links.
            This is suitable for displaying navigation links at the bottom of the search results page.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'middle_only' *bool* - Only display parts of the interface
 
           - 'collection' *string* - the collection name
 
           - 'collection_name' *string* - the i18nized current collection name
 
            - 'aas' *int* - whether the advanced search interface is displayed
 
            - 'sf' *string* - the currently selected sort field
 
           - 'so' *string* - the currently selected sort order ("a" or "d")
 
           - 'rm' *string* - selected ranking method
 
           - 'rg' *int* - selected results/page
 
           - 'nb_found' *int* - number of results found
 
           - 'of' *string* - the selected output format
 
           - 'ot' *string* - hidden values
 
           - 'p' *string* - Current search words
 
           - 'f' *string* - the fields in which the search was done
 
           - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters
 
           - 'jrec' *int* - number of first record on this page
 
            - 'd1y, d1m, d1d, d2y, d2m, d2d' *int* - the year/month/day components of the from/until search dates

            - 'dt' *string* - the dates' type (creation date, modification date)
 
           - 'all_fieldcodes' *array* - all the available fields
 
           - 'cpu_time' *float* - the time of the query in seconds
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ""
         # left table cells: print collection name
         if not middle_only:
             out += '''
                   <a name="%(collection_id)s"></a>
                   <form action="%(siteurl)s/search" method="get">
                   <table class="searchresultsbox"><tr><td class="searchresultsboxheader" align="left">
                   <strong><big>%(collection_link)s</big></strong></td>
                   ''' % {
                     'collection_id': collection_id,
                     'siteurl' : CFG_SITE_URL,
                     'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln),
                                                         {}, cgi.escape(collection_name))
                   }
 
         else:
             out += """
                   <form action="%(siteurl)s/search" method="get"><div align="center">
                   """ % { 'siteurl' : CFG_SITE_URL }
 
         # middle table cell: print beg/next/prev/end arrows:
         if not middle_only:
             # in case we have a hosted collection that timed out do not print its number of records, as it is yet unknown
             if nb_found != -963:
                 out += """<td class="searchresultsboxheader" align="center">
                           %(recs_found)s &nbsp;""" % {
                          'recs_found' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>')
                        }
         else:
             out += "<small>"
            # we do not care about timed-out hosted collections here: the
            # number of records found is negative for them, so it can never
            # exceed rg
             if nb_found > rg:
                 out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') + " &nbsp; "
 
         if nb_found > rg: # navig.arrows are needed, since we have many hits
 
             query = {'p': p, 'f': f,
                      'cc': collection,
                      'sf': sf, 'so': so,
                      'sp': sp, 'rm': rm,
                      'of': of, 'ot': ot,
                      'aas': aas, 'ln': ln,
                      'p1': p1, 'p2': p2, 'p3': p3,
                      'f1': f1, 'f2': f2, 'f3': f3,
                      'm1': m1, 'm2': m2, 'm3': m3,
                      'op1': op1, 'op2': op2,
                      'sc': 0,
                      'd1y': d1y, 'd1m': d1m, 'd1d': d1d,
                      'd2y': d2y, 'd2m': d2m, 'd2d': d2d,
                      'dt': dt,
                 }
 
             # @todo here
             def img(gif, txt):
                 return '<img src="%(siteurl)s/img/%(gif)s.gif" alt="%(txt)s" border="0" />' % {
                     'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL}
 
             if jrec - rg > 1:
                 out += create_html_link(self.build_search_url(query, jrec=1, rg=rg),
                                         {}, img('sb', _("begin")),
                                         {'class': 'img'})
 
             if jrec > 1:
                 out += create_html_link(self.build_search_url(query, jrec=max(jrec - rg, 1), rg=rg),
                                         {}, img('sp', _("previous")),
                                         {'class': 'img'})
 
             if jrec + rg - 1 < nb_found:
                 out += "%d - %d" % (jrec, jrec + rg - 1)
             else:
                 out += "%d - %d" % (jrec, nb_found)
 
             if nb_found >= jrec + rg:
                 out += create_html_link(self.build_search_url(query,
                                                               jrec=jrec + rg,
                                                               rg=rg),
                                         {}, img('sn', _("next")),
                                         {'class':'img'})
 
             if nb_found >= jrec + rg + rg:
                 out += create_html_link(self.build_search_url(query,
                                                             jrec=nb_found - rg + 1,
                                                             rg=rg),
                                         {}, img('se', _("end")),
                                         {'class': 'img'})
 
 
             # still in the navigation part
             cc = collection
             sc = 0
             for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']:
                 out += self.tmpl_input_hidden(name=var, value=vars()[var])
             for var in ['ot', 'sp', 'rm']:
                 if vars()[var]:
                     out += self.tmpl_input_hidden(name=var, value=vars()[var])
             if pl_in_url:
                 fieldargs = cgi.parse_qs(pl_in_url)
                 for fieldcode in all_fieldcodes:
                     # get_fieldcodes():
                     if fieldargs.has_key(fieldcode):
                         for val in fieldargs[fieldcode]:
                             out += self.tmpl_input_hidden(name=fieldcode, value=val)
             out += """&nbsp; %(jump)s <input type="text" name="jrec" size="4" value="%(jrec)d" />""" % {
                      'jump' : _("jump to record:"),
                      'jrec' : jrec,
                    }
 
         if not middle_only:
             out += "</td>"
         else:
             out += "</small>"
 
         # right table cell: cpu time info
         if not middle_only:
             if cpu_time > -1:
                 out += """<td class="searchresultsboxheader" align="right"><small>%(time)s</small>&nbsp;</td>""" % {
                          'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time),
                        }
             out += "</tr></table>"
         else:
             out += "</div>"
         out += "</form>"
         return out
 
     def tmpl_nice_number(self, number, ln=CFG_SITE_LANG, thousands_separator=',', max_ndigits_after_dot=None):
         """
         Return nicely printed number NUMBER in language LN using
         given THOUSANDS_SEPARATOR character.
        If max_ndigits_after_dot is specified and the number is a float, the
        number is rounded to at most max_ndigits_after_dot digits after the
        dot.
 
         This version does not pay attention to locale.  See
         tmpl_nice_number_via_locale().
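
        For example (illustrative, where 'tmpl' is a Template instance)::

            tmpl.tmpl_nice_number(1234567)                  # -> '1,234,567'
            tmpl.tmpl_nice_number(1234.5678,
                                  max_ndigits_after_dot=2)  # -> '1,234.57'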
         """
         if type(number) is float:
             if max_ndigits_after_dot is not None:
                 number = round(number, max_ndigits_after_dot)
             int_part, frac_part = str(number).split('.')
             return '%s.%s' % (self.tmpl_nice_number(int(int_part), ln, thousands_separator), frac_part)
        else:
            # handle the sign separately, so that the separator is never
            # inserted between the minus sign and the first digit
            if number < 0:
                return '-' + self.tmpl_nice_number(-number, ln, thousands_separator)
            chars_in = list(str(number))
            ndigits = len(chars_in)
            chars_out = []
            for i in range(0, ndigits):
                if i % 3 == 0 and i != 0:
                    chars_out.append(thousands_separator)
                chars_out.append(chars_in[ndigits - i - 1])
            chars_out.reverse()
            return ''.join(chars_out)
 
     def tmpl_nice_number_via_locale(self, number, ln=CFG_SITE_LANG):
         """
        Return nicely printed number NUMBER in language LN using the locale.
        See also tmpl_nice_number().
         """
         if number is None:
             return None
         # Temporarily switch the numeric locale to the requested one, and format the number
         # In case the system has no locale definition, use the vanilla form
         ol = locale.getlocale(locale.LC_NUMERIC)
         try:
             locale.setlocale(locale.LC_NUMERIC, self.tmpl_localemap.get(ln, self.tmpl_default_locale))
         except locale.Error:
             return str(number)
        try:
            number = locale.format('%d', number, True)
        except TypeError:
            return str(number)
        finally:
            # always restore the original numeric locale
            locale.setlocale(locale.LC_NUMERIC, ol)
        return number
 
     def tmpl_record_format_htmlbrief_header(self, ln):
         """Returns the header of the search results list when output
         is html brief. Note that this function is called for each collection
         results when 'split by collection' is enabled.
 
         See also: tmpl_record_format_htmlbrief_footer,
                   tmpl_record_format_htmlbrief_body
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = """
               <form action="%(siteurl)s/yourbaskets/add" method="post">
               <table>
               """ % {
                 'siteurl' : CFG_SITE_URL,
               }
 
         return out
 
     def tmpl_record_format_htmlbrief_footer(self, ln, display_add_to_basket=True):
         """Returns the footer of the search results list when output
         is html brief. Note that this function is called for each collection
         results when 'split by collection' is enabled.
 
         See also: tmpl_record_format_htmlbrief_header(..),
                   tmpl_record_format_htmlbrief_body(..)
 
         Parameters:
 
           - 'ln' *string* - The language to display
           - 'display_add_to_basket' *bool* - whether to display Add-to-basket button
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = """</table>
                <br />
                <input type="hidden" name="colid" value="0" />
                %(add_to_basket)s
                </form>""" % {
                'add_to_basket': display_add_to_basket and """<input class="formbutton" type="submit" name="action" value="%s" />""" % _("Add to basket") or "",
                  }
 
         return out
 
     def tmpl_record_format_htmlbrief_body(self, ln, recid,
                                           row_number, relevance,
                                           record, relevances_prologue,
                                           relevances_epilogue,
                                           display_add_to_basket=True):
         """Returns the html brief format of one record. Used in the
         search results list for each record.
 
         See also: tmpl_record_format_htmlbrief_header(..),
                   tmpl_record_format_htmlbrief_footer(..)
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'row_number' *int* - The position of this record in the list
 
           - 'recid' *int* - The recID
 
           - 'relevance' *string* - The relevance of the record
 
           - 'record' *string* - The formatted record
 
           - 'relevances_prologue' *string* - HTML code to prepend the relevance indicator
 
            - 'relevances_epilogue' *string* - HTML code to append to the relevance indicator (used mostly for formatting)

            - 'display_add_to_basket' *bool* - whether to display the Add-to-basket checkbox

          """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         checkbox_for_baskets = """<input name="recid" type="checkbox" value="%(recid)s" />""" % \
                                {'recid': recid, }
         if not display_add_to_basket:
             checkbox_for_baskets = ''
         out = """
                 <tr><td valign="top" align="right" style="white-space: nowrap;">
                     %(checkbox_for_baskets)s
                     <abbr class="unapi-id" title="%(recid)s"></abbr>
 
                 %(number)s.
                """ % {'recid': recid,
                       'number': row_number,
                       'checkbox_for_baskets': checkbox_for_baskets}
         if relevance:
             out += """<br /><div class="rankscoreinfo"><a title="rank score">%(prologue)s%(relevance)s%(epilogue)s</a></div>""" % {
                 'prologue' : relevances_prologue,
                 'epilogue' : relevances_epilogue,
                 'relevance' : relevance
                 }
         out += """</td><td valign="top">%s</td></tr>""" % record
 
         return out
 
     def tmpl_print_results_overview(self, ln, results_final_nb_total, cpu_time, results_final_nb, colls, ec, hosted_colls_potential_results_p=False):
         """Prints results overview box with links to particular collections below.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'results_final_nb_total' *int* - The total number of hits for the query
 
            - 'colls' *array* - The collections with hits, as dicts with keys:

              - 'coll[code]' *string* - The code of the collection (canonical name)

              - 'coll[name]' *string* - The display name of the collection

            - 'results_final_nb' *dict* - The number of hits, indexed by the collection codes

            - 'cpu_time' *string* - The time the query took
 
           - 'ec' *array* - selected external collections
 
           - 'hosted_colls_potential_results_p' *boolean* - check if there are any hosted collections searches
                                                     that timed out during the pre-search
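
          For example (illustrative)::

              colls = [{'id': 1, 'code': 'Articles', 'name': 'Articles'}]
              results_final_nb = {'Articles': 12}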
         """
 
         if len(colls) == 1 and not ec:
             # if one collection only and no external collections, print nothing:
             return ""
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # first find total number of hits:
         # if there were no hosted collections that timed out during the pre-search print out the exact number of records found
         if not hosted_colls_potential_results_p:
             out = """<table class="searchresultsbox">
                     <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead>
                     <tbody><tr><td class="searchresultsboxbody"> """ % {
                     'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \
                     {'x_fmt_open': '<strong>',
                      'x_fmt_close': '</strong>',
                      'x_nb_records': '<strong>' + self.tmpl_nice_number(results_final_nb_total, ln) + '</strong>',
                      'x_nb_seconds': '%.2f' % cpu_time}
                   }
         # if there were (only) hosted_collections that timed out during the pre-search print out a fuzzier message
         else:
             if results_final_nb_total == 0:
                 out = """<table class="searchresultsbox">
                         <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead>
                         <tbody><tr><td class="searchresultsboxbody"> """ % {
                         'founds' : _("%(x_fmt_open)sResults overview%(x_fmt_close)s") % \
                         {'x_fmt_open': '<strong>',
                          'x_fmt_close': '</strong>'}
                       }
             elif results_final_nb_total > 0:
                 out = """<table class="searchresultsbox">
                         <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead>
                         <tbody><tr><td class="searchresultsboxbody"> """ % {
                         'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found at least %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \
                         {'x_fmt_open': '<strong>',
                          'x_fmt_close': '</strong>',
                          'x_nb_records': '<strong>' + self.tmpl_nice_number(results_final_nb_total, ln) + '</strong>',
                          'x_nb_seconds': '%.2f' % cpu_time}
                       }
         # then print hits per collection:
         out += """<script type="text/javascript">
             $(document).ready(function() {
                 $('a.morecolls').click(function() {
                     $('.morecollslist').show();
                     $(this).hide();
                     $('.lesscolls').show();
                     return false;
                 });
                 $('a.lesscolls').click(function() {
                     $('.morecollslist').hide();
                     $(this).hide();
                     $('.morecolls').show();
                     return false;
                 });
             });
             </script>"""
         count = 0
         for coll in colls:
             if coll['code'] in results_final_nb and results_final_nb[coll['code']] > 0:
                 count += 1
                 out += """
                       <span %(collclass)s><strong><a href="#%(coll)s">%(coll_name)s</a></strong>, <a href="#%(coll)s">%(number)s</a><br /></span>""" % \
                                       {'collclass' : count > CFG_WEBSEARCH_RESULTS_OVERVIEW_MAX_COLLS_TO_PRINT and 'class="morecollslist" style="display:none"' or '',
                                        'coll' : coll['id'],
                                        'coll_name' : cgi.escape(coll['name']),
                                        'number' : _("%s records found") % \
                                        ('<strong>' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '</strong>')}
             # the following is used for hosted collections that have timed out,
             # i.e. for which we don't know the exact number of results yet.
             elif coll['code'] in results_final_nb and results_final_nb[coll['code']] == -963:
                 count += 1
                 out += """
                       <span %(collclass)s><strong><a href="#%(coll)s">%(coll_name)s</a></strong><br /></span>""" % \
                                       {'collclass' : count > CFG_WEBSEARCH_RESULTS_OVERVIEW_MAX_COLLS_TO_PRINT and 'class="morecollslist" style="display:none"' or '',
                                        'coll' : coll['id'],
                                        'coll_name' : cgi.escape(coll['name']),
                                        'number' : _("%s records found") % \
                                        ('<strong>' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '</strong>')}
         if count > CFG_WEBSEARCH_RESULTS_OVERVIEW_MAX_COLLS_TO_PRINT:
             out += """<a class="lesscolls" style="display:none; color:red; font-size:small" href="#"><i>%s</i></a>""" % _("Show less collections")
             out += """<a class="morecolls" style="color:red; font-size:small" href="#"><i>%s</i></a>""" % _("Show all collections")
 
         out += "</td></tr></tbody></table>"
         return out
 
     def tmpl_print_hosted_results(self, url_and_engine, ln, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS, display_body=True, display_add_to_basket = True):
         """Print results of a given search engine.
         """
 
         if display_body:
             _ = gettext_set_language(ln)
             #url = url_and_engine[0]
             engine = url_and_engine[1]
             #name = _(engine.name)
             db_id = get_collection_id(engine.name)
             #base_url = engine.base_url
 
             out = ""
 
             results = engine.parser.parse_and_get_results(None, of=of, req=req, limit=limit, parseonly=True)
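             # parseonly=True presumably means the engine's response was already
             # fetched elsewhere and is only parsed here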
 
             if len(results) != 0:
                 if of == 'hb':
                     out += """
                           <form action="%(siteurl)s/yourbaskets/add" method="post">
                           <input type="hidden" name="colid" value="%(col_db_id)s" />
                           <table>
                           """ % {
                             'siteurl' : CFG_SITE_URL,
                             'col_db_id' : db_id,
                           }
             else:
                 if of == 'hb':
                     out += """
                           <table>
                           """
 
             for result in results:
                 out += result.html.replace('>Detailed record<', '>External record<').replace('>Similar records<', '>Similar external records<')
 
             if len(results) != 0:
                 if of == 'hb':
                     out += """</table>
                            <br />"""
                     if display_add_to_basket:
                         out += """<input class="formbutton" type="submit" name="action" value="%(basket)s" />
                     """ % {'basket' : _("Add to basket")}
                     out += """</form>"""
             else:
                 if of == 'hb':
                     out += """
                           </table>
                           """
 
             # emptiness of 'results' was already checked above; this fallback
             # could probably be merged into that earlier check
             if not results:
                 if of and of.startswith("h"):
                     out = _('No results found...') + '<br />'
 
             return out
         else:
             return ""
 
     def tmpl_print_service_list_links(self, label, labels_and_urls, ln=CFG_SITE_LANG):
         """
         Prints service results as list
 
         @param label: the label to display before the list of links
         @type label: string
         @param labels_and_urls: list of tuples (label, url), already translated, not escaped
         @type labels_and_urls: list(string, string)
         @param ln: language
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
 
         out = '''
         <span class="searchservicelabel">%s</span> ''' % cgi.escape(label)
 
         out += """<script type="text/javascript">
             $(document).ready(function() {
                 $('a.moreserviceitemslink').click(function() {
                     $('.moreserviceitemslist', $(this).parent()).show();
                     $(this).hide();
                     $('.lessserviceitemslink', $(this).parent()).show();
                     return false;
                 });
                 $('a.lessserviceitemslink').click(function() {
                     $('.moreserviceitemslist', $(this).parent()).hide();
                     $(this).hide();
                     $('.moreserviceitemslink', $(this).parent()).show();
                     return false;
                 });
             });
             </script>"""
         count = 0
         for link_label, link_url in labels_and_urls:
             count += 1
             out += """<span %(itemclass)s>%(separator)s <a class="searchserviceitem" href="%(url)s">%(link_label)s</a></span>""" % \
                    {'itemclass' : count > CFG_WEBSEARCH_MAX_SEARCH_COLL_RESULTS_TO_PRINT and 'class="moreserviceitemslist" style="display:none"' or '',
                     'separator': count > 1 and ', ' or '',
                     'url' : link_url,
                     'link_label' : cgi.escape(link_label)}
 
         if count > CFG_WEBSEARCH_MAX_SEARCH_COLL_RESULTS_TO_PRINT:
             out += """ <a class="lessserviceitemslink" style="display:none;" href="#">%s</a>""" % _("Less suggestions")
             out += """ <a class="moreserviceitemslink" style="" href="#">%s</a>""" % _("More suggestions")
 
         return out
 
     def tmpl_print_searchresultbox(self, header, body):
         """print a nicely formatted box for search results """
         #_ = gettext_set_language(ln)
 
         # first find total number of hits:
         out = '<table class="searchresultsbox"><thead><tr><th class="searchresultsboxheader">' + header + '</th></tr></thead><tbody><tr><td class="searchresultsboxbody">' + body + '</td></tr></tbody></table>'
         return out
 
 
     def tmpl_search_no_boolean_hits(self, ln, nearestterms):
         """No hits found, proposes alternative boolean queries
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'nearestterms' *array* - Parts of the interface to display, in the format:
 
           - 'nearestterms[nbhits]' *int* - The resulting number of hits
 
           - 'nearestterms[url_args]' *string* - The search parameters
 
           - 'nearestterms[p]' *string* - The search terms
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = _("Boolean query returned no hits. Please combine your search terms differently.")
 
         out += '''<blockquote><table class="nearesttermsbox" cellpadding="0" cellspacing="0" border="0">'''
         for term, hits, argd in nearestterms:
             out += '''\
             <tr>
               <td class="nearesttermsboxbody" align="right">%(hits)s</td>
               <td class="nearesttermsboxbody" width="15">&nbsp;</td>
               <td class="nearesttermsboxbody" align="left">
                 %(link)s
               </td>
             </tr>''' % {'hits' : hits,
                         'link': create_html_link(self.build_search_url(argd),
                                                  {}, cgi.escape(term),
                                                  {'class': "nearestterms"})}
         out += """</table></blockquote>"""
         return out
 
     def tmpl_similar_author_names(self, authors, ln):
         """No hits found, proposes alternative boolean queries
 
         Parameters:
 
           - 'authors': a list of (name, hits) tuples
           - 'ln' *string* - The language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''<a name="googlebox"></a>
                  <table class="googlebox"><tr><th colspan="2" class="googleboxheader">%(similar)s</th></tr>''' % {
                 'similar' : _("See also: similar author names")
               }
         for author, hits in authors:
             out += '''\
             <tr>
               <td class="googleboxbody">%(nb)d</td>
               <td class="googleboxbody">%(link)s</td>
             </tr>''' % {'link': create_html_link(
                                     self.build_search_url(p=author,
                                                           f='author',
                                                           ln=ln),
                                     {}, cgi.escape(author), {'class':"google"}),
                         'nb' : hits}
 
         out += """</table>"""
 
         return out
 
     def tmpl_print_record_detailed(self, recID, ln):
         """Displays a detailed on-the-fly record
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recID' *int* - The record id
         """
         # okay, need to construct a simple "Detailed record" format of our own:
         out = "<p>&nbsp;"
         # secondly, title:
         titles = get_fieldvalues(recID, "245__a") or \
                  get_fieldvalues(recID, "111__a")
         for title in titles:
             out += "<p><center><big><strong>%s</strong></big></center></p>" % cgi.escape(title)
         # thirdly, authors:
         authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
         if authors:
             out += "<p><center>"
             for author in authors:
                 out += '%s; ' % create_html_link(self.build_search_url(
                                                                 ln=ln,
                                                                 p=author,
                                                                 f='author'),
                                                  {}, cgi.escape(author))
             out += "</center></p>"
         # fourthly, date of creation:
         dates = get_fieldvalues(recID, "260__c")
         for date in dates:
             out += "<p><center><small>%s</small></center></p>" % date
         # fifthly, abstract:
         abstracts = get_fieldvalues(recID, "520__a")
         for abstract in abstracts:
             out += """<p style="margin-left: 15%%; width: 70%%">
                      <small><strong>Abstract:</strong> %s</small></p>""" % abstract
         # fifthly bis, keywords:
         keywords = get_fieldvalues(recID, "6531_a")
         if len(keywords):
             out += """<p style="margin-left: 15%%; width: 70%%">
                      <small><strong>Keyword(s):</strong>"""
             for keyword in keywords:
                 out += '%s; ' % create_html_link(
                                     self.build_search_url(ln=ln,
                                                           p=keyword,
                                                           f='keyword'),
                                     {}, cgi.escape(keyword))
 
             out += '</small></p>'
         # fifthly bis bis, published in:
         prs_p = get_fieldvalues(recID, "909C4p")
         prs_v = get_fieldvalues(recID, "909C4v")
         prs_y = get_fieldvalues(recID, "909C4y")
         prs_n = get_fieldvalues(recID, "909C4n")
         prs_c = get_fieldvalues(recID, "909C4c")
         for idx in range(0, len(prs_p)):
             out += """<p style="margin-left: 15%%; width: 70%%">
                      <small><strong>Publ. in:</strong> %s""" % prs_p[idx]
             if prs_v and prs_v[idx]:
                 out += """<strong>%s</strong>""" % prs_v[idx]
             if prs_y and prs_y[idx]:
                 out += """(%s)""" % prs_y[idx]
             if prs_n and prs_n[idx]:
                 out += """, no.%s""" % prs_n[idx]
             if prs_c and prs_c[idx]:
                 out += """, p.%s""" % prs_c[idx]
             out += """.</small></p>"""
         # sixthly, fulltext link:
         urls_z = get_fieldvalues(recID, "8564_z")
         urls_u = get_fieldvalues(recID, "8564_u")
         # we separate the fulltext links and image links
         for idx, url_u in enumerate(urls_u):
             if url_u.endswith('.png'):
                 continue
             else:
                 link_text = "URL"
                 try:
                     if urls_z[idx]:
                         link_text = urls_z[idx]
                 except IndexError:
                     pass
                 out += """<p style="margin-left: 15%%; width: 70%%">
                 <small><strong>%s:</strong> <a href="%s">%s</a></small></p>""" % (link_text, url_u, url_u)
 
         # print some white space at the end:
         out += "<br /><br />"
         return out
 
     def tmpl_print_record_list_for_similarity_boxen(self, title, recID_score_list, ln=CFG_SITE_LANG):
         """Print list of records in the "hs" (HTML Similarity) format for similarity boxes.
            RECID_SCORE_LIST is a list of (recID1, score1), (recID2, score2), etc.
         """
 
         from invenio.search_engine import print_record, record_public_p
 
         recID_score_list_to_be_printed = []
 
         # firstly find 5 first public records to print:
         nb_records_to_be_printed = 0
         nb_records_seen = 0
         while nb_records_to_be_printed < 5 and nb_records_seen < len(recID_score_list) and nb_records_seen < 50:
             # looking through first 50 records only, picking first 5 public ones
             (recID, score) = recID_score_list[nb_records_seen]
             nb_records_seen += 1
             if record_public_p(recID):
                 nb_records_to_be_printed += 1
                 recID_score_list_to_be_printed.append([recID, score])
 
         # secondly print them:
         out = '''
         <table><tr>
          <td>
           <table><tr><td class="blocknote">%(title)s</td></tr></table>
          </td>
          </tr>
          <tr>
           <td><table>
         ''' % { 'title': cgi.escape(title) }
         for recid, score in recID_score_list_to_be_printed:
             out += '''
             <tr><td><font class="rankscoreinfo"><a>(%(score)s)&nbsp;</a></font><small>&nbsp;%(info)s</small></td></tr>''' % {
                 'score': score,
                 'info' : print_record(recid, format="hs", ln=ln),
                 }
 
         out += """</table></td></tr></table> """
         return out
 
     def tmpl_print_record_brief(self, ln, recID):
         """Displays a brief record on-the-fly
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recID' *int* - The record id
         """
         out = ""
 
         # record 'recID' does not exist in format 'format', so print some default format:
         # firstly, title:
         titles = get_fieldvalues(recID, "245__a") or \
                  get_fieldvalues(recID, "111__a")
         # secondly, authors:
         authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
         # thirdly, date of creation:
         dates = get_fieldvalues(recID, "260__c")
         # thirdly bis, report numbers:
         rns = get_fieldvalues(recID, "037__a")
         rns = get_fieldvalues(recID, "088__a")
         # fourthly, beginning of abstract:
         abstracts = get_fieldvalues(recID, "520__a")
         # fifthly, fulltext link:
         urls_z = get_fieldvalues(recID, "8564_z")
         urls_u = get_fieldvalues(recID, "8564_u")
         # get rid of images
         images = []
         non_image_urls_u = []
         for url_u in urls_u:
             if url_u.endswith('.png'):
                 images.append(url_u)
             else:
                 non_image_urls_u.append(url_u)
 
         ## unAPI identifier
         out = '<abbr class="unapi-id" title="%s"></abbr>\n' % recID
         out += self.tmpl_record_body(
                  titles=titles,
                  authors=authors,
                  dates=dates,
                  rns=rns,
                  abstracts=abstracts,
                  urls_u=non_image_urls_u,
                  urls_z=urls_z,
                  ln=ln)
 
         return out
 
     def tmpl_print_record_brief_links(self, ln, recID, sf='', so='d', sp='', rm='', display_claim_link=False):
         """Displays links for brief record on-the-fly
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recID' *int* - The record id
         """
         from invenio.webcommentadminlib import get_nb_reviews, get_nb_comments
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '<div class="moreinfo">'
         if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
             alephsysnos = get_fieldvalues(recID, "970__a")
             if len(alephsysnos) > 0:
                 alephsysno = alephsysnos[0]
                 out += '<span class="moreinfo">%s</span>' % \
                     create_html_link(self.build_search_url(recid=alephsysno,
                                                            ln=ln),
                                      {}, _("Detailed record"),
                                      {'class': "moreinfo"})
             else:
                 out += '<span class="moreinfo">%s</span>' % \
                     create_html_link(self.build_search_url(recid=recID, ln=ln),
                                      {},
                                      _("Detailed record"),
                                      {'class': "moreinfo"})
         else:
             out += '<span class="moreinfo">%s</span>' % \
                    create_html_link(self.build_search_url(recid=recID, ln=ln),
                                     {}, _("Detailed record"),
                                     {'class': "moreinfo"})
 
             out += '<span class="moreinfo"> - %s</span>' % \
                    create_html_link(self.build_search_url(p="recid:%d" % recID,
                                                      rm="wrd",
                                                      ln=ln),
                                     {}, _("Similar records"),
                                     {'class': "moreinfo"})
 
         if CFG_BIBRANK_SHOW_CITATION_LINKS:
             num_timescited = get_cited_by_count(recID)
             if num_timescited:
                 out += '<span class="moreinfo"> - %s</span>' % \
                        create_html_link(self.build_search_url(p="refersto:recid:%d" % recID,
                                                               sf=sf,
                                                               so=so,
                                                               sp=sp,
                                                               rm=rm,
                                                               ln=ln),
                                         {}, num_timescited > 1 and _("Cited by %i records") % num_timescited
                                         or _("Cited by 1 record"),
                                         {'class': "moreinfo"})
             else:
                 out += "<!--not showing citations links-->"
         if display_claim_link:  # perhaps this link should be hidden from users who cannot claim papers
             out += '<span class="moreinfo"> - %s</span>' % \
                 create_html_link(CFG_SITE_URL + '/person/action', {'claim':'True', 'selection':str(recID)},
                                                                         'Attribute this paper',
                                                                         {'class': "moreinfo"})
 
         if CFG_WEBCOMMENT_ALLOW_COMMENTS and CFG_WEBSEARCH_SHOW_COMMENT_COUNT:
             num_comments = get_nb_comments(recID, count_deleted=False)
             if num_comments:
                 out += '<span class="moreinfo"> - %s</span>' % \
                         create_html_link(CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recID)
                         + '/comments?ln=%s' % ln, {}, num_comments > 1 and _("%i comments")
                         % (num_comments) or _("1 comment"),
                         {'class': "moreinfo"})
             else:
                 out += "<!--not showing reviews links-->"
 
         if CFG_WEBCOMMENT_ALLOW_REVIEWS and CFG_WEBSEARCH_SHOW_REVIEW_COUNT:
             num_reviews = get_nb_reviews(recID, count_deleted=False)
             if num_reviews:
                 out += '<span class="moreinfo"> - %s</span>' % \
                         create_html_link(CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recID)
                         + '/reviews?ln=%s' % ln, {}, num_reviews > 1 and _("%i reviews")
                         % (num_reviews) or _("1 review"), {'class': "moreinfo"})
             else:
                 out += "<!--not showing reviews links-->"
 
 
         out += '</div>'
         return out
 
     def tmpl_xml_rss_prologue(self, current_url=None,
                               previous_url=None, next_url=None,
                               first_url=None, last_url=None,
                               nb_found=None, jrec=None, rg=None, cc=None):
         """Creates XML RSS 2.0 prologue."""
         title = CFG_SITE_NAME
         description = '%s latest documents' % CFG_SITE_NAME
         if cc and cc != CFG_SITE_NAME:
             title += ': ' + cgi.escape(cc)
             description += ' in ' + cgi.escape(cc)
 
         out = """<rss version="2.0"
         xmlns:media="http://search.yahoo.com/mrss/"
         xmlns:atom="http://www.w3.org/2005/Atom"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:dcterms="http://purl.org/dc/terms/"
         xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
       <channel>
         <title>%(rss_title)s</title>
         <link>%(siteurl)s</link>
         <description>%(rss_description)s</description>
         <language>%(sitelang)s</language>
         <pubDate>%(timestamp)s</pubDate>
         <category></category>
         <generator>Invenio %(version)s</generator>
         <webMaster>%(sitesupportemail)s</webMaster>
         <ttl>%(timetolive)s</ttl>%(previous_link)s%(next_link)s%(current_link)s%(total_results)s%(start_index)s%(items_per_page)s
         <image>
             <url>%(siteurl)s/img/site_logo_rss.png</url>
             <title>%(sitename)s</title>
             <link>%(siteurl)s</link>
         </image>
          <atom:link rel="search" href="%(siteurl)s/opensearchdescription" type="application/opensearchdescription+xml" title="Content Search" />
 
         <textInput>
           <title>Search </title>
           <description>Search this site:</description>
           <name>p</name>
           <link>%(siteurl)s/search</link>
         </textInput>
         """ % {'sitename': CFG_SITE_NAME,
                'siteurl': CFG_SITE_URL,
                'sitelang': CFG_SITE_LANG,
                'search_syntax': self.tmpl_opensearch_rss_url_syntax,
                'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()),
                'version': CFG_VERSION,
                'sitesupportemail': CFG_SITE_SUPPORT_EMAIL,
                'timetolive': CFG_WEBSEARCH_RSS_TTL,
                'current_link': (current_url and \
                                  '\n<atom:link rel="self" href="%s" />\n' % current_url) or '',
                'previous_link': (previous_url and \
                                  '\n<atom:link rel="previous" href="%s" />' % previous_url) or '',
                'next_link': (next_url and \
                              '\n<atom:link rel="next" href="%s" />' % next_url) or '',
                'first_link': (first_url and \
                              '\n<atom:link rel="first" href="%s" />' % first_url) or '',
                'last_link': (last_url and \
                              '\n<atom:link rel="last" href="%s" />' % last_url) or '',
                'total_results': (nb_found and \
                              '\n<opensearch:totalResults>%i</opensearch:totalResults>' % nb_found) or '',
                'start_index': (jrec and \
                              '\n<opensearch:startIndex>%i</opensearch:startIndex>' % jrec) or '',
                'items_per_page': (rg and \
                              '\n<opensearch:itemsPerPage>%i</opensearch:itemsPerPage>' % rg) or '',
                'rss_title': title,
                'rss_description': description
         }
         return out
 
     def tmpl_xml_rss_epilogue(self):
         """Creates XML RSS 2.0 epilogue."""
         out = """\
       </channel>
 </rss>\n"""
         return out
 
     def tmpl_xml_podcast_prologue(self, current_url=None,
                                   previous_url=None, next_url=None,
                                   first_url=None, last_url=None,
                                   nb_found=None, jrec=None, rg=None, cc=None):
         """Creates XML podcast prologue."""
         title = CFG_SITE_NAME
         description = '%s latest documents' % CFG_SITE_NAME
         if CFG_CERN_SITE:
             title = 'CERN'
             description = 'CERN latest documents'
         if cc and cc != CFG_SITE_NAME:
             title += ': ' + cgi.escape(cc)
             description += ' in ' + cgi.escape(cc)
 
         out = """<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
         <channel>
         <title>%(podcast_title)s</title>
     	<link>%(siteurl)s</link>
         <description>%(podcast_description)s</description>
         <language>%(sitelang)s</language>
         <pubDate>%(timestamp)s</pubDate>
         <category></category>
 	    <generator>Invenio %(version)s</generator>
         <webMaster>%(siteadminemail)s</webMaster>
         <ttl>%(timetolive)s</ttl>%(previous_link)s%(next_link)s%(current_link)s
         <image>
             <url>%(siteurl)s/img/site_logo_rss.png</url>
             <title>%(sitename)s</title>
             <link>%(siteurl)s</link>
         </image>
         <itunes:owner>
         <itunes:email>%(siteadminemail)s</itunes:email>
         </itunes:owner>
         """ % {'sitename': CFG_SITE_NAME,
                'siteurl': CFG_SITE_URL,
                'sitelang': CFG_SITE_LANG,
                'siteadminemail': CFG_SITE_ADMIN_EMAIL,
                'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()),
                'version': CFG_VERSION,
                'sitesupportemail': CFG_SITE_SUPPORT_EMAIL,
                'timetolive': CFG_WEBSEARCH_RSS_TTL,
                'current_link': (current_url and \
                                  '\n<atom:link rel="self" href="%s" />\n' % current_url) or '',
                'previous_link': (previous_url and \
                                  '\n<atom:link rel="previous" href="%s" />' % previous_url) or '',
                'next_link': (next_url and \
                              '\n<atom:link rel="next" href="%s" />' % next_url) or '',
                'first_link': (first_url and \
                              '\n<atom:link rel="first" href="%s" />' % first_url) or '',
                'last_link': (last_url and \
                              '\n<atom:link rel="last" href="%s" />' % last_url) or '',
                 'podcast_title': title,
                 'podcast_description': description
                }
         return out
 
     def tmpl_xml_podcast_epilogue(self):
         """Creates XML podcast epilogue."""
         out = """\n</channel>
 </rss>\n"""
         return out
 
     def tmpl_xml_nlm_prologue(self):
         """Creates XML NLM prologue."""
         out = """<articles>\n"""
         return out
 
     def tmpl_xml_nlm_epilogue(self):
         """Creates XML NLM epilogue."""
         out = """\n</articles>"""
         return out
 
     def tmpl_xml_refworks_prologue(self):
         """Creates XML RefWorks prologue."""
         out = """<references>\n"""
         return out
 
     def tmpl_xml_refworks_epilogue(self):
         """Creates XML RefWorks epilogue."""
         out = """\n</references>"""
         return out
 
     def tmpl_xml_endnote_prologue(self):
         """Creates XML EndNote prologue."""
         out = """<xml>\n<records>\n"""
         return out
 
     def tmpl_xml_endnote_8x_prologue(self):
         """Creates XML EndNote prologue."""
         out = """<records>\n"""
         return out
 
     def tmpl_xml_endnote_epilogue(self):
         """Creates XML EndNote epilogue."""
         out = """\n</records>\n</xml>"""
         return out
 
     def tmpl_xml_endnote_8x_epilogue(self):
         """Creates XML EndNote epilogue."""
         out = """\n</records>"""
         return out
 
     def tmpl_xml_marc_prologue(self):
         """Creates XML MARC prologue."""
         out = """<collection xmlns="http://www.loc.gov/MARC21/slim">\n"""
         return out
 
     def tmpl_xml_marc_epilogue(self):
         """Creates XML MARC epilogue."""
         out = """\n</collection>"""
         return out
 
     def tmpl_xml_mods_prologue(self):
         """Creates XML MODS prologue."""
         out = """<modsCollection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n
                    xsi:schemaLocation="http://www.loc.gov/mods/v3\n
                                        http://www.loc.gov/standards/mods/v3/mods-3-3.xsd">\n"""
         return out
 
     def tmpl_xml_mods_epilogue(self):
         """Creates XML MODS epilogue."""
         out = """\n</modsCollection>"""
         return out
 
     def tmpl_xml_default_prologue(self):
         """Creates XML default format prologue. (Sanity calls only.)"""
         out = """<collection>\n"""
         return out
 
     def tmpl_xml_default_epilogue(self):
         """Creates XML default format epilogue. (Sanity calls only.)"""
         out = """\n</collection>"""
         return out
 
     def tmpl_collection_not_found_page_title(self, colname, ln=CFG_SITE_LANG):
         """
         Create page title for cases when a nonexistent collection was requested.
         """
         _ = gettext_set_language(ln)
         out = _("Collection %s Not Found") % cgi.escape(colname)
         return out
 
     def tmpl_collection_not_found_page_body(self, colname, ln=CFG_SITE_LANG):
         """
         Create page body for cases when a nonexistent collection was requested.
         """
         _ = gettext_set_language(ln)
         out = """<h1>%(title)s</h1>
                  <p>%(sorry)s</p>
                  <p>%(you_may_want)s</p>
               """ % { 'title': self.tmpl_collection_not_found_page_title(colname, ln),
                       'sorry': _("Sorry, collection %s does not seem to exist.") % \
                                 ('<strong>' + cgi.escape(colname) + '</strong>'),
                       'you_may_want': _("You may want to start browsing from %s.") % \
                                  ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + \
                                         cgi.escape(CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)) + '</a>')}
         return out
 
     def tmpl_alert_rss_teaser_box_for_query(self, id_query, ln, display_email_alert_part=True):
         """Propose teaser for setting up this query as alert or RSS feed.
 
         Parameters:
           - 'id_query' *int* - ID of the query we make teaser for
           - 'ln' *string* - The language to display
           - 'display_email_alert_part' *bool* - whether to display email alert part
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # get query arguments:
         res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,))
         argd = {}
         if res:
             argd = cgi.parse_qs(res[0][0])
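         # e.g. a stored urlargs of 'p=higgs&f=title' (hypothetical values)
         # yields argd == {'p': ['higgs'], 'f': ['title']}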
 
         rssurl = self.build_rss_url(argd)
         alerturl = CFG_SITE_URL + '/youralerts/input?ln=%s&amp;idq=%s' % (ln, id_query)
 
         if display_email_alert_part:
             msg_alert = _("""Set up a personal %(x_url1_open)semail alert%(x_url1_close)s
                                   or subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \
                         {'x_url1_open': '<a href="%s"><img src="%s/img/mail-icon-12x8.gif" border="0" alt="" /></a> ' % (alerturl, CFG_SITE_URL) + ' <a class="google" href="%s">' % (alerturl),
                          'x_url1_close': '</a>',
                          'x_url2_open': '<a href="%s"><img src="%s/img/feed-icon-12x12.gif" border="0" alt="" /></a> ' % (rssurl, CFG_SITE_URL) + ' <a class="google" href="%s">' % rssurl,
                          'x_url2_close': '</a>', }
         else:
             msg_alert = _("""Subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \
                         {'x_url2_open': '<a href="%s"><img src="%s/img/feed-icon-12x12.gif" border="0" alt="" /></a> ' % (rssurl, CFG_SITE_URL) + ' <a class="google" href="%s">' % rssurl,
                          'x_url2_close': '</a>', }
 
         out = '''<a name="googlebox"></a>
                  <table class="googlebox"><tr><th class="googleboxheader">%(similar)s</th></tr>
                  <tr><td class="googleboxbody">%(msg_alert)s</td></tr>
                  </table>
                  ''' % {
                 'similar' : _("Interested in being notified about new results for this query?"),
                 'msg_alert': msg_alert, }
         return out
 
     def tmpl_detailed_record_metadata(self, recID, ln, format,
                                       content,
                                       creationdate=None,
                                       modificationdate=None):
         """Returns the main detailed page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - 'format' *string* - The format in used to print the record
 
           - 'content' *string* - The main content of the page
 
           - 'creationdate' *string* - The creation date of the printed record
 
           - 'modificationdate' *string* - The last modification date of the printed record
         """
         _ = gettext_set_language(ln)
 
         ## unAPI identifier
         out = '<abbr class="unapi-id" title="%s"></abbr>\n' % recID
         out += content
         return out
 
     def tmpl_display_back_to_search(self, req, recID, ln):
         """
         Displays next-hit/previous-hit/back-to-search links
         on the detailed record pages in order to be able to quickly
         flip between detailed record pages
         @param req: Apache request object
         @type req: Apache request object
         @param recID: detailed record ID
         @type recID: int
         @param ln: language of the page
         @type ln: string
         @return: html output
         @rtype: html
         """
 
         _ = gettext_set_language(ln)
 
         # if this limit is set to zero, nothing is displayed
         if not CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT:
             return ''
 
         # search for a specific record having not done any search before
         wlq = session_param_get(req, 'websearch-last-query', '')
         wlqh = session_param_get(req, 'websearch-last-query-hits')
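         # wlqh, when set, is expected to be a list of recID lists, one per
         # collection searched in the last query (see the membership test below)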
 
         out = '''<br/><br/><div align="right">'''
         # if the CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT limit was exceeded,
         # only the back-to-search link is displayed
         if wlqh is None:
             out += '''<div style="padding-bottom:2px;padding-top:30px;"><span class="moreinfo" style="margin-right:10px;">
                         %(back)s </span></div></div>''' % \
                         {'back': create_html_link(wlq, {}, _("Back to search"), {'class': "moreinfo"})}
             return out
 
         # let's look for the recID's collection
         record_found = False
         for coll in wlqh:
             if recID in coll:
                 record_found = True
                 coll_recID = coll
                 break
 
         # let's calculate the length of recID's collection
         if record_found:
             recIDs = coll_recID[::-1]
             totalrec = len(recIDs)
         # the record was not found in the last search results
         else:
             return ''
 
         # if there is only one hit,
         # to show only the "back to search" link
         if totalrec == 1:
             # to go back to the last search results page
             out += '''<div style="padding-bottom:2px;padding-top:30px;"><span class="moreinfo" style="margin-right:10px;">
                         %(back)s </span></div></div>''' % \
                         {'back': create_html_link(wlq, {}, _("Back to search"), {'class': "moreinfo"})}
         elif totalrec > 1:
             pos = recIDs.index(recID)
             numrec = pos + 1
             if pos == 0:
                 recIDnext = recIDs[pos + 1]
                 recIDlast = recIDs[totalrec - 1]
                 # to display only next and last links
                 out += '''<div><span class="moreinfo" style="margin-right:10px;">
                                     %(numrec)s %(totalrec)s %(next)s %(last)s </span></div> ''' % {
                                 'numrec': _("%s of") % ('<strong>' + self.tmpl_nice_number(numrec, ln) + '</strong>'),
                                 'totalrec': ("%s") % ('<strong>' + self.tmpl_nice_number(totalrec, ln) + '</strong>'),
                                 'next': create_html_link(self.build_search_url(recid=recIDnext, ln=ln),
                                         {}, ('<font size="4">&rsaquo;</font>'), {'class': "moreinfo"}),
                                 'last': create_html_link(self.build_search_url(recid=recIDlast, ln=ln),
                                         {}, ('<font size="4">&raquo;</font>'), {'class': "moreinfo"})}
             elif pos == totalrec - 1:
                 recIDfirst = recIDs[0]
                 recIDprev = recIDs[pos - 1]
                 # to display only first and previous links
                 out += '''<div style="padding-top:30px;"><span class="moreinfo" style="margin-right:10px;">
                                     %(first)s %(previous)s %(numrec)s %(totalrec)s</span></div>''' % {
                                 'first': create_html_link(self.build_search_url(recid=recIDfirst, ln=ln),
                                             {}, ('<font size="4">&laquo;</font>'), {'class': "moreinfo"}),
                                 'previous': create_html_link(self.build_search_url(recid=recIDprev, ln=ln),
                                             {}, ('<font size="4">&lsaquo;</font>'), {'class': "moreinfo"}),
                                 'numrec': _("%s of") % ('<strong>' + self.tmpl_nice_number(numrec, ln) + '</strong>'),
                                 'totalrec': ("%s") % ('<strong>' + self.tmpl_nice_number(totalrec, ln) + '</strong>')}
             else:
                 # to display all links
                 recIDfirst = recIDs[0]
                 recIDprev = recIDs[pos - 1]
                 recIDnext = recIDs[pos + 1]
                 recIDlast = recIDs[len(recIDs) - 1]
                 out += '''<div style="padding-top:30px;"><span class="moreinfo" style="margin-right:10px;">
                                     %(first)s %(previous)s
                                     %(numrec)s %(totalrec)s %(next)s %(last)s </span></div>''' % {
                                 'first': create_html_link(self.build_search_url(recid=recIDfirst, ln=ln),
                                             {}, ('<font size="4">&laquo;</font>'),
                                             {'class': "moreinfo"}),
                                 'previous': create_html_link(self.build_search_url(recid=recIDprev, ln=ln),
                                             {}, ('<font size="4">&lsaquo;</font>'), {'class': "moreinfo"}),
                                 'numrec': _("%s of") % ('<strong>' + self.tmpl_nice_number(numrec, ln) + '</strong>'),
                                 'totalrec': ("%s") % ('<strong>' + self.tmpl_nice_number(totalrec, ln) + '</strong>'),
                                 'next': create_html_link(self.build_search_url(recid=recIDnext, ln=ln),
                                             {}, ('<font size="4">&rsaquo;</font>'), {'class': "moreinfo"}),
                                 'last': create_html_link(self.build_search_url(recid=recIDlast, ln=ln),
                                             {}, ('<font size="4">&raquo;</font>'), {'class': "moreinfo"})}
             out += '''<div style="padding-bottom:2px;"><span class="moreinfo" style="margin-right:10px;">
                         %(back)s </span></div></div>''' % {
                     'back': create_html_link(wlq, {}, _("Back to search"), {'class': "moreinfo"})}
         return out
 
     def tmpl_record_hepdata(self, data, recid, isLong=True):
         """ Generate a page for HepData records
         """
         c = []
 
         c.append("<div style=\"background-color: #ecece0;\">")
         c.append("<div style=\"background-color: #ececec;\">")
         c.append("<h3>This data comes from the <a href=\"%s\">Durham HepData project</a></h3>" % ("http://hepdata.cedar.ac.uk/view/ins%s" % (str(recid), ), ));
         c.append("<h3>Summary:</h3>")
         c.append("""<div class="hepdataSummary">%s</div>""" % (data.comment, ))
 
         if data.systematics and data.systematics.strip() != "":
             c.append("<h3>Systematic data: </h3>")
             c.append(data.systematics)
             c.append("</div>")
 
         if data.additional_data_links:
             c.append("<h3>Additional data:</h3>")
             for link in data.additional_data_links:
                 if "href" in link and "description" in link:
                     c.append("<a href=\"%s/%s\">%s</a><br>" % (CFG_HEPDATA_URL, link["href"], link["description"]))
 
         seq = 0
 
         for dataset in data.datasets:
             seq += 1
             c.append(hepdatadisplayutils.render_hepdata_dataset_html(dataset, recid, seq))
 
         c.append("</div>")
 
         return "\n".join(c)
 
     def tmpl_record_no_hepdata(self):
         return "This record does not have HEP data associated"
 
     def tmpl_record_plots(self, recID, ln):
         """
           Displays little tables containing the images and captions contained in the specified document.
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
         """
         from invenio.search_engine import get_record
         from invenio.bibrecord import field_get_subfield_values
         from invenio.bibrecord import record_get_field_instances
         _ = gettext_set_language(ln)
 
         out = ''
 
         rec = get_record(recID)
         flds = record_get_field_instances(rec, '856', '4')
 
         images = []
 
         for fld in flds:
             image = field_get_subfield_values(fld, 'u')
             caption = field_get_subfield_values(fld, 'y')
             data_urls = field_get_subfield_values(fld, 'z')
             if type(data_urls) == list and len(data_urls) > 0:
                 data_urls = str(data_urls[0])
                 if data_urls.startswith("HEPDATA:"):
                     data_urls = data_urls[8:].split(";")
                 else:
                     data_urls = []
 
             if type(image) == list and len(image) > 0:
                 image = image[0]
             else:
                 continue
             if type(caption) == list and len(caption) > 0:
                 caption = caption[0]
             else:
                 continue
 
             if not image.endswith('.png'):
                 # huh?
                 continue
 
             if len(caption) >= 5:
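                 # the first five caption characters encode the plot's display
                 # order; they are parsed as an integer and used as the sort
                 # key below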
                 images.append((int(caption[:5]), image, caption[5:], data_urls))
             else:
                 # we have no idea of the ordering; just append it at the end:
                 images.append((99999, image, caption, data_urls))
 
         images = sorted(images, key=lambda x: x[0])
 
         for (index, image, caption, data_urls) in images:
             # let's put everything in nice little subtables with the image
             # next to the caption
             data_string_list = []
             seq_num = 1
 
             for data_url in data_urls:
                 val = ""
                 if len(data_urls) > 1:
                     val = " %i" % seq_num
                 data_string_list.append("<br><a href=\"%s\">Data%s</a>" % (str(data_url), val))
                 seq_num += 1
 
             data_string = "".join(data_string_list)
             out = out + '<table width="95%" style="display: inline;">' + \
                  '<tr><td width="66%"><a name="' + str(index) + '" ' + \
                  'href="' + image + '">' + \
                  '<img src="' + image + '" width="95%"/></a></td>' + \
                  '<td width="33%">' + caption +  data_string + '</td></tr>' + \
                  '</table>'
 
         out = out + '<br /><br />'
 
         return out
 
 
     def tmpl_detailed_record_statistics(self, recID, ln,
                                         downloadsimilarity,
                                         downloadhistory, viewsimilarity):
         """Returns the statistics page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - downloadsimilarity *string* - downloadsimilarity box
 
           - downloadhistory *string* - downloadhistory box
 
           - viewsimilarity *string* - viewsimilarity box
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
 
         if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and downloadsimilarity is not None:
             similar = self.tmpl_print_record_list_for_similarity_boxen (
                 _("People who downloaded this document also downloaded:"), downloadsimilarity, ln)
 
             out = '<table>'
             out += '''
                     <tr><td>%(graph)s</td></tr>
                     <tr><td>%(similar)s</td></tr>
                     ''' % { 'similar': similar,
                             'graph': downloadsimilarity
                             }
 
             out += '</table>'
             out += '<br />'
 
         if CFG_BIBRANK_SHOW_READING_STATS and viewsimilarity is not None:
             out += self.tmpl_print_record_list_for_similarity_boxen (
                 _("People who viewed this page also viewed:"), viewsimilarity, ln)
 
         if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS and downloadhistory is not None:
             out += downloadhistory + '<br />'
 
         return out
 
     def tmpl_detailed_record_citations_prologue(self, recID, ln):
         """Returns the prologue of the citations page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
         """
 
         return '<table>'
 
     def tmpl_detailed_record_citations_epilogue(self, recID, ln):
         """Returns the epilogue of the citations page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
         """
 
         return '</table>'
 
     def tmpl_detailed_record_citations_citing_list(self, recID, ln,
                                                    citinglist,
                                                    sf='', so='d', sp='', rm=''):
         """Returns the list of record citing this one
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - citinglist *list* - a list of tuples [(x1,y1),(x2,y2),..] where x is doc id and y is number of citations
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
 
         if CFG_BIBRANK_SHOW_CITATION_STATS and citinglist is not None:
             similar = self.tmpl_print_record_list_for_similarity_boxen(
                 _("Cited by: %s records") % len (citinglist), citinglist, ln)
 
             out += '''
                     <tr><td>
                       %(similar)s&nbsp;%(more)s
                       <br /><br />
                     </td></tr>''' % {
                 'more': create_html_link(
                 self.build_search_url(p='refersto:recid:%d' % recID, #XXXX
                                       sf=sf,
                                       so=so,
                                       sp=sp,
                                       rm=rm,
                                       ln=ln),
                                       {}, _("more")),
                 'similar': similar}
         return out
 
-    def tmpl_detailed_record_citations_citation_history(self, recID, ln,
-                                                        citationhistory):
+    def tmpl_detailed_record_citations_citation_history(self, ln,
+                                                        citationhistory):
         """Returns the citations history graph of this record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - citationhistory *string* - citationhistory box
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
 
         if CFG_BIBRANK_SHOW_CITATION_GRAPHS and citationhistory is not None:
             out = '<!--citation history--><tr><td>%s</td></tr>' % citationhistory
         else:
             out = "<!--not showing citation history. CFG_BIBRANK_SHOW_CITATION_GRAPHS:"
             out += str(CFG_BIBRANK_SHOW_CITATION_GRAPHS) + " citationhistory "
             if citationhistory:
                 out += str(len(citationhistory)) + "-->"
             else:
                 out += "no citationhistory -->"
         return out
 
+    def tmpl_detailed_record_citations_citation_log(self, ln, log_entries):
+        """Returns the citations history graph of this record
+
+        Parameters:
+
+          - 'recID' *int* - The ID of the printed record
+
+          - 'ln' *string* - The language to display
+
+          - citationhistory *string* - citationhistory box
+
+        """
+        # load the right message language
+        _ = gettext_set_language(ln)
+
+        out = []
+        if log_entries:
+            out.append('<style>td.citationlogdate { width: 5.4em; }</style>')
+            out.append('<table><tr><td class="blocknote">Citation Log: </td></tr><tr><td><a id="citationlogshow" class="moreinfo" style="text-decoration: underline; " onclick="$(\'#citationlog\').show(); $(\'#citationlogshow\').hide();">show</a></td></tr></table>')
+            out.append('<table id="citationlog" style="display: none;">')
+            for recid, action_type, action_date in log_entries:
+                record_str = format_record(recid, 'HS2')
+                out.append("""<tr>
+  <td>%s</td>
+  <td class="citationlogdate">%s</td>
+  <td>%s</td>
+</tr>""" % (action_type, action_date.strftime('%Y-%m-%d'), record_str))
+            out.append('</table>')
+
+        return '\n'.join(out)
+
     def tmpl_detailed_record_citations_co_citing(self, recID, ln,
                                                  cociting):
         """Returns the list of cocited records
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - cociting *string* - cociting box
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
 
         if CFG_BIBRANK_SHOW_CITATION_STATS and cociting is not None:
             similar = self.tmpl_print_record_list_for_similarity_boxen (
                 _("Co-cited with: %s records") % len (cociting), cociting, ln)
 
             out = '''
                     <tr><td>
                       %(similar)s&nbsp;%(more)s
                       <br />
                     </td></tr>''' % { 'more': create_html_link(self.build_search_url(p='cocitedwith:%d' % recID, ln=ln),
                                                                 {}, _("more")),
                                       'similar': similar }
         return out
 
 
     def tmpl_detailed_record_citations_self_cited(self, recID, ln,
                                                   selfcited, citinglist):
         """Returns the list of self-citations for this record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - selfcited list - a list of self-citations for recID
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
 
         if CFG_BIBRANK_SHOW_CITATION_GRAPHS and selfcited is not None:
             sc_scorelist = [] #a score list for print..
             for s in selfcited:
                 #copy weight from citations
                 weight = 0
                 for c in citinglist:
                     (crec, score) = c
                     if crec == s:
                         weight = score
                 tmp = [s, weight]
                 sc_scorelist.append(tmp)
             scite = self.tmpl_print_record_list_for_similarity_boxen (
                 _(".. of which self-citations: %s records") % len (selfcited), sc_scorelist, ln)
             out = '<tr><td>' + scite + '</td></tr>'
         return out
 
     def tmpl_author_information(self, req, pubs, authorname, num_downloads,
                                 aff_pubdict, citedbylist, kwtuples, authors,
                                 vtuples, names_dict, person_link,
                                 bibauthorid_data, ln, return_html=False):
         """Prints stuff about the author given as authorname.
            1. Author name + his/her institutes. Each institute I has a link
               to papers where the auhtor has I as institute.
            2. Publications, number: link to search by author.
            3. Keywords
            4. Author collabs
            5. Publication venues like journals
            The parameters are data structures needed to produce 1-6, as follows:
            req - request
            pubs - list of recids, probably the records that have the author as an author
            authorname - evident
            num_downloads - evident
            aff_pubdict - a dictionary where keys are inst names and values lists of recordids
            citedbylist - list of recs that cite pubs
            kwtuples - keyword tuples like ('HIGGS BOSON',[3,4]) where 3 and 4 are recids
            authors - a list of authors that have collaborated with authorname
            names_dict - a dict of {name: frequency}
         """
         from invenio.search_engine import perform_request_search
         from operator import itemgetter
         _ = gettext_set_language(ln)
         ib_pubs = intbitset(pubs)
         html = []
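        # output mode: collect HTML fragments in ``html`` when return_html is
        # set, otherwise write each fragment directly to the request object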
 
         # construct an extended search as an interim solution for author id
         # searches. Will build "(exactauthor:v1 OR exactauthor:v2)" strings
 #        extended_author_search_str = ""
 
 #        if bibauthorid_data["is_baid"]:
 #            if len(names_dict.keys()) > 1:
 #                extended_author_search_str = '('
 #
 #            for name_index, name_query in enumerate(names_dict.keys()):
 #                if name_index > 0:
 #                    extended_author_search_str += " OR "
 #
 #                extended_author_search_str += 'exactauthor:"' + name_query + '"'
 #
 #            if len(names_dict.keys()) > 1:
 #                extended_author_search_str += ')'
 #     rec_query = 'exactauthor:"' + authorname + '"'
 #
 #        if bibauthorid_data["is_baid"] and extended_author_search_str:
 #            rec_query = extended_author_search_str
 
 
         baid_query = ""
         extended_author_search_str = ""
 
        # prefer a stable author identifier for the search link: the canonical
        # ID (cid) when available, otherwise the person ID (pid)
        if bibauthorid_data.get('is_baid'):
             if bibauthorid_data["cid"]:
                 baid_query = 'author:%s' % bibauthorid_data["cid"]
             elif bibauthorid_data["pid"] > -1:
                 baid_query = 'author:%s' % bibauthorid_data["pid"]
             ## todo: figure out if the author index is filled with pids/cids.
             ## if not: fall back to exactauthor search.
             # if not index:
             #    baid_query = ""
 
         if not baid_query:
             baid_query = 'exactauthor:"' + authorname + '"'
 
            if bibauthorid_data.get('is_baid'):
                if len(names_dict) > 1:
                    extended_author_search_str = '('

                for name_index, name_query in enumerate(names_dict.keys()):
                    if name_index > 0:
                        extended_author_search_str += " OR "

                    extended_author_search_str += 'exactauthor:"' + name_query + '"'

                if len(names_dict) > 1:
                    extended_author_search_str += ')'

            if extended_author_search_str:
                baid_query = extended_author_search_str
 
         baid_query = baid_query + " "
         sorted_names_list = sorted(names_dict.iteritems(), key=itemgetter(1),
                                    reverse=True)
 
         # Prepare data for display
         # construct names box
         header = "<strong>" + _("Name variants") + "</strong>"
         content = []
 
         for name, frequency in sorted_names_list:
             prquery = baid_query + ' exactauthor:"' + name + '"'
             name_lnk = create_html_link(self.build_search_url(p=prquery),
                                                               {},
                                                               str(frequency),)
             content.append("%s (%s)" % (name, name_lnk))
 
         if not content:
             content = [_("No Name Variants")]
 
         names_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content))
 
         # construct papers box
         rec_query = baid_query
        searchstr = create_html_link(self.build_search_url(p=rec_query),
                                     {}, "<strong>" + _("All papers") + " (" + str(len(pubs)) + ")</strong>",)
         line1 = "<strong>" + _("Papers") + "</strong>"
         line2 = searchstr
 
         if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads:
             line2 += " (" + _("downloaded") + " "
             line2 += str(num_downloads) + " " + _("times") + ")"
 
         if CFG_INSPIRE_SITE:
             CFG_COLLS = ['Book',
                          'Conference',
                          'Introductory',
                          'Lectures',
                          'Preprint',
                          'Published',
                          'Review',
                          'Thesis']
         else:
             CFG_COLLS = ['Article',
                          'Book',
                          'Preprint', ]
         collsd = {}
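        # count the author's papers per collection by intersecting the
        # author's recids with each collection's recids (via intbitset)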
         for coll in CFG_COLLS:
             coll_papers = list(ib_pubs & intbitset(perform_request_search(f="collection", p=coll)))
             if coll_papers:
                 collsd[coll] = coll_papers
         colls = collsd.keys()
         colls.sort(lambda x, y: cmp(len(collsd[y]), len(collsd[x]))) # sort by number of papers
         for coll in colls:
             rec_query = baid_query + 'collection:' + coll
             line2 += "<br />" + create_html_link(self.build_search_url(p=rec_query),
                                                                        {}, coll + " (" + str(len(collsd[coll])) + ")",)
 
         if not pubs:
             line2 = _("No Papers")
 
         papers_box = self.tmpl_print_searchresultbox(line1, line2)
 
        # make an authoraff string that looks like CERN (1), Caltech (2), etc.
         authoraff = ""
         aff_pubdict_keys = aff_pubdict.keys()
         aff_pubdict_keys.sort(lambda x, y: cmp(len(aff_pubdict[y]), len(aff_pubdict[x])))
 
         if aff_pubdict_keys:
             for a in aff_pubdict_keys:
                 print_a = a
                if print_a == ' ':
                     print_a = _("unknown affiliation")
                 if authoraff:
                     authoraff += '<br>'
                 authoraff += create_html_link(self.build_search_url(p=' or '.join(["%s" % x for x in aff_pubdict[a]]),
                                                                        f='recid'),
                                                                        {}, print_a + ' (' + str(len(aff_pubdict[a])) + ')',)
         else:
             authoraff = _("No Affiliations")
 
         line1 = "<strong>" + _("Affiliations") + "</strong>"
         line2 = authoraff
         affiliations_box = self.tmpl_print_searchresultbox(line1, line2)
 
         # print frequent keywords:
         keywstr = ""
        if kwtuples:
            for (kw, freq) in kwtuples:
                if keywstr:
                    keywstr += '<br>'
                rec_query = baid_query + 'keyword:"' + kw + '"'
                searchstr = create_html_link(self.build_search_url(p=rec_query),
                                             {}, kw + " (" + str(freq) + ")",)
                keywstr += searchstr
        else:
            keywstr += _('No Keywords')
 
 
         line1 = "<strong>" + _("Frequent keywords") + "</strong>"
         line2 = keywstr
         keyword_box = self.tmpl_print_searchresultbox(line1, line2)
 
 
         header = "<strong>" + _("Frequent co-authors") + "</strong>"
         content = []
        # sort co-authors by frequency (descending), breaking ties by name
        sorted_coauthors = sorted(sorted(authors.iteritems(), key=itemgetter(0)),
                                  key=itemgetter(1), reverse=True)
 
         for name, frequency in sorted_coauthors:
             rec_query = baid_query + 'exactauthor:"' + name + '"'
             lnk = create_html_link(self.build_search_url(p=rec_query), {}, "%s (%s)" % (name, frequency),)
             content.append("%s" % lnk)
 
         if not content:
             content = [_("No Frequent Co-authors")]
 
         coauthor_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content))
 
         pubs_to_papers_link = create_html_link(self.build_search_url(p=baid_query), {}, str(len(pubs)))
         display_name = ""
 
         try:
             display_name = sorted_names_list[0][0]
         except IndexError:
             display_name = "&nbsp;"
 
         headertext = ('<h1>%s <span style="font-size:50%%;">(%s papers)</span></h1>'
                       % (display_name, pubs_to_papers_link))
 
         if return_html:
             html.append(headertext)
         else:
             req.write(headertext)
             #req.write("<h1>%s</h1>" % (authorname))
 
         if person_link:
             cmp_link = ('<div><a href="%s/person/claimstub?person=%s">%s</a></div>'
                       % (CFG_SITE_URL, person_link,
                          _("This is me.  Verify my publication list.")))
             if return_html:
                 html.append(cmp_link)
             else:
                 req.write(cmp_link)
 
         if return_html:
             html.append("<table width=80%><tr valign=top><td>")
             html.append(names_box)
             html.append("<br />")
             html.append(papers_box)
             html.append("<br />")
             html.append(keyword_box)
             html.append("</td>")
             html.append("<td>&nbsp;</td>")
             html.append("<td>")
             html.append(affiliations_box)
             html.append("<br />")
             html.append(coauthor_box)
             html.append("</td></tr></table>")
         else:
             req.write("<table width=80%><tr valign=top><td>")
             req.write(names_box)
             req.write("<br />")
             req.write(papers_box)
             req.write("<br />")
             req.write(keyword_box)
             req.write("</td>")
             req.write("<td>&nbsp;</td>")
             req.write("<td>")
             req.write(affiliations_box)
             req.write("<br />")
             req.write(coauthor_box)
             req.write("</td></tr></table>")
 
         # print citations:
         rec_query = baid_query
 
        if citedbylist:
             line1 = "<strong>" + _("Citations:") + "</strong>"
             line2 = ""
 
             if not pubs:
                 line2 = _("No Citation Information available")
 
             sr_box = self.tmpl_print_searchresultbox(line1, line2)
 
             if return_html:
                 html.append(sr_box)
             else:
                 req.write(sr_box)
 
         if return_html:
             return "\n".join(html)
 
 
     def tmpl_detailed_record_references(self, recID, ln, content):
         """Returns the discussion page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - 'content' *string* - The main content of the page
         """
        out = ''
         if content is not None:
             out += content
 
         return out
 
     def tmpl_citesummary_title(self, ln=CFG_SITE_LANG):
         """HTML citesummary title and breadcrumbs
 
         A part of HCS format suite."""
         return ''
 
     def tmpl_citesummary2_title(self, searchpattern, ln=CFG_SITE_LANG):
         """HTML citesummary title and breadcrumbs
 
         A part of HCS2 format suite."""
         return ''
 
     def tmpl_citesummary_back_link(self, searchpattern, ln=CFG_SITE_LANG):
         """HTML back to citesummary link
 
         A part of HCS2 format suite."""
         _ = gettext_set_language(ln)
         out = ''
        params = {'ln': ln,
                  'p': quote(searchpattern),
                  'of': 'hcs'}
        msg = _('Back to citesummary')

        url = CFG_SITE_URL + '/search?' + \
                          '&amp;'.join(['='.join(i) for i in params.iteritems()])
         out += '<p><a href="%(url)s">%(msg)s</a></p>' % {'url': url, 'msg': msg}
 
         return out
 
     def tmpl_citesummary_more_links(self, searchpattern, ln=CFG_SITE_LANG):
         _ = gettext_set_language(ln)
         out = ''
        msg = '<p><a href="%(url)s">%(msg)s</a></p>'
         params = {'ln': ln,
                   'p': quote(searchpattern),
                   'of': 'hcs2'}
         url = CFG_SITE_URL + '/search?' + \
                        '&amp;'.join(['='.join(i) for i in params.iteritems()])
         out += msg % {'url': url,
                       'msg': _('Exclude self-citations')}
 
         return out
 
     def tmpl_citesummary_prologue(self, coll_recids, collections, search_patterns,
                                   searchfield, citable_recids, total_count,
                                   ln=CFG_SITE_LANG):
         """HTML citesummary format, prologue. A part of HCS format suite."""
         _ = gettext_set_language(ln)
         out = """<table id="citesummary">
                   <tr>
                     <td>
                       <strong class="headline">%(msg_title)s</strong>
                     </td>""" % \
                {'msg_title': _("Citation summary results"), }
         for coll, dummy in collections:
             out += '<td align="right">%s</td>' % _(coll)
         out += '</tr>'
         out += """<tr><td><strong>%(msg_recs)s</strong></td>""" % \
                {'msg_recs': _("Total number of papers analyzed:"), }
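        # one cell per collection: link the paper count to a search built from
        # the collection's search pattern (field-quoted if needed) and colldef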
         for coll, colldef in collections:
             link_url = CFG_SITE_URL + '/search?p='
             if search_patterns[coll]:
                 p = search_patterns[coll]
                 if searchfield:
                     if " " in p:
                         p = searchfield + ':"' + p + '"'
                     else:
                         p = searchfield + ':' + p
                 link_url += quote(p)
             if colldef:
                 link_url += '%20AND%20' + quote(colldef)
             link_text = self.tmpl_nice_number(len(coll_recids[coll]), ln)
             out += '<td align="right"><a href="%s">%s</a></td>' % (link_url,
                                                                    link_text)
         out += '</tr>'
         return out
 
     def tmpl_citesummary_overview(self, collections, d_total_cites,
                                   d_avg_cites, ln=CFG_SITE_LANG):
         """HTML citesummary format, overview. A part of HCS format suite."""
         _ = gettext_set_language(ln)
         out = """<tr><td><strong>%(msg_cites)s</strong></td>""" % \
               {'msg_cites': _("Total number of citations:"), }
         for coll, dummy in collections:
             total_cites = d_total_cites[coll]
             out += '<td align="right">%s</td>' % \
                                         self.tmpl_nice_number(total_cites, ln)
         out += '</tr>'
         out += """<tr><td><strong>%(msg_avgcit)s</strong></td>""" % \
                {'msg_avgcit': _("Average citations per paper:"), }
         for coll, dummy in collections:
             avg_cites = d_avg_cites[coll]
             out += '<td align="right">%.1f</td>' % avg_cites
         out += '</tr>'
         return out
 
     def tmpl_citesummary_minus_self_cites(self, d_total_cites, d_avg_cites,
                                           ln=CFG_SITE_LANG):
         """HTML citesummary format, overview. A part of HCS format suite."""
         _ = gettext_set_language(ln)
         msg = _("Total number of citations excluding self-citations")
         out = """<tr><td><strong>%(msg_cites)s</strong>""" % \
                                                            {'msg_cites': msg, }
 
         # use ? help linking in the style of oai_repository_admin.py
         msg = ' <small><small>[<a href="%s%s">?</a>]</small></small></td>'
         out += msg % (CFG_SITE_URL,
                       '/help/citation-metrics#citesummary_self-cites')
 
        for total_cites in d_total_cites.itervalues():
             out += '<td align="right">%s</td>' % \
                                         self.tmpl_nice_number(total_cites, ln)
         out += '</tr>'
         msg = _("Average citations per paper excluding self-citations")
         out += """<tr><td><strong>%(msg_avgcit)s</strong>""" % \
                                                         {'msg_avgcit': msg, }
         # use ? help linking in the style of oai_repository_admin.py
         msg = ' <small><small>[<a href="%s%s">?</a>]</small></small></td>'
         out += msg % (CFG_SITE_URL,
                       '/help/citation-metrics#citesummary_self-cites')
 
         for avg_cites in d_avg_cites.itervalues():
             out += '<td align="right">%.1f</td>' % avg_cites
         out += '</tr>'
         return out
 
     def tmpl_citesummary_footer(self):
         return ''
 
     def tmpl_citesummary_breakdown_header(self, ln=CFG_SITE_LANG):
         _ = gettext_set_language(ln)
         return """<tr><td><strong>%(msg_breakdown)s</strong></td></tr>""" % \
                {'msg_breakdown': _("Breakdown of papers by citations:"), }
 
     def tmpl_citesummary_breakdown_by_fame(self, d_cites, low, high, fame,
                                            l_colls, searchpatterns,
                                            searchfield, ln=CFG_SITE_LANG):
         """HTML citesummary format, breakdown by fame.
 
         A part of HCS format suite."""
         _ = gettext_set_language(ln)
         out = """<tr><td>%(fame)s</td>""" % \
               {'fame': _(fame), }
         for coll, colldef in l_colls:
             if 'excluding self cites' in coll:
                 keyword = 'citedexcludingselfcites'
             else:
                 keyword = 'cited'
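            # link this citation band to a search: pattern AND colldef AND a
            # '<keyword>:low->high' range ('<keyword>:0' for the uncited band)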
             link_url = CFG_SITE_URL + '/search?p='
            p = searchpatterns.get(coll)
            if p:
                 if searchfield:
                     if " " in p:
                         p = searchfield + ':"' + p + '"'
                     else:
                         p = searchfield + ':' + p
                 link_url += quote(p) + '%20AND%20'
             if colldef:
                 link_url += quote(colldef) + '%20AND%20'
             if low == 0 and high == 0:
                 link_url += quote('%s:0' % keyword)
             else:
                 link_url += quote('%s:%i->%i' % (keyword, low, high))
             link_text = self.tmpl_nice_number(d_cites[coll], ln)
             out += '<td align="right"><a href="%s">%s</a></td>' % (link_url,
                                                                    link_text)
         out += '</tr>'
         return out
 
    def tmpl_citesummary_h_index(self, collections, d_h_factors,
                                 ln=CFG_SITE_LANG):
         """HTML citesummary format, h factor output. A part of the HCS suite."""
         _ = gettext_set_language(ln)
         out = "<tr><td></td></tr><tr><td><strong>%(msg_metrics)s</strong> <small><small>[<a href=\"%(help_url)s\">?</a>]</small></small></td></tr>" % \
               {'msg_metrics': _("Citation metrics"),
                'help_url': CFG_SITE_URL + '/help/citation-metrics', }
         out += '<tr><td>h-index'
         # use ? help linking in the style of oai_repository_admin.py
         msg = ' <small><small>[<a href="%s%s">?</a>]</small></small></td>'
         out += msg % (CFG_SITE_URL,
                       '/help/citation-metrics#citesummary_h-index')
         for coll, dummy in collections:
             h_factors = d_h_factors[coll]
             out += '<td align="right">%s</td>' % \
                                           self.tmpl_nice_number(h_factors, ln)
         out += '</tr>'
         return out
 
     def tmpl_citesummary_epilogue(self):
         """HTML citesummary format, epilogue. A part of HCS format suite."""
         out = "</table>"
         return out
 
     def tmpl_unapi(self, formats, identifier=None):
         """
         Provide a list of object format available from the unAPI service
         for the object identified by IDENTIFIER
         """
         out = '<?xml version="1.0" encoding="UTF-8" ?>\n'
         if identifier:
             out += '<formats id="%i">\n' % (identifier)
         else:
             out += "<formats>\n"
         for format_name, format_type in formats.iteritems():
             docs = ''
             if format_name == 'xn':
                 docs = 'http://www.nlm.nih.gov/databases/dtd/'
                 format_type = 'application/xml'
                 format_name = 'nlm'
             elif format_name == 'xm':
                 docs = 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd'
                 format_type = 'application/xml'
                 format_name = 'marcxml'
             elif format_name == 'xr':
                 format_type = 'application/rss+xml'
                 docs = 'http://www.rssboard.org/rss-2-0/'
             elif format_name == 'xw':
                 format_type = 'application/xml'
                 docs = 'http://www.refworks.com/RefWorks/help/RefWorks_Tagged_Format.htm'
             elif format_name == 'xoaidc':
                 format_type = 'application/xml'
                 docs = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
             elif format_name == 'xe':
                 format_type = 'application/xml'
                 docs = 'http://www.endnote.com/support/'
                 format_name = 'endnote'
             elif format_name == 'xd':
                 format_type = 'application/xml'
                 docs = 'http://dublincore.org/schemas/'
                 format_name = 'dc'
             elif format_name == 'xo':
                 format_type = 'application/xml'
                 docs = 'http://www.loc.gov/standards/mods/v3/mods-3-3.xsd'
                 format_name = 'mods'
             if docs:
                 out += '<format name="%s" type="%s" docs="%s" />\n' % (xml_escape(format_name), xml_escape(format_type), xml_escape(docs))
             else:
                 out += '<format name="%s" type="%s" />\n' % (xml_escape(format_name), xml_escape(format_type))
         out += "</formats>"
         return out