diff --git a/modules/bibrank/lib/bibrank_citation_indexer.py b/modules/bibrank/lib/bibrank_citation_indexer.py
index 4acde6ea5..75e9aead7 100644
--- a/modules/bibrank/lib/bibrank_citation_indexer.py
+++ b/modules/bibrank/lib/bibrank_citation_indexer.py
@@ -1,767 +1,767 @@
 # -*- Coding: utf-8 -*-
 ##
 ## $Id$
 ##
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import time
 import sys
 import os
 import marshal
 import traceback
 from zlib import decompress, error
 
 from invenio.dbquery import run_sql, serialize_via_marshal, \
                             deserialize_via_marshal
 from invenio.search_engine import print_record, search_pattern, get_fieldvalues
 from invenio.bibformat_utils import parse_tag
 from invenio.bibtask import write_message, task_get_option
 
 class memoise:
     """Cache the results of FUNCTION, keyed by its positional arguments.
 
     Call the instance instead of the wrapped function: FUNCTION is
     evaluated once per distinct argument tuple and the stored value is
     returned on every later call with the same arguments.  The argument
     tuple is used as a dictionary key, so the arguments must be hashable.
     Keyword arguments are not accepted and the cache is never emptied.
     """
     def __init__(self, function):
         self.memo = {}
         self.function = function
 
     def __call__(self, *args):
         #"in" instead of dict.has_key(), which newer Pythons no longer have
         if args not in self.memo:
             self.memo[args] = self.function(*args)
         return self.memo[args]
 
 @memoise
 def get_recids_matching_query(pvalue, fvalue):
     """Return the list of recIDs found by searching field FVALUE for PVALUE.
 
     The search is run through search_pattern with matching type m='e'.
     The function is wrapped in memoise, so a repeated (pvalue, fvalue)
     pair is answered from the cache with the same list object.
     """
     return list(search_pattern(p=pvalue, f=fvalue, m='e'))
 
 def get_citation_weight(rank_method_code, config):
     """Return the dictionary used by the bibrank daemon for generating
     the index of search results sorted by citation information.
 
     RANK_METHOD_CODE is the rank method name; it is used to look up the
     time of the last run and the data stored by that run.
     CONFIG is the rank method configuration; it is passed on unchanged
     to get_citation_informations and ref_analyzer.
 
     Returns the value of ref_analyzer, or an empty dictionary when no
     record was modified since the last run and the "id" task option is
     not set.  Progress and timing are printed / sent to write_message.
     """
     begin_time = time.time()
     last_update_time = get_bibrankmethod_lastupdate(rank_method_code)
     #addition: YOU DO NEED TO RUN WITH OPTION -R SOMETIMES. This is
     #because among the new set (X) there can be records such that the old
-    #records Y cite them. But this kind of situation is not detected 
+    #records Y cite them. But this kind of situation is not detected
     #unless you go through all the records Y+X.
     #quick == "no" moves the time stamp back so that every record counts
     #as modified
     if task_get_option("quick") == "no":
         last_update_time = "0000-00-00 00:00:00"
-    #if task_get_option('verbose') >= 3:	
+    #if task_get_option('verbose') >= 3:
     last_modified_records = get_last_modified_rec(last_update_time)
     #id option forces re-indexing a certain range even if there are no new recs
     if last_modified_records or task_get_option("id"):
         if task_get_option("id"):
             #construct a range of records to index; only the first
             #(first, last) pair of the option is used
             #NOTE(review): "id" shadows the builtin of the same name
             id = task_get_option("id")
             first = id[0][0]
             last = id[0][1]
             #make range
             #NOTE(review): range() stops before "last", so the record with
             #id "last" is not included -- confirm this is intended
             updated_recid_list = range(first, last)
-        else: 
+        else:
             updated_recid_list = create_recordid_list(last_modified_records)
-    
+
         write_message("Last update "+str(last_update_time)+" records: "+ \
                        str(len(last_modified_records))+" updates: "+ \
-                       str(len(updated_recid_list)), sys.stderr)	
+                       str(len(updated_recid_list)), sys.stderr)
 
         #write_message("updated_recid_list: "+str(updated_recid_list), sys.stderr)
-        result_intermediate = last_updated_result(rank_method_code, 
+        result_intermediate = last_updated_result(rank_method_code,
                                                   updated_recid_list)
         #result_intermediate is a list [weights, citation lists, reference lists]
         citation_weight_dic_intermediate = result_intermediate[0]
         citation_list_intermediate = result_intermediate[1]
         reference_list_intermediate = result_intermediate[2]
         citation_informations = get_citation_informations(updated_recid_list, config)
         #write_message("citation_informations: "+str(citation_informations),sys.stderr)
         #create_analysis_tables() #temporary.. needed to test how much faster in-mem indexing is
-        dic = ref_analyzer(citation_informations, 
-                           citation_weight_dic_intermediate, 
-                           citation_list_intermediate, 
+        dic = ref_analyzer(citation_informations,
+                           citation_weight_dic_intermediate,
+                           citation_list_intermediate,
                            reference_list_intermediate,
-                           config,updated_recid_list) 
+                           config,updated_recid_list)
                     #dic is docid-numberofreferences like {1: 2, 2: 0, 3: 1}
         #write_message("Docid-number of known references "+str(dic),sys.stderr)
         end_time = time.time()
         print "Total time of software: ", (end_time - begin_time)
     else:
         #nothing to do: no modified records and no forced id range
         dic = {}
         print "No new records added since last time this rank method was executed"
     return dic
 
 def get_bibrankmethod_lastupdate(rank_method_code):
     """Return the last execution date of a bibrank method.
 
     Reads rnkMETHOD.last_updated of the row whose name is
     RANK_METHOD_CODE.  When the stored value is None the string
     "0000-00-00 00:00:00" is returned instead.
 
     Raises IndexError when rnkMETHOD has no row with that name.
     """
     #the name is handed over as a query parameter, like in the other
     #run_sql calls of this module, so a quote in it cannot break the query
     rows = run_sql("select last_updated from rnkMETHOD where name = %s",
                    (rank_method_code,))
     last_updated = rows[0][0]
     if last_updated is None:
         return "0000-00-00 00:00:00"
     return last_updated
 
 def get_last_modified_rec(bibrank_method_lastupdate):
     """Return the records which have been modified since the last execution
        of the bibrank method. The result is expected to have ascending
        numerical order.
 
        BIBRANK_METHOD_LASTUPDATE is the lower bound (inclusive, ">=") for
        bibrec.modification_date; it is interpolated into the query text.
        Returns the result of run_sql for that query, ordered by id.
     """
-    query = """SELECT id FROM bibrec 
+    query = """SELECT id FROM bibrec
                WHERE modification_date >= '%s' """ % bibrank_method_lastupdate
     query += "order by id ASC"
     #NOTE(review): the local name "list" shadows the builtin type
     list = run_sql(query)
     return list
 
 def create_recordid_list(rec_ids):
-    """Create a list of record ids out of RECIDS. 
+    """Create a list of record ids out of RECIDS.
        The result is expected to have ascending numerical order.
 
        REC_IDS is a sequence of rows; the first element of every row is
        taken as the record id.  The order of the rows is kept as given,
        no sorting is done here.
     """
     rec_list = []
     for row in rec_ids:
         rec_list.append(row[0])
     return rec_list
 
 def create_record_tuple(list):
     """Render a sequence of record ids as the string "(id1,id2,...)".
 
     Every id is converted with str() and the ids are joined by commas in
     the order given, without sorting.  An empty sequence gives "()".
     """
     return '(' + ','.join([str(recid) for recid in list]) + ')'
 
 def last_updated_result(rank_method_code, recid_list):
     """Return the data stored by the previous run, with the entries of the
        records in RECID_LIST reset, or an all-zero initial result.
 
        The weights are read from rnkMETHODDATA for RANK_METHOD_CODE and
        the two dictionaries "citationdict" and "reversedict" from
        rnkCITATIONDATA.  Only when all three are present and the citation
        dictionary is not empty is the stored data passed through
        get_initial_result; in every other case the value of
        make_initial_result is returned.
 
        Returns a list [weights, citation dict, reference dict].
     """
     #default answer, used whenever the stored data is missing or unusable
     result = make_initial_result()
     query = """select relevance_data from rnkMETHOD, rnkMETHODDATA where
-               rnkMETHOD.id = rnkMETHODDATA.id_rnkMETHOD 
+               rnkMETHOD.id = rnkMETHODDATA.id_rnkMETHOD
                and rnkMETHOD.Name = '%s'"""% rank_method_code
     #NOTE(review): the local name "dict" shadows the builtin type
     dict = run_sql(query)
     if dict and dict[0] and dict[0][0]:
         #has to be prepared for corrupted data!
         #"error" is zlib.error; NOTE(review): a failure inside
         #marshal.loads is not caught by this handler
         try:
             dic = marshal.loads(decompress(dict[0][0]))
         except error:
             return result
         query = "select object_value from rnkCITATIONDATA where object_name='citationdict'"
         cit_compressed = run_sql(query)
         cit = []
         #NOTE(review): unlike above, the two decompress calls below are
         #not guarded against corrupted data
         if cit_compressed and cit_compressed[0] and cit_compressed[0][0]:
             cit = marshal.loads(decompress(cit_compressed[0][0]))
             if cit:
-                query = """select object_value from rnkCITATIONDATA 
+                query = """select object_value from rnkCITATIONDATA
                            where object_name='reversedict'"""
                 ref_compressed = run_sql(query)
                 if ref_compressed and ref_compressed[0] and ref_compressed[0][0]:
                     ref = marshal.loads(decompress(ref_compressed[0][0]))
                     result = get_initial_result(dic, cit, ref, recid_list)
     return result
 
 def get_initial_result(dic, cit, ref, recid_list):
     """Reset the stored citation data of the records in RECID_LIST so that
        it can be recalculated later.
 
        DIC maps recID -> citation weight, CIT maps recID -> list of citing
        recIDs and REF maps recID -> list of cited recIDs.  For every
        record in RECID_LIST:
          - it is removed from the CIT list of each record it referred to
            and the weight of that record in DIC is decreased by one;
          - it is removed from the REF list of each record that cited it;
          - its own entries are set to 0 (DIC) and [] (CIT and REF).
 
        The three dictionaries are changed in place and returned as the
        list [dic, cit, ref].
     """
     for recid in recid_list:
         #read both lists before they are emptied: the citers are needed
         #further down to clean the REF lists of the records citing recid
         old_citers = cit.get(recid) or []
         old_refs = ref.get(recid) or []
         dic[recid] = 0
         cit[recid] = []
         ref[recid] = []
         for cited in old_refs:
             if recid in cit.get(cited, ()):
                 cit[cited].remove(recid)
                 if cited in dic:
                     dic[cited] -= 1
         for citer in old_citers:
             if recid in ref.get(citer, ()):
                 ref[citer].remove(recid)
     return [dic, cit, ref]
 
 def make_initial_result():
     """Build the empty starting result for all records of bibrec.
 
     Returns the list [dic, cit, ref]: DIC maps every recID to 0, CIT and
     REF map every recID to an empty list of its own.
     """
     weights = {}
     citations = {}
     references = {}
     for row in run_sql("select id from bibrec"):
         recid = row[0]
         weights[recid] = 0
         citations[recid] = []
         references[recid] = []
     return [weights, citations, references]
 
 def get_citation_informations(recid_list, config):
     """returns a list of four dictionaries, all keyed by recID, that
        contain the citation information of the records in RECID_LIST:
          [0] report numbers of the record itself (primary ones followed
              by additional ones); present for every record
          [1] report numbers found in the references of the record
          [2] values of the reference tag of the record
          [3] first value of the publication info tag of the record
        Dictionaries [1]-[3] only have an entry when a value was found.
        The tags are read from the section of CONFIG that is named by
        the "function" option of its "rank_method" section.
        examples: [ {} {} {} ]
                  [ { 93: ['astro-ph/9812088']},
                    { 93: ['Phys. Rev. Lett. 96 (2006) 081301'] }, {} ]
-	NB: stuff here is for analysing new or changed records. 
+	NB: stuff here is for analysing new or changed records.
         see "ref_analyzer" for more.
     """
     begin_time = os.times()[4]
     d_reports_numbers = {}
     d_references_report_numbers = {}
     d_references_s = {}
     d_records_s = {}
     citation_informations = []
     record_pri_number_tag = config.get(config.get("rank_method", "function"),
                                        "publication_primary_number_tag")
     record_add_number_tag = config.get(config.get("rank_method", "function"),
                                        "publication_aditional_number_tag")
     reference_number_tag = config.get(config.get("rank_method", "function"),
                                       "publication_reference_number_tag")
     reference_tag = config.get(config.get("rank_method", "function"),
                                "publication_reference_tag")
     record_publication_info_tag = config.get(config.get("rank_method", "function"),
                                              "publication_info_tag")
 
     #turn the configured tags into the form expected by get_fieldvalues
     p_record_pri_number_tag = tagify(parse_tag(record_pri_number_tag))
     p_record_add_number_tag = tagify(parse_tag(record_add_number_tag))
     p_reference_number_tag = tagify(parse_tag(reference_number_tag))
     p_reference_tag = tagify(parse_tag(reference_tag))
     p_record_publication_info_tag = tagify(parse_tag(record_publication_info_tag))
-    
+
     for recid in recid_list:
         pri_report_numbers = get_fieldvalues(recid, p_record_pri_number_tag)
         add_report_numbers = get_fieldvalues(recid, p_record_add_number_tag)
         reference_report_numbers = get_fieldvalues(recid, p_reference_number_tag)
         #NOTE(review): this value is fetched again a few lines below
         #before it is used, so this first lookup looks redundant
         references_s = get_fieldvalues(recid, p_reference_tag)
-        
+
         #l_report_numbers is the same list object as pri_report_numbers;
         #the additional numbers are appended to it in place
         l_report_numbers = pri_report_numbers
         l_report_numbers.extend(add_report_numbers)
         d_reports_numbers[recid] = l_report_numbers
 
         if reference_report_numbers:
             d_references_report_numbers[recid] = reference_report_numbers
-   
+
         references_s = get_fieldvalues(recid, p_reference_tag)
         if references_s:
             d_references_s[recid] = references_s
 
         #only the first publication info value is kept
         record_s = get_fieldvalues(recid, p_record_publication_info_tag)
         if record_s:
             d_records_s[recid] = record_s[0]
 
     #the order of the four dictionaries is relied upon by ref_analyzer
     citation_informations.append(d_reports_numbers)
     citation_informations.append(d_references_report_numbers)
     citation_informations.append(d_references_s)
     citation_informations.append(d_records_s)
     end_time = os.times()[4]
     print "Execution time for generating \
            citation informations by parsing xml contents: ", (end_time - begin_time)
     return citation_informations
 
 def get_self_citations(new_record_list, citationdic, initial_selfcitdict, config):
     """Check which items have been cited by one of the authors of the
        citing item: go through id's in new_record_list, use citationdic to
        get citations, update "selfcites" and return it.
 
        NEW_RECORD_LIST: recIDs of the records to examine.
        CITATIONDIC: recID -> list of the recIDs citing that record.
        INITIAL_SELFCITDICT: the self citation dictionary to start from; it
          is updated in place and is also the return value.
        CONFIG: rank method configuration, the source of the main author,
          coauthor and external author tags.
 
        A citing record C is stored under the cited record K when at least
        one author value of C is also an author value of K.
     """
     i = 0 #just for debugging ..
     #get the tags for main author, coauthors, ext authors from config
     function = config.get("rank_method", "function")
     r_mainauthortag = config.get(function, "main_author_tag")
     r_coauthortag = config.get(function, "coauthor_tag")
     r_extauthortag = config.get(function, "extauthor_tag")
     #parse the tags
     mainauthortag = tagify(parse_tag(r_mainauthortag))
     coauthortag = tagify(parse_tag(r_coauthortag))
     extauthortag = tagify(parse_tag(r_extauthortag))
 
     selfcites = initial_selfcitdict
     for k in new_record_list:
         i = i+1
-        if task_get_option('verbose') >= 3:     
+        if task_get_option('verbose') >= 3:
             if (i % 100 == 0):
                 write_message("Done "+str(i)+" records", sys.stderr)
         #get the authors of k. extend(), not append(): the names have to
         #become single elements of authorlist, a nested list could never
         #be equal to an author name in the comparison below
         authorlist = get_fieldvalues(k, mainauthortag)
         authorlist.extend(get_fieldvalues(k, coauthortag))
         authorlist.extend(get_fieldvalues(k, extauthortag))
-        #author tag 
+        #author tag
         #get the "x-cites-this" list
         if k not in citationdic:
             continue
         for c in citationdic[k]:
             #get authors of c
             cauthorlist = get_fieldvalues(c, mainauthortag)
             cauthorlist.extend(get_fieldvalues(c, coauthortag))
             cauthorlist.extend(get_fieldvalues(c, extauthortag))
             for ca in cauthorlist:
                 if ca in authorlist:
                     #found! an entry that is missing, None or empty is
                     #started from an empty list so that c is always added
                     citers = selfcites.get(k) or []
                     #add only if not there already
                     if c not in citers:
                         citers.append(c)
                     selfcites[k] = citers
                     #one common author is enough for this citing record
                     break
     return selfcites
 
 def _get_record_authors(recid, author_tags):
     """Return the values of all AUTHOR_TAGS fields of record RECID as one list."""
     authors = []
     for tag in author_tags:
         authors.extend(get_fieldvalues(recid, tag))
     return authors
 
 def _add_citers_to_authors(author_cited_in, authors, citers):
     """Add CITERS to the entry of every non-empty name of AUTHORS in
        AUTHOR_CITED_IN, creating the entry when the author is new."""
     if not citers:
         return
     for author in authors:
         if not author:
             continue
         if author in author_cited_in:
             #add all elements that are not there already
             known = author_cited_in[author]
             for citer in citers:
                 if citer not in known:
                     known.append(citer)
         else:
             #a copy, so that two authors never share one list object
             author_cited_in[author] = list(citers)
 
 def get_author_citations(updated_redic_list, citedbydict, initial_author_dict, config):
     """Traverses citedbydict in order to build "which author is quoted where" dict.
        The keys of this are author names. An entry like "Apollinaire"->[1,2,3] means
        Apollinaire is cited in records 1,2 and 3.
        Input: citedbydict (recID -> list of citing recIDs),
               updated_redic_list = records to be searched,
               initial_author_dict: the dict from the database,
               config: source of the main author, coauthor and ext author tags.
        Output: authorciteddict. It is initial_author_dict, updated in place.
                Authors that are not yet in the dictionary get a new entry.
     """
     function = config.get("rank_method", "function")
     author_tags = []
     for option in ("main_author_tag", "coauthor_tag", "extauthor_tag"):
         author_tags.append(tagify(parse_tag(config.get(function, option))))
 
     author_cited_in = initial_author_dict
     if not citedbydict:
         return author_cited_in
 
     write_message("Checking records referred to in new records", sys.stderr)
     for u in updated_redic_list:
         if u in citedbydict:
             #verify it is an empty list, not None
             these_cite_k = citedbydict[u] or []
             _add_citers_to_authors(author_cited_in,
                                    _get_record_authors(u, author_tags),
                                    these_cite_k)
-    
+
     #go through the dictionary again: all keys but search only if new records are cited
     write_message("Checking authors in new records", sys.stderr)
     updated = set(updated_redic_list)
     for k in citedbydict.keys():
         #verify it is an empty list, not None
         these_cite_k = citedbydict[k] or []
         #do things only if these_cite_k contains any new stuff
         if updated.intersection(these_cite_k):
             _add_citers_to_authors(author_cited_in,
                                    _get_record_authors(k, author_tags),
                                    these_cite_k)
 
     return author_cited_in
 
 
-def ref_analyzer(citation_informations, initialresult, initial_citationlist, 
+def ref_analyzer(citation_informations, initialresult, initial_citationlist,
                  initial_referencelist,config, updated_rec_list ):
     """Analyze the citation informations and calculate the citation weight
        and cited by list dictionary.
 
        CITATION_INFORMATIONS: the four dictionaries, indexed 0..3, that
          are read below as d_reports_numbers, d_references_report_numbers,
          d_references_s and d_records_s.
        INITIALRESULT: recID -> citation weight to start from.
        INITIAL_CITATIONLIST: recID -> list of citing recIDs to start from.
        INITIAL_REFERENCELIST: recID -> list of cited recIDs to start from.
        CONFIG: rank method configuration.
        UPDATED_REC_LIST: recIDs handled in this run; used for the self
          citation and author citation dictionaries.
 
        The three initial dictionaries are updated in place.  The citation,
        reference, self citation and author citation dictionaries are
        written to the database through insert_cit_ref_list_intodb and
        timings are printed.  Returns the updated weight dictionary.
     """
     #NOTE(review): record_pri_number_tag is assigned twice here but is
     #not read anywhere in this function
     pubrefntag = record_pri_number_tag = config.get(config.get("rank_method", "function"),
                                                     "publication_reference_number_tag")
     pubreftag = record_pri_number_tag = config.get(config.get("rank_method", "function"),
                                                     "publication_reference_tag")
     #pubrefntag is prob 999C5r, pubreftag 999c5s
     citation_list = initial_citationlist
     reference_list = initial_referencelist
     result = initialresult
     d_reports_numbers = citation_informations[0]
     d_references_report_numbers = citation_informations[1]
-    d_references_s = citation_informations[2] 
+    d_references_s = citation_informations[2]
        #of type: {77: ['Nucl. Phys. B 72 (1974) 461','blah blah'], 93: ['..'], ..}
     d_records_s = citation_informations[3]
     t1 = os.times()[4]
     if task_get_option('verbose') >= 1:
         write_message("Phase 1: d_references_report_numbers", sys.stderr)
     #d_references_report_numbers: e.g 8 -> ([astro-ph/9889],[hep-ph/768])
     #meaning: rec 8 contains these in bibliography
 
     #debug: add ref lit to tmpcit
     #for k in reference_list.keys():
     #    li = reference_list[k]
     #    for l in li:
     #   write_citer_cited(k,l)
 
     #Phase 1: resolve the report numbers cited by each record; only the
     #first matching record (rec_id[0]) is credited
     for recid, refnumbers in d_references_report_numbers.iteritems():
         for refnumber in refnumbers:
             if refnumber:
                 p = refnumber
                 f = 'reportnumber'
                 #sanitise p
                 #NOTE(review): the result of replace() is discarded, so p
                 #is searched with its newlines still in place
                 p.replace("\n",'')
                 #search for "hep-th/5644654 or such" in existing records
                 rec_id = get_recids_matching_query(p, f)
                 if rec_id and rec_id[0]:
                     write_citer_cited(recid, rec_id[0])
                     remove_from_missing(p)
                     if result.has_key(rec_id[0]):
                         result[rec_id[0]] += 1
                     # Citation list should have rec_id[0] but check anyway
                     #NOTE(review): recid is appended without a duplicate
                     #check here, unlike in the later phases
                     if citation_list.has_key(rec_id[0]):
                         citation_list[rec_id[0]].append(recid)
                     else:
                         citation_list[rec_id[0]] = [recid]
                     if reference_list.has_key(recid):
                         reference_list[recid].append(rec_id[0])
                     else:
                         reference_list[recid] = [rec_id[0]]
                 else:
                     #the reference we wanted was not found among our records.
                     #put the reference in the "missing"
-                    insert_into_missing(recid, p) 
+                    insert_into_missing(recid, p)
     t2 = os.times()[4]
     if task_get_option('verbose') >= 1:
         write_message("Phase 2: d_references_s", sys.stderr)
     #Phase 2: resolve the reference strings cited by each record
     #NOTE(review): citation_list[rec_id[0]], result[rec_id[0]] and
     #reference_list[recid] are indexed without a has_key check here,
     #unlike in phases 1 and 3
     for recid, refss in d_references_s.iteritems():
         for refs in refss:
             if refs:
                 p = refs
                 f = 'publref'
                 rec_id = get_recids_matching_query(p, f)
                 if rec_id and not recid in citation_list[rec_id[0]]:
                     result[rec_id[0]] += 1
                     citation_list[rec_id[0]].append(recid)
                 if rec_id and not rec_id[0] in reference_list[recid]:
                     reference_list[recid].append(rec_id[0])
     t3 = os.times()[4]
     if task_get_option('verbose') >= 1:
         write_message("Phase 3: d_reports_numbers", sys.stderr)
 
     #Phase 3: the other direction -- search the reference number field
     #of all records for the report numbers of each record rec_id; every
     #hit recid is a record citing rec_id
     for rec_id, recnumbers in d_reports_numbers.iteritems():
         for recnumber in recnumbers:
             if recnumber:
                 p = recnumber
                 recid_list = get_recids_matching_query(p, pubrefntag)
                 if recid_list:
                     for recid in recid_list:
                         if not citation_list.has_key(rec_id):
                             citation_list[rec_id] = []
                         if not recid in citation_list[rec_id]:
                             result[rec_id] += 1
                             citation_list[rec_id].append(recid)
                         if not reference_list.has_key(recid):
                             reference_list[recid] = []
                         if not rec_id in reference_list[recid]:
                             reference_list[recid].append(rec_id)
     if task_get_option('verbose') >= 1:
         write_message("Phase 4: d_records_s", sys.stderr)
     t4 = os.times()[4]
     #Phase 4: search the reference field of all records for the
     #publication info of each record, cut at its first "-"
     #NOTE(review): citation_list[recid] and reference_list[rec_id] are
     #indexed without a has_key check here
     for recid, recs in d_records_s.iteritems():
         tmp = recs.find("-")
         if tmp < 0:
             recs_modified = recs
         else:
             recs_modified = recs[:tmp]
         p = recs_modified
         rec_ids = get_recids_matching_query(p, pubreftag)
         if rec_ids:
             for rec_id in rec_ids:
                 if not rec_id in citation_list[recid]:
                     result[recid] += 1
                     citation_list[recid].append(rec_id)
                 if not recid in reference_list[rec_id]:
                     reference_list[rec_id].append(recid)
 
     if task_get_option('verbose') >= 1:
         write_message("Phase 5: reverse lists", sys.stderr)
 
     #remove empty lists in citation and reference
     keys = citation_list.keys()
     for k in keys:
         if not citation_list[k]:
             del citation_list[k]
 
     keys = reference_list.keys()
     for k in keys:
         if not reference_list[k]:
             del reference_list[k]
 
     if task_get_option('verbose') >= 1:
         write_message("Phase 6: self-citations", sys.stderr)
     selfdic = {}
     #get the initial self citation dict
-    initial_self_dict = get_cit_dict("selfcitdict") 
-    #add new records to selfdic 
-    selfdic = get_self_citations(updated_rec_list, citation_list, 
+    initial_self_dict = get_cit_dict("selfcitdict")
+    #add new records to selfdic
+    selfdic = get_self_citations(updated_rec_list, citation_list,
                                  initial_self_dict, config)
     #selfdic consists of
     #key k -> list of values [v1,v2,..]
     #where k is a record with author A and k cites v1,v2.. and A appears in v1,v2..
 
     #create a reverse "x cited by y" self cit dict
     selfcitedbydic = {}
     for k in selfdic.keys():
         vlist = selfdic[k]
         for v in vlist:
             if selfcitedbydic.has_key(v):
                 tmplist = selfcitedbydic[v]
                 tmplist.append(k)
             else:
                 tmplist = [k]
             selfcitedbydic[v] = tmplist
 
     if task_get_option('verbose') >= 1:
         write_message("Getting author citations", sys.stderr)
 
 
     #get author citations for records in updated_rec_list
     initial_author_dict = get_initial_author_dict()
-    authorcitdic = get_author_citations(updated_rec_list, citation_list, 
-                                        initial_author_dict, config) 
+    authorcitdic = get_author_citations(updated_rec_list, citation_list,
+                                        initial_author_dict, config)
 
 
-    if task_get_option('verbose') >= 3:         
+    if task_get_option('verbose') >= 3:
         #print only X first to prevent flood
         tmpdict = {}
         tmp = citation_list.keys()[0:10]
         for t in tmp:
             tmpdict[t] = citation_list[t]
         write_message("citation_list (x is cited by y): "+str(tmpdict), sys.stderr)
         write_message("size: "+str(len(citation_list.keys())), sys.stderr)
         tmp = reference_list.keys()[0:10]
         tmpdict = {}
         for t in tmp:
             tmpdict[t] = reference_list[t]
-        write_message("reference_list (x cites y): "+str(tmpdict), sys.stderr)   
+        write_message("reference_list (x cites y): "+str(tmpdict), sys.stderr)
         write_message("size: "+str(len(reference_list.keys())), sys.stderr)
         tmp = selfcitedbydic.keys()[0:10]
         tmpdict = {}
         for t in tmp:
             tmpdict[t] = selfcitedbydic[t]
         write_message("selfcitedbydic (x is cited by y and one  \
-                       of the authors of x same as y's): "+str(tmpdict), sys.stderr)     
+                       of the authors of x same as y's): "+str(tmpdict), sys.stderr)
         write_message("size: "+str(len(selfcitedbydic.keys())), sys.stderr)
         tmp = selfdic.keys()[0:100]
         tmpdict = {}
         for t in tmp:
             tmpdict[t] = selfdic[t]
         write_message("selfdic (x cites y and one of the authors \
-                       of x same as y's): "+str(tmpdict), sys.stderr)  
+                       of x same as y's): "+str(tmpdict), sys.stderr)
         write_message("size: "+str(len(selfdic.keys())), sys.stderr)
         tmp = authorcitdic.keys()[0:10]
         tmpdict = {}
         for t in tmp:
             tmpdict[t] = authorcitdic[t]
-        write_message("authorcitdic (author is cited in recs): "+str(tmpdict), sys.stderr)       
+        write_message("authorcitdic (author is cited in recs): "+str(tmpdict), sys.stderr)
         write_message("size: "+str(len(authorcitdic.keys())), sys.stderr)
     #store everything but the weights; those are handed back to the caller
-    insert_cit_ref_list_intodb(citation_list, reference_list, 
+    insert_cit_ref_list_intodb(citation_list, reference_list,
                                selfcitedbydic, selfdic, authorcitdic)
 
     t5 = os.times()[4]
     #the last figure (t5-t4) covers phases 4 to 6 and the database writes
     print "\nExecution time for analyzing the citation information generating the dictionary: "
     print "checking ref number: ", (t2-t1)
     print "checking ref ypvt: ", (t3-t2)
     print "checking rec number: ", (t4-t3)
     print "checking rec ypvt: ", (t5-t4)
     print "total time of ref_analyze: ", (t5-t1)
     return result
 
 def get_decompressed_xml(xml):
     """Decompress XML with zlib and pass the result to create_records.
 
     Returns whatever create_records returns for the decompressed data.
     """
     #NOTE(review): create_records is not among the imports visible at the
     #top of this file -- confirm where it is expected to come from
     return create_records(decompress(xml))
 
 def insert_cit_ref_list_intodb(citation_dic, reference_dic, selfcbdic,
                                selfdic, authorcitdic):
     """Insert the reference and citation list into the database
 
        REFERENCE_DIC, CITATION_DIC, SELFCBDIC and SELFDIC are stored in
        rnkCITATIONDATA under the names "reversedict", "citationdict",
        "selfcitedbydict" and "selfcitdict".
        AUTHORCITDIC (author name -> list) is stored one row per author in
        rnkAUTHORDATAR, which is created first if it does not exist.
        Database errors are printed, they are not raised to the caller.
     """
     insert_into_cit_db(reference_dic,"reversedict")
     insert_into_cit_db(citation_dic,"citationdict")
     insert_into_cit_db(selfcbdic,"selfcitedbydict")
     insert_into_cit_db(selfdic,"selfcitdict")
 
     #update author-citations.. but make sure the table exists
     sql = """CREATE TABLE IF NOT EXISTS rnkAUTHORDATAR (aterm varchar(50) default NULL,
              hitlist longblob, UNIQUE KEY aterm (aterm))"""
     #a failure to create the table is ignored on purpose here; it shows
     #up as an error in the loop below
     try:
         run_sql(sql)
     except:
         pass
 
     for a in authorcitdic.keys():
         lserarr = (serialize_via_marshal(authorcitdic[a]))
         #author name: replace " with something else
         #NOTE(review): the result of replace() is discarded, so the name
         #is stored unchanged; the queries below pass it as a parameter
         a.replace('"', '\'')
         a = unicode(a, 'utf-8')
         #insert the author when there is no row yet, update it otherwise
         try:
             ablob = run_sql("select hitlist from rnkAUTHORDATAR where aterm = %s", (a,))
             if not (ablob):
                 #print "insert into rnkAUTHORDATAR(aterm,hitlist) values (%s,%s)" , (a,lserarr)
-                run_sql("insert into rnkAUTHORDATAR(aterm,hitlist) values (%s,%s)", 
+                run_sql("insert into rnkAUTHORDATAR(aterm,hitlist) values (%s,%s)",
                          (a,lserarr))
             else:
                 #print "UPDATE rnkAUTHORDATAR SET hitlist  = %s where aterm=%s""" , (lserarr,a)
-                run_sql("UPDATE rnkAUTHORDATAR SET hitlist  = %s where aterm=%s", 
+                run_sql("UPDATE rnkAUTHORDATAR SET hitlist  = %s where aterm=%s",
                         (lserarr,a))
         except:
             print "Critical error: could not write rnkAUTHORDATAR "
             print "into db. aterm="+a+" hitlist="+str(lserarr)+"\n"
             traceback.print_tb(sys.exc_info()[2])
 
 def insert_into_cit_db(dic, name):
     """an aux thing to avoid repeating code
 
        Serializes DIC with serialize_via_marshal and stores it as the
        object_value of the rnkCITATIONDATA row called NAME, then sets
        last_updated of that row to the current local time.
        Any error is printed together with its traceback, not raised.
     """
     ndate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     #NOTE(review): only UPDATE statements are issued -- presumably the
     #row for NAME has to exist already; confirm against the table setup
     try:
         s = serialize_via_marshal(dic)
         print "size of "+name+" "+str(len(s))
-        run_sql("UPDATE rnkCITATIONDATA SET object_value = %s where object_name = %s", 
+        run_sql("UPDATE rnkCITATIONDATA SET object_value = %s where object_name = %s",
                 (s, name))
-        run_sql("UPDATE rnkCITATIONDATA SET last_updated = %s where object_name = %s", 
+        run_sql("UPDATE rnkCITATIONDATA SET last_updated = %s where object_name = %s",
                  (ndate,name))
     except:
         print "Critical error: could not write "+name+" into db"
-        traceback.print_tb(sys.exc_info()[2])       
+        traceback.print_tb(sys.exc_info()[2])
 
 
 def get_cit_dict(name):
     """get a named citation dict from the db"""
     cdict = {}
     try:
         cdict = run_sql("select object_value from rnkCITATIONDATA where object_name = %s",
                        (name,))
         if cdict and cdict[0] and cdict[0][0]:
             dict_from_db = marshal.loads(decompress(cdict[0][0]))
             return dict_from_db
         else:
             return {}
     except:
         print "Critical error: could not read "+name+" from db"
-        traceback.print_tb(sys.exc_info()[2])       
+        traceback.print_tb(sys.exc_info()[2])
     return dict
 
 def get_initial_author_dict():
     """read author->citedinlist dict from the db"""
     dict = {}
     try:
-        ah = run_sql("select aterm,hitlist from rnkAUTHORDATAR") 
+        ah = run_sql("select aterm,hitlist from rnkAUTHORDATAR")
         for (a, h) in ah:
             dict[a] = deserialize_via_marshal(h)
         return dict
     except:
         print "Critical error: could not read rnkAUTHORDATAR"
         traceback.print_tb(sys.exc_info()[2])
         dict = {}
         return dict
 
 
 def insert_into_missing(recid, report):
-    """put the referingrecordnum-publicationstring into 
+    """put the referingrecordnum-publicationstring into
        the "we are missing these" table"""
     report.replace('"','\'')
     try:
         srecid = str(recid)
         wasalready = run_sql("select id_bibrec from rnkCITATIONDATAEXT where id_bibrec = %s and extcitepubinfo = %s",
                               (srecid,report))
         if not wasalready:
             run_sql("insert into rnkCITATIONDATAEXT(id_bibrec, extcitepubinfo) values (%s,%s)",
                    (srecid, report))
     except:
         #we should complain but it can result to million lines of warnings so just pass..
         pass
-        
+
 def remove_from_missing(report):
     """remove the recid-ref -pairs from the "missing" table for report x: prob
        in the case ref got in our library collection"""
     report.replace('"','\'')
     try:
         run_sql("delete from rnkCITATIONDATAEXT where extcitepubinfo= %s", (report,))
     except:
         #we should complain but it can result to million lines of warnings so just pass..
         pass
-                                                          
+
 
 def create_analysis_tables():
     """temporary simple table + index"""
     sql1 = "CREATE TABLE IF NOT EXISTS tmpcit (citer mediumint(10), cited mediumint(10)) TYPE=MyISAM"
     sql2 = "CREATE UNIQUE INDEX citercited on tmpcit(citer, cited)"
     sql3 = "CREATE INDEX citer on tmpcit(citer)"
     sql4 = "CREATE INDEX cited on tmpcit(cited)"
     try:
         run_sql(sql1)
         run_sql(sql2)
         run_sql(sql3)
         run_sql(sql4)
     except:
         pass
 
 def write_citer_cited(citer, cited):
     """write an entry to tmp table"""
     sciter = str(citer)
     scited = str(cited)
     try:
         run_sql("insert into tmpcit(citer, cited) values (%s,%s)", (sciter,scited))
     except:
         pass
 
 def print_missing(num):
     """Print the contents of rnkCITATIONDATAEXT for records that are needed more than num times"""
     if not num:
         num = 50
     try:
         res = run_sql("select count(id_bibrec), extcitepubinfo from rnkCITATIONDATAEXT \
                        group by id_bibrec having count(id_bibrec) >= %s \
                        order by count(id_bibrec)",(num,))
         for (cnt, brec) in res:
             print str(cnt)+"\t"+brec
     except:
         pass
 
 def tagify(parsedtag):
     """aux auf to make '100__a' out of ['100','','','a']"""
     tag = ""
     for t in parsedtag:
         if t == '':
             t = '_'
         tag = tag+t
-    return tag
\ No newline at end of file
+    return tag
diff --git a/modules/bibrank/lib/bibrank_tag_based_indexer.py b/modules/bibrank/lib/bibrank_tag_based_indexer.py
index 0190de1c8..a39ed9153 100644
--- a/modules/bibrank/lib/bibrank_tag_based_indexer.py
+++ b/modules/bibrank/lib/bibrank_tag_based_indexer.py
@@ -1,443 +1,443 @@
 # -*- coding: utf-8 -*-
 
 ## $Id$
 ## Ranking of records using different parameters and methods.
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import sys
 import time
 import marshal
 import traceback
 import ConfigParser
 
 from invenio.config import \
      CFG_SITE_LANG, \
      CFG_ETCDIR
 from invenio.search_engine import perform_request_search, HitSet
 from invenio.bibrank_citation_indexer import get_citation_weight, print_missing
 from invenio.bibrank_downloads_indexer import *
 from invenio.dbquery import run_sql, serialize_via_marshal, deserialize_via_marshal
 from invenio.bibtask import task_get_option, write_message
 
 
 options = {}
 
 def citation_exec(rank_method_code, name, config):
     """Rank method for citation analysis"""
     #first check if this is a specific task
     if task_get_option("cmd") == "print-missing":
         num = task_get_option("num")
         print_missing(num)
     dict = get_citation_weight(rank_method_code, config)
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     if dict: intoDB(dict, date, rank_method_code)
     else: write_message("no need to update the indexes for citations")
 
 def download_weight_filtering_user(run):
     return bibrank_engine(run)
 
 def download_weight_total(run):
     return bibrank_engine(run)
 
 def file_similarity_by_times_downloaded(run):
     return bibrank_engine(run)
 
 def download_weight_filtering_user_exec (rank_method_code, name, config):
     """Ranking by number of downloads per User.
-    Only one full Text Download is taken in account for one 
+    Only one full Text Download is taken in account for one
     specific userIP address"""
     time1 = time.time()
     dic = fromDB(rank_method_code)
     last_updated = get_lastupdated(rank_method_code)
     keys = new_downloads_to_index(last_updated)
     filter_downloads_per_hour(keys, last_updated)
     dic = get_download_weight_filtering_user(dic, keys)
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     intoDB(dic, date, rank_method_code)
     time2 = time.time()
     return {"time":time2-time1}
 
 def download_weight_total_exec(rank_method_code, name, config):
     """rankink by total number of downloads without check the user ip
     if users downloads 3 time the same full text document it has to be count as 3 downloads"""
     time1 = time.time()
     dic = fromDB(rank_method_code)
     last_updated = get_lastupdated(rank_method_code)
     keys = new_downloads_to_index(last_updated)
     filter_downloads_per_hour(keys, last_updated)
     dic = get_download_weight_total(dic, keys)
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     intoDB(dic, date, rank_method_code)
     time2 = time.time()
     return {"time":time2-time1}
 
 def file_similarity_by_times_downloaded_exec(rank_method_code, name, config):
     """update dictionnary {recid:[(recid, nb page similarity), ()..]}"""
     time1 = time.time()
     dic = fromDB(rank_method_code)
     last_updated = get_lastupdated(rank_method_code)
     keys = new_downloads_to_index(last_updated)
     filter_downloads_per_hour(keys, last_updated)
     dic = get_file_similarity_by_times_downloaded(dic, keys)
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     intoDB(dic, date, rank_method_code)
     time2 = time.time()
     return {"time":time2-time1}
 
 def single_tag_rank_method_exec(rank_method_code, name, config):
     """Creating the rank method data"""
     startCreate = time.time()
     rnkset = {}
     rnkset_old = fromDB(rank_method_code)
     date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     rnkset_new = single_tag_rank(config)
     rnkset = union_dicts(rnkset_old, rnkset_new)
     intoDB(rnkset, date, rank_method_code)
 
 def single_tag_rank(config):
     """Connect the given tag with the data from the kb file given"""
     write_message("Loading knowledgebase file", verbose=9)
     kb_data = {}
     records = []
 
     write_message("Reading knowledgebase file: %s" % \
                    config.get(config.get("rank_method", "function"), "kb_src"))
     input = open(config.get(config.get("rank_method", "function"), "kb_src"), 'r')
     data = input.readlines()
     for line in data:
         if not line[0:1] == "#":
             kb_data[string.strip((string.split(string.strip(line), "---"))[0])] = (string.split(string.strip(line), "---"))[1]
     write_message("Number of lines read from knowledgebase file: %s" % len(kb_data))
 
     tag = config.get(config.get("rank_method", "function"), "tag")
     tags = config.get(config.get("rank_method", "function"), "check_mandatory_tags").split(", ")
     if tags == ['']:
         tags = ""
 
     records = []
     for (recids, recide) in options["recid_range"]:
         write_message("......Processing records #%s-%s" % (recids, recide))
         recs = run_sql("SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (tag, recids, recide))
         valid = HitSet(trailing_bits=1)
         valid.discard(0)
         for key in tags:
             newset = HitSet()
             newset += [recid[0] for recid in (run_sql("SELECT id_bibrec FROM bib%sx, bibrec_bib%sx WHERE id_bibxxx=id AND tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (key, recids, recide)))]
             valid.intersection_update(newset)
         if tags:
             recs = filter(lambda x: x[0] in valid, recs)
         records = records + list(recs)
         write_message("Number of records found with the necessary tags: %s" % len(records))
 
     records = filter(lambda x: x[0] in options["validset"], records)
     rnkset = {}
     for key, value in records:
         if kb_data.has_key(value):
             if not rnkset.has_key(key):
                 rnkset[key] = float(kb_data[value])
             else:
                 if kb_data.has_key(rnkset[key]) and float(kb_data[value]) > float((rnkset[key])[1]):
                     rnkset[key] = float(kb_data[value])
         else:
             rnkset[key] = 0
 
     write_message("Number of records available in rank method: %s" % len(rnkset))
     return rnkset
 
 def get_lastupdated(rank_method_code):
     """Get the last time the rank method was updated"""
     res = run_sql("SELECT rnkMETHOD.last_updated FROM rnkMETHOD WHERE name=%s", (rank_method_code, ))
     if res:
         return res[0][0]
     else:
         raise Exception("Is this the first run? Please do a complete update.")
 
 def intoDB(dict, date, rank_method_code):
     """Insert the rank method data into the database"""
     mid = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
     del_rank_method_codeDATA(rank_method_code)
     serdata = serialize_via_marshal(dict);
     midstr = str(mid[0][0]);
     run_sql("INSERT INTO rnkMETHODDATA(id_rnkMETHOD, relevance_data) VALUES (%s,%s)", (midstr, serdata,))
     run_sql("UPDATE rnkMETHOD SET last_updated=%s WHERE name=%s", (date, rank_method_code))
 
 def fromDB(rank_method_code):
     """Get the data for a rank method"""
     id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
     res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], ))
     if res:
         return deserialize_via_marshal(res[0][0])
     else:
         return {}
 
 def del_rank_method_codeDATA(rank_method_code):
     """Delete the data for a rank method"""
     id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
     res = run_sql("DELETE FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], ))
 
 def del_recids(rank_method_code, range_rec):
     """Delete some records from the rank method"""
     id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
     res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0] ))
     if res:
         rec_dict = deserialize_via_marshal(res[0][0])
         write_message("Old size: %s" % len(rec_dict))
         for (recids, recide) in range_rec:
             for i in range(int(recids), int(recide)):
                 if rec_dict.has_key(i):
                     del rec_dict[i]
         write_message("New size: %s" % len(rec_dict))
         date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         intoDB(rec_dict, date, rank_method_code)
     else:
         write_message("Create before deleting!")
 
 def union_dicts(dict1, dict2):
     "Returns union of the two dicts."
     union_dict = {}
     for (key, value) in dict1.iteritems():
         union_dict[key] = value
     for (key, value) in dict2.iteritems():
         union_dict[key] = value
     return union_dict
 
 def rank_method_code_statistics(rank_method_code):
     """Print statistics"""
 
     method = fromDB(rank_method_code)
     max = ('', -999999)
     maxcount = 0
     min = ('', 999999)
     mincount = 0
 
     for (recID, value) in method.iteritems():
         if value < min and value > 0:
             min = value
         if value > max:
             max = value
 
     for (recID, value) in method.iteritems():
         if value == min:
             mincount += 1
         if value == max:
             maxcount += 1
 
     write_message("Showing statistic for selected method")
     write_message("Method name: %s" % getName(rank_method_code))
     write_message("Short name: %s" % rank_method_code)
     write_message("Last run: %s" % get_lastupdated(rank_method_code))
     write_message("Number of records: %s" % len(method))
     write_message("Lowest value: %s - Number of records: %s" % (min, mincount))
     write_message("Highest value: %s - Number of records: %s" % (max, maxcount))
     write_message("Divided into 10 sets:")
     for i in range(1, 11):
         setcount = 0
         distinct_values = {}
         lower = -1.0 + ((float(max + 1) / 10)) * (i - 1)
         upper = -1.0 + ((float(max + 1) / 10)) * i
         for (recID, value) in method.iteritems():
             if value >= lower and value <= upper:
                 setcount += 1
                 distinct_values[value] = 1
         write_message("Set %s (%s-%s) %s Distinct values: %s" % (i, lower, upper, len(distinct_values), setcount))
 
 def check_method(rank_method_code):
     write_message("Checking rank method...")
     if len(fromDB(rank_method_code)) == 0:
         write_message("Rank method not yet executed, please run it to create the necessary data.")
     else:
         if len(add_recIDs_by_date(rank_method_code)) > 0:
             write_message("Records modified, update recommended")
         else:
             write_message("No records modified, update not necessary")
 
 def bibrank_engine(run):
     """Run the indexing task.
     Return 1 in case of success and 0 in case of failure.
     """
 
     try:
         import psyco
         psyco.bind(single_tag_rank)
         psyco.bind(single_tag_rank_method_exec)
         psyco.bind(serialize_via_marshal)
         psyco.bind(deserialize_via_marshal)
     except StandardError, e:
         pass
 
     startCreate = time.time()
     sets = {}
     try:
         options["run"] = []
         options["run"].append(run)
         for rank_method_code in options["run"]:
             cfg_name = getName(rank_method_code)
             write_message("Running rank method: %s." % cfg_name)
 
             file = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
             config = ConfigParser.ConfigParser()
             try:
                 config.readfp(open(file))
             except StandardError, e:
                 write_message("Cannot find configurationfile: %s" % file, sys.stderr)
                 raise StandardError
 
             cfg_short = rank_method_code
             cfg_function = config.get("rank_method", "function") + "_exec"
             cfg_name = getName(cfg_short)
             options["validset"] = get_valid_range(rank_method_code)
 
             if task_get_option("collection"):
                 l_of_colls = string.split(task_get_option("collection"), ", ")
                 recIDs = perform_request_search(c=l_of_colls)
                 recIDs_range = []
                 for recID in recIDs:
                     recIDs_range.append([recID, recID])
                 options["recid_range"] = recIDs_range
             elif task_get_option("id"):
                 options["recid_range"] = task_get_option("id")
             elif task_get_option("modified"):
                 options["recid_range"] = add_recIDs_by_date(rank_method_code, task_get_option("modified"))
             elif task_get_option("last_updated"):
                 options["recid_range"] = add_recIDs_by_date(rank_method_code)
             else:
                 write_message("No records specified, updating all", verbose=2)
                 min_id = run_sql("SELECT min(id) from bibrec")[0][0]
                 max_id = run_sql("SELECT max(id) from bibrec")[0][0]
                 options["recid_range"] = [[min_id, max_id]]
 
             if task_get_option("quick") == "no":
                 write_message("Recalculate parameter not used, parameter ignored.", verbose=9)
 
             if task_get_option("cmd") == "del":
                 del_recids(cfg_short, options["recid_range"])
             elif task_get_option("cmd") == "add":
                 func_object = globals().get(cfg_function)
                 func_object(rank_method_code, cfg_name, config)
             elif task_get_option("cmd") == "stat":
                 rank_method_code_statistics(rank_method_code)
             elif task_get_option("cmd") == "check":
                 check_method(rank_method_code)
             elif task_get_option("cmd") == "print-missing":
                 func_object = globals().get(cfg_function)
-                func_object(rank_method_code, cfg_name, config)                
+                func_object(rank_method_code, cfg_name, config)
             elif task_get_option("cmd") == "repair":
                 pass
             else:
                 write_message("Invalid command found processing %s" % rank_method_code, sys.stderr)
                 raise StandardError
     except StandardError, e:
         write_message("\nException caught: %s" % e, sys.stderr)
         if task_get_option("verbose") >= 9:
             traceback.print_tb(sys.exc_info()[2])
         raise StandardError
 
     if task_get_option("verbose"):
         showtime((time.time() - startCreate))
     return 1
 
 def get_valid_range(rank_method_code):
     """Return a range of records"""
     write_message("Getting records from collections enabled for rank method.", verbose=9)
 
     res = run_sql("SELECT collection.name FROM collection, collection_rnkMETHOD, rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s",  (rank_method_code, ))
     l_of_colls = []
     for coll in res:
         l_of_colls.append(coll[0])
     if len(l_of_colls) > 0:
         recIDs = perform_request_search(c=l_of_colls)
     else:
         recIDs = []
     valid = HitSet()
     valid += recIDs
     return valid
 
 def add_recIDs_by_date(rank_method_code, dates=""):
     """Return recID range from records modified between DATES[0] and DATES[1].
        If DATES is not set, then add records modified since the last run of
        the ranking method RANK_METHOD_CODE.
     """
     if not dates:
         try:
             dates = (get_lastupdated(rank_method_code), '')
         except Exception, e:
             dates = ("0000-00-00 00:00:00", '')
     if dates[0] is None:
         dates = ("0000-00-00 00:00:00", '')
     query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s"""
     if dates[1]:
         query += " and b.modification_date <= %s"
     query += " ORDER BY b.id ASC"""
     if dates[1]:
         res = run_sql(query, (dates[0], dates[1]))
     else:
         res = run_sql(query, (dates[0], ))
     list = create_range_list(res)
     if not list:
         write_message("No new records added since last time method was run")
     return list
 
 def getName(rank_method_code, ln=CFG_SITE_LANG, type='ln'):
     """Returns the name of the method if it exists"""
 
     try:
         rnkid = run_sql("SELECT id FROM rnkMETHOD where name=%s", (rank_method_code, ))
         if rnkid:
             rnkid = str(rnkid[0][0])
             res = run_sql("SELECT value FROM rnkMETHODNAME where type=%s and ln=%s and id_rnkMETHOD=%s", (type, ln, rnkid))
             if not res:
                 res = run_sql("SELECT value FROM rnkMETHODNAME WHERE ln=%s and id_rnkMETHOD=%s and type=%s", (CFG_SITE_LANG, rnkid, type))
             if not res:
                 return rank_method_code
             return res[0][0]
         else:
             raise Exception
     except Exception, e:
         write_message("Cannot run rank method, either given code for method is wrong, or it has not been added using the webinterface.")
         raise Exception
 
 def create_range_list(res):
     """Creates a range list from a recID select query result contained
     in res. The result is expected to have ascending numerical order."""
     if not res:
         return []
     row = res[0]
     if not row:
         return []
     else:
         range_list = [[row[0], row[0]]]
     for row in res[1:]:
         id = row[0]
         if id == range_list[-1][1] + 1:
             range_list[-1][1] = id
         else:
             range_list.append([id, id])
     return range_list
 
 def single_tag_rank_method(run):
     return bibrank_engine(run)
 
 def showtime(timeused):
     """Show time used for method"""
     write_message("Time used: %d second(s)." % timeused, verbose=9)
 
 def citation(run):
     return bibrank_engine(run)
diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py
index b932325c5..e6ca42ada 100644
--- a/modules/websearch/lib/search_engine.py
+++ b/modules/websearch/lib/search_engine.py
@@ -1,4101 +1,4101 @@
 # -*- coding: utf-8 -*-
 ## $Id$
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 # pylint: disable-msg=C0301
 
 """CDS Invenio Search Engine in mod_python."""
 
 __lastupdated__ = """$Date$"""
 
 __revision__ = "$Id$"
 
 ## import general modules:
 import cgi
 import copy
 import string
 import os
 import re
 import time
 import urllib
 import zlib
 
 ## import CDS Invenio stuff:
 from invenio.config import \
      CFG_CERN_SITE, \
      CFG_OAI_ID_FIELD, \
      CFG_WEBCOMMENT_ALLOW_REVIEWS, \
      CFG_WEBSEARCH_CALL_BIBFORMAT, \
      CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \
      CFG_WEBSEARCH_FIELDS_CONVERT, \
      CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \
      CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \
      CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS, \
      CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \
      CFG_SITE_LANG, \
      CFG_SITE_NAME, \
      CFG_LOGDIR, \
      CFG_SITE_URL
 from invenio.search_engine_config import CFG_EXPERIMENTAL_FEATURES, InvenioWebSearchUnknownCollectionError
 from invenio.bibrecord import create_records, record_get_field_value, record_get_field_values
 from invenio.bibrank_record_sorter import get_bibrank_methods, rank_records
 from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list
 from invenio.bibindex_engine_stemmer import stem
 from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel
 from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT
 from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box
 from invenio.data_cacher import DataCacher
 from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search
 from invenio.access_control_admin import acc_get_action_id
 from invenio.access_control_config import VIEWRESTRCOLL, \
     CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS
 from invenio.websearchadminlib import get_detailed_page_tabs
 from invenio.intbitset import intbitset as HitSet
 from invenio.webinterface_handler import wash_urlargd
 from invenio.urlutils import make_canonical_urlargd
 from invenio.dbquery import DatabaseError
 from invenio.access_control_engine import acc_authorize_action
 
 import invenio.template
 webstyle_templates = invenio.template.load('webstyle')
 webcomment_templates = invenio.template.load('webcomment')
 
 from invenio.bibrank_citation_searcher import calculate_cited_by_list, \
 calculate_co_cited_with_list, get_self_cited_in, get_self_cited_by, get_records_with_num_cites
 from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box
 
 from invenio.dbquery import run_sql, run_sql_cached, get_table_update_time, Error
 from invenio.webuser import getUid, collect_user_info
 from invenio.webpage import page, pageheaderonly, pagefooteronly, create_error_box
 from invenio.messages import gettext_set_language
 
 try:
     from mod_python import apache
 except ImportError, e:
     pass # ignore user personalisation, needed e.g. for command-line
 
 try:
     import invenio.template
     websearch_templates = invenio.template.load('websearch')
 except:
     pass
 
 ## global vars:
 search_cache = {} # will cache results of previous searches
 cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection
 cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical?
 collection_reclist_cache_timestamp = 0
 field_i18nname_cache_timestamp = 0
 collection_i18nname_cache_timestamp = 0
 
 ## precompile some often-used regexp for speed reasons:
 re_word = re.compile('[\s]')
 re_quotes = re.compile('[\'\"]')
 re_doublequote = re.compile('\"')
 re_equal = re.compile('\=')
 re_logical_and = re.compile('\sand\s', re.I)
 re_logical_or = re.compile('\sor\s', re.I)
 re_logical_not = re.compile('\snot\s', re.I)
 re_operators = re.compile(r'\s([\+\-\|])\s')
 re_pattern_wildcards_at_beginning = re.compile(r'(\s)[\*\%]+')
 re_pattern_single_quotes = re.compile("'(.*?)'")
 re_pattern_double_quotes = re.compile("\"(.*?)\"")
 re_pattern_regexp_quotes = re.compile("\/(.*?)\/")
 re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+')
 re_pattern_space = re.compile("__SPACE__")
 re_pattern_today = re.compile("\$TODAY\$")
 re_unicode_lowercase_a = re.compile(unicode(r"(?u)[áàäâãå]", "utf-8"))
 re_unicode_lowercase_ae = re.compile(unicode(r"(?u)[æ]", "utf-8"))
 re_unicode_lowercase_e = re.compile(unicode(r"(?u)[éèëê]", "utf-8"))
 re_unicode_lowercase_i = re.compile(unicode(r"(?u)[íìïî]", "utf-8"))
 re_unicode_lowercase_o = re.compile(unicode(r"(?u)[óòöôõø]", "utf-8"))
 re_unicode_lowercase_u = re.compile(unicode(r"(?u)[úùüû]", "utf-8"))
 re_unicode_lowercase_y = re.compile(unicode(r"(?u)[ýÿ]", "utf-8"))
 re_unicode_lowercase_c = re.compile(unicode(r"(?u)[çć]", "utf-8"))
 re_unicode_lowercase_n = re.compile(unicode(r"(?u)[ñ]", "utf-8"))
 re_unicode_uppercase_a = re.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8"))
 re_unicode_uppercase_ae = re.compile(unicode(r"(?u)[Æ]", "utf-8"))
 re_unicode_uppercase_e = re.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8"))
 re_unicode_uppercase_i = re.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8"))
 re_unicode_uppercase_o = re.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8"))
 re_unicode_uppercase_u = re.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8"))
 re_unicode_uppercase_y = re.compile(unicode(r"(?u)[Ý]", "utf-8"))
 re_unicode_uppercase_c = re.compile(unicode(r"(?u)[ÇĆ]", "utf-8"))
 re_unicode_uppercase_n = re.compile(unicode(r"(?u)[Ñ]", "utf-8"))
 re_latex_lowercase_a = re.compile("\\\\[\"H'`~^vu=k]\{?a\}?")
 re_latex_lowercase_ae = re.compile("\\\\ae\\{\\}?")
 re_latex_lowercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?e\\}?")
 re_latex_lowercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?i\\}?")
 re_latex_lowercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?o\\}?")
 re_latex_lowercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?u\\}?")
 re_latex_lowercase_y = re.compile("\\\\[\"']\\{?y\\}?")
 re_latex_lowercase_c = re.compile("\\\\['uc]\\{?c\\}?")
 re_latex_lowercase_n = re.compile("\\\\[c'~^vu]\\{?n\\}?")
 re_latex_uppercase_a = re.compile("\\\\[\"H'`~^vu=k]\\{?A\\}?")
 re_latex_uppercase_ae = re.compile("\\\\AE\\{?\\}?")
 re_latex_uppercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?E\\}?")
 re_latex_uppercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?I\\}?")
 re_latex_uppercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?O\\}?")
 re_latex_uppercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?U\\}?")
 re_latex_uppercase_y = re.compile("\\\\[\"']\\{?Y\\}?")
 re_latex_uppercase_c = re.compile("\\\\['uc]\\{?C\\}?")
 re_latex_uppercase_n = re.compile("\\\\[c'~^vu]\\{?N\\}?")
 
 
 
 class RestrictedCollectionDataCacher(DataCacher):
     def __init__(self):
         def cache_filler():
             ret = []
             try:
                 viewcollid = acc_get_action_id(VIEWRESTRCOLL)
                 res = run_sql("""SELECT DISTINCT ar.value
                     FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id
                     WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (viewcollid,))
             except Exception:
                 # database problems, return empty cache
                 return []
             for coll in res:
                 ret.append(coll[0])
             return ret
 
         def timestamp_getter():
             return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT'))
 
         DataCacher.__init__(self, cache_filler, timestamp_getter)
 
 def collection_restricted_p(collection):
     cache = restricted_collection_cache.get_cache()
     return collection in cache
 
 try:
     restricted_collection_cache.is_ok_p
 except Exception:
     restricted_collection_cache = RestrictedCollectionDataCacher()
 
 
 def check_user_can_view_record(user_info, recid):
     """Tell whether the user described by USER_INFO may view record RECID.
 
     Records whose primary collection is not restricted are always
     viewable.  Otherwise access is granted when the user is authorized
     for the VIEWRESTRCOLL action on that collection, or when the user's
     email is found in one of the record fields listed in
     CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS.
 
     Returns an (auth_code, auth_msg) pair of the same kind as
     acc_authorize_action; (0, '') means that access is granted.
     """
 
     def _is_user_in_authorized_author_list_for_recid(user_info, recid):
         """Return True if the user's email is among the emails stored in the record."""
         candidate_emails = []
         for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS:
             candidate_emails.extend(get_fieldvalues(recid, tag))
         for candidate in candidate_emails:
             # compare ignoring surrounding whitespace and letter case
             if candidate.strip().lower() == user_info['email'].strip().lower():
                 return True
         return False
 
     primary_collection = guess_primary_collection_of_a_record(recid)
     if not collection_restricted_p(primary_collection):
         # unrestricted collection: everybody can view
         return (0, '')
 
     (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=primary_collection)
     if auth_code == 0:
         return (0, '')
     if _is_user_in_authorized_author_list_for_recid(user_info, recid):
         # not authorized via roles, but listed as author of the record
         return (0, '')
     return (auth_code, auth_msg)
 
 class IndexStemmingDataCacher(DataCacher):
     """Cache mapping each idxINDEX id to its stemming language."""
     def __init__(self):
         def cache_filler():
             """Return {index id: stemming language} ({} on database error)."""
             try:
                 rows = run_sql("""SELECT id, stemming_language FROM idxINDEX""")
             except DatabaseError:
                 # database problems, return empty cache
                 return {}
             else:
                 return dict(rows)
 
         def timestamp_getter():
             """Return the update time of the idxINDEX table."""
             return get_table_update_time('idxINDEX')
 
         DataCacher.__init__(self, cache_filler, timestamp_getter)
 
 def get_index_stemming_language(index_id):
     """Return the stemming language cached for index INDEX_ID.
 
     An INDEX_ID missing from the cache is not handled here, so the
     lookup error propagates to the caller.
     """
     return index_stemming_cache.get_cache()[index_id]
 
 # Create the module-level cache instance unless one already exists:
 # touching the attribute raises when the name is not bound yet (or has no
 # such attribute), and any exception leads to a fresh instance.
 # NOTE(review): the value of is_ok_p is not tested here, only that it can
 # be read -- confirm this is intended.
 try:
     index_stemming_cache.is_ok_p
 except Exception:
     index_stemming_cache = IndexStemmingDataCacher()
 
 class FieldI18nNameDataCacher(DataCacher):
     """Cache of translated field names (name type 'ln'), keyed by field
     name and then by language."""
     def __init__(self):
         def cache_filler():
             """Return {field name: {language: translated name}} ({} on failure)."""
             try:
                 rows = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name
             except Exception:
                 # database problems, return empty cache
                 return {}
             names = {}
             for field_name, lang, translation in rows:
                 if not translation:
                     continue # empty translations are not cached
                 names.setdefault(field_name, {})[lang] = translation
             return names
 
         def timestamp_getter():
             """Return the update time of the fieldname table."""
             return get_table_update_time('fieldname')
 
         DataCacher.__init__(self, cache_filler, timestamp_getter)
 
     def get_field_i18nname(self, f, ln=CFG_SITE_LANG):
         """Return the cached name of field F in language LN, or F itself
         when no such translation is cached."""
         try:
             return self.get_cache()[f][ln]
         except KeyError:
             # translation in LN does not exist
             return f
 
 # (Re)create the module-level cache instance when none exists yet or when
 # the existing one reports is_ok_p as false: both the failed name lookup
 # and the explicit raise end up in the except clause.
 try:
     if not field_i18n_name_cache.is_ok_p:
         raise Exception
 except Exception:
     field_i18n_name_cache = FieldI18nNameDataCacher()
 
 
 class CollectionRecListDataCacher(DataCacher):
     """Cache of collection record lists, keyed by collection name.
 
     The cache filler only registers the collection names, with None as
     value; the record list of a collection is read from the database the
     first time get_collection_reclist() is asked for it.
     """
     def __init__(self):
         def cache_filler():
             """Return {collection name: None} ({} when the query fails)."""
             ret = {}
             try:
                 res = run_sql("SELECT name,reclist FROM collection")
             except Exception:
                 # database problems, return empty cache
                 return {}
             for name, reclist in res:
                 ret[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll)
             return ret
 
         def timestamp_getter():
             """Return the update time of the collection table."""
             return get_table_update_time('collection')
 
         DataCacher.__init__(self, cache_filler, timestamp_getter)
 
     def get_collection_reclist(self, coll):
         """Return the HitSet built from the stored record list of collection COLL.
 
         The value is loaded from the database and stored in the cache
         whenever the cached entry is false (None, or anything else that
         evaluates to false).  COLL must be a key of the cache; unknown
         names are not handled here.  When no row is found, or the stored
         record list cannot be turned into a HitSet, an empty HitSet is
         cached and returned.
         """
         cache = self.get_cache()
         if not cache[coll]:
             # not yet in the cache, so calculate it and fill the cache:
             reclist = HitSet()
             # the collection name is passed as a query parameter instead of
             # being pasted into the SQL text, so that quotes in the name can
             # neither break nor alter the statement
             query = "SELECT nbrecs,reclist FROM collection WHERE name=%s"
             res = run_sql(query, (coll,), 1)
             if res:
                 try:
                     reclist = HitSet(res[0][1])
                 except:
                     # deliberately best-effort: keep the empty set
                     pass
             self.cache[coll] = reclist
             cache[coll] = reclist
         # finally, return reclist:
         return cache[coll]
 
 # (Re)create the module-level cache instance when none exists yet or when
 # the existing one reports is_ok_p as false: both the failed name lookup
 # and the explicit raise end up in the except clause.
 try:
     if not collection_reclist_cache.is_ok_p:
         raise Exception
 except Exception:
     collection_reclist_cache = CollectionRecListDataCacher()
 
 
 class CollectionI18nDataCacher(DataCacher):
     """Cache of translated collection names (name type 'ln'), keyed by
     collection name and then by language."""
     def __init__(self):
         def cache_filler():
             """Return {collection name: {language: translated name}} ({} on failure)."""
             try:
                 rows = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name
             except Exception:
                 # database problems, return empty cache
                 return {}
             names = {}
             for coll_name, lang, translation in rows:
                 if not translation:
                     continue # empty translations are not cached
                 names.setdefault(coll_name, {})[lang] = translation
             return names
 
         def timestamp_getter():
             """Return the update time of the collectionname table."""
             return get_table_update_time('collectionname')
 
         DataCacher.__init__(self, cache_filler, timestamp_getter)
 
     def get_coll_i18nname(self, c, ln=CFG_SITE_LANG):
         """Return the cached name (of name type 'ln', 'long name') of
         collection C in language LN, or C itself when no such
         translation is cached."""
         translations = self.get_cache()
         try:
             return translations[c][ln]
         except KeyError:
             # translation in LN does not exist
             return c
 
 # (Re)create the module-level cache instance when none exists yet or when
 # the existing one reports is_ok_p as false: both the failed name lookup
 # and the explicit raise end up in the except clause.
 try:
     if not collection_i18n_name_cache.is_ok_p:
         raise Exception
 except Exception:
     collection_i18n_name_cache = CollectionI18nDataCacher()
 
 
 def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG):
     """Return all collections ordered by name, as a list of
        [collection name, printable collection name] pairs.
        The printable name is the collection name in language LN, cut
        after 30 characters (with '...' appended) when longer and, when
        LEVEL is non-zero, prefixed by LEVEL dashes.
        Suitable for create_search_box()."""
     pairs = []
     for dummy_id, coll_name in run_sql("SELECT id,name FROM collection ORDER BY name ASC"):
         # make a nice printable name in the given language:
         printable = get_coll_i18nname(coll_name, ln)
         if len(printable) > 30:
             printable = printable[:30] + "..."
         if level:
             printable = " " + level * '-' + " " + printable
         pairs.append([coll_name, printable])
     return pairs
 
 def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG):
     """Return the collections below collection COLLID, ordered by score
        and depth-first (each collection is directly followed by its own
        descendants), as a list of
        [collection name, printable collection name] pairs.
        The printable name is the collection name in language LN, cut
        after 30 characters (with '...' appended) when longer and, when
        LEVEL is non-zero, prefixed by LEVEL dashes; LEVEL grows by one
        for every step down the tree.
        Suitable for create_search_box()."""
     colls_nicely_ordered = []
     # COLLID is passed as a query parameter instead of being formatted
     # into the SQL text
     query = "SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c "\
             " WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC"
     res = run_sql(query, (collid,))
     for c, cid in res:
         # make a nice printable name (e.g. truncate c_printable for
         # long collection names in given language):
         c_printable = get_coll_i18nname(c, ln)
         if len(c_printable)>30:
             c_printable = c_printable[:30] + "..."
         if level:
             c_printable = " " + level * '-' + " " + c_printable
         colls_nicely_ordered.append([c, c_printable])
         # extend in place rather than rebuilding the whole list for
         # every child collection
         colls_nicely_ordered.extend(get_nicely_ordered_collection_list(cid, level+1, ln=ln))
     return colls_nicely_ordered
 
 def get_index_id_from_field(field):
     """Return the id of the first index found for field code FIELD,
        or zero when the query finds none.
        Example: field='author', output=4."""
     rows = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f
                       WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX
                       LIMIT 1""", (field,))
     if not rows:
         return 0
     return rows[0][0]
 
 def get_words_from_pattern(pattern):
     """Return the list of distinct whitespace-separated words of PATTERN.
 
     Each word is listed once, in the order of its first occurrence, so
     the result is the same on every run for the same pattern.
     """
     seen = {}
     words = []
     for word in pattern.split():
         if word not in seen:
             seen[word] = 1
             words.append(word)
     return words
 
 def create_basic_search_units(req, p, f, m=None, of='hb'):
     """Splits search pattern and search field into a list of independently searchable units.
        - A search unit is an '[operator, pattern, field, type]' list where
           'operator' is set union (|), set intersection (+) or set exclusion (-);
           'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
           'field' is either a code like 'title' or MARC tag like '100__a';
           'type' is the search type ('w' for word file search, 'a' for access file search,
                  'r' for regular expression search).
         - Optionally, the function accepts the match type argument 'm'.
           If it is set (e.g. from advanced search interface), then it
           performs this kind of matching.  If it is not set, then a guess is made.
           'm' can have values: 'a'='all of the words', 'o'='any of the words',
                                'p'='phrase/substring', 'r'='regular expression',
                                'e'='exact value'.
         - Warnings are printed on req (when not None) in case of HTML output formats.
         - Units whose pattern is a standalone '*', empty, or a single space
           are left out of the returned list."""
 
     opfts = [] # will hold [o,p,f,t] units
 
     ## check arguments: if matching type phrase/string/regexp, do we have field defined?
     if (m=='p' or m=='r' or m=='e') and not f:
         m = 'a'
         if of.startswith("h"):
             print_warning(req, "This matching type cannot be used within <em>any field</em>.  I will perform a word search instead." )
             print_warning(req, "If you want to phrase/substring/regexp search in a specific field, e.g. inside title, then please choose <em>within title</em> search option.")
 
     ## is desired matching type set?
     if m:
         ## A - matching type is known; good!
         if m == 'e':
             # A1 - exact value:
             opfts.append(['+', p, f, 'a']) # '+' since we have only one unit
         elif m == 'p':
             # A2 - phrase/substring:
             opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit
         elif m == 'r':
             # A3 - regular expression:
             opfts.append(['+', p, f, 'r']) # '+' since we have only one unit
         elif m == 'a' or m == 'w':
             # A4 - all of the words:
             p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed
             for word in get_words_from_pattern(p):
                 opfts.append(['+', word, f, 'w']) # '+' in all units
         elif m == 'o':
             # A5 - any of the words:
             p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed
             for word in get_words_from_pattern(p):
                 if len(opfts)==0:
                     opfts.append(['+', word, f, 'w']) # '+' in the first unit
                 else:
                     opfts.append(['|', word, f, 'w']) # '|' in further units
         else:
             if of.startswith("h"):
                 print_warning(req, "Matching type '%s' is not implemented yet." % m, "Warning")
             opfts.append(['+', "%" + p + "%", f, 'a'])
     else:
         ## B - matching type is not known: let us try to determine it by some heuristics
         ## (the 'p and' guards keep an empty pattern from being indexed)
         if f and p and p[0] == '"' and p[-1] == '"':
             ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search
             opfts.append(['+', p[1:-1], f, 'a'])
         elif f and p and p[0] == "'" and p[-1] == "'":
             ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search
             opfts.append(['+', '%' + p[1:-1] + '%', f, 'a'])
         elif f and p and p[0] == "/" and p[-1] == "/":
             ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search
             opfts.append(['+', p[1:-1], f, 'r'])
         elif f and p.find(',') >= 0:
             ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search
             opfts.append(['+', p, f, 'a'])
         elif f and str(f[0:2]).isdigit():
             ## B2 - does 'f' exist and starts by two digits?  => doing ACC search
             opfts.append(['+', p, f, 'a'])
         else:
             ## B3 - doing WRD search, but maybe ACC too
             # search units are separated by spaces unless the space is within single or double quotes
             # so, let us replace temporarily any space within quotes by '__SPACE__'
             p = re_pattern_single_quotes.sub(lambda x: "'"+x.group(1).replace(' ', '__SPACE__')+"'", p)
             p = re_pattern_double_quotes.sub(lambda x: "\""+x.group(1).replace(' ', '__SPACE__')+"\"", p)
             p = re_pattern_regexp_quotes.sub(lambda x: "/"+x.group(1).replace(' ', '__SPACE__')+"/", p)
             # wash argument:
             p = re_equal.sub(":", p)
             p = re_logical_and.sub(" ", p)
             p = re_logical_or.sub(" |", p)
             p = re_logical_not.sub(" -", p)
             p = re_operators.sub(r' \1', p)
             for pi in p.split(): # iterate through separated units (or items, as "pi" stands for "p item")
                 pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' '
                 # firstly, determine set operator
                 if pi[0] == '+' or pi[0] == '-' or pi[0] == '|':
                     oi = pi[0]
                     pi = pi[1:]
                 else:
                     # okay, there is no operator, so let us decide what to do by default
                     oi = '+' # by default we are doing set intersection...
                 # secondly, determine search pattern and field:
                 # (a colon in the very first position does not count as a field separator)
                 if pi.find(":") > 0:
                     fi, pi = pi.split(":", 1)
                 else:
                     fi, pi = f, pi
                 # look also for old ALEPH field names:
                 if fi and fi.lower() in CFG_WEBSEARCH_FIELDS_CONVERT:
                     fi = CFG_WEBSEARCH_FIELDS_CONVERT[fi.lower()]
                 # wash 'pi' argument:
                 if re_quotes.match(pi):
                     # B3a - quotes are found => do ACC search (phrase search)
                     if fi:
                         if pi[0] == '"' and pi[-1] == '"':
                             pi = pi.replace('"', '') # remove quote signs
                             opfts.append([oi, pi, fi, 'a'])
                         elif pi[0] == "'" and pi[-1] == "'":
                             pi = pi.replace("'", "") # remove quote signs
                             opfts.append([oi, "%" + pi + "%", fi, 'a'])
                         else: # unbalanced quotes, so do WRD query:
                             opfts.append([oi, pi, fi, 'w'])
                     else:
                         # fi is not defined, look at where we are doing exact or subphrase search (single/double quotes):
                         if pi[0] == '"' and pi[-1] == '"':
                             opfts.append([oi, pi[1:-1], "anyfield", 'a'])
                             if of.startswith("h"):
                                 print_warning(req, "Searching for an exact match inside any field may be slow.  You may want to search for words instead, or choose to search within specific field.")
                         else:
                             # nope, subphrase in global index is not possible => change back to WRD search
                             pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
                             for pii in get_words_from_pattern(pi):
                                 # since there may be '-' and other chars that we do not index in WRD
                                 opfts.append([oi, pii, fi, 'w'])
                             if of.startswith("h"):
                                 print_warning(req, "The partial phrase search does not work in any field.  I'll do a boolean AND searching instead.")
                                 print_warning(req, "If you want to do a partial phrase search in a specific field, e.g. inside title, then please choose 'within title' search option.", "Tip")
                                 print_warning(req, "If you want to do exact phrase matching, then please use double quotes.", "Tip")
                 elif fi and str(fi[0:2]).isdigit():
                     # B3b - fi exists and starts by two digits => do ACC search
                     # (same test as in B2 above)
                     opfts.append([oi, pi, fi, 'a'])
                 elif fi and not get_index_id_from_field(fi):
                     # B3c - fi exists but there is no words table for fi => try ACC search
                     opfts.append([oi, pi, fi, 'a'])
                 elif fi and pi.startswith('/') and pi.endswith('/'):
                     # B3d - fi exists and slashes found => try regexp search
                     opfts.append([oi, pi[1:-1], fi, 'r'])
                 else:
                     # B3e - general case => do WRD search
                     pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
                     for pii in get_words_from_pattern(pi):
                         opfts.append([oi, pii, fi, 'w'])
 
     ## sanity check: leave out units that cannot be searched.  A new list
     ## is built, instead of deleting by index while looping, so that every
     ## unit is examined, including the one right after a dropped unit.
     washed_opfts = []
     for unit in opfts:
         pi = unit[1]
         if pi == '*':
             if of.startswith("h"):
                 print_warning(req, "Ignoring standalone wildcard word.", "Warning")
         elif pi == '' or pi == ' ':
             fi = unit[2]
             if fi and of.startswith("h"):
                 print_warning(req, "Ignoring empty <em>%s</em> search term." % fi, "Warning")
         else:
             washed_opfts.append(unit)
 
     ## return search units:
     return washed_opfts
 
 def page_start(req, of, cc, as, ln, uid, title_message=None,
                description='', keywords='', recID=-1, tab=''):
     """Start page according to given output format.
 
     Sets the content type on REQ, sends the HTTP header and, depending
     on OF, writes the XML declaration or the HTML page header (with
     navigation trail and RSS link) followed by the search page start.
     Returns at once when REQ is false (call from CLI); nothing is sent
     for the 'id' output format.
     """
     _ = gettext_set_language(ln)
 
     if not title_message: title_message = _("Search Results")
 
     if not req:
         return # we were called from CLI
 
     # content type registered for OF; it is only consulted for the HTML
     # case below
     content_type = get_output_format_content_type(of)
 
     if of.startswith('x'):
         if of == 'xr':
             # we are doing RSS output
             req.content_type = "application/rss+xml"
             req.send_http_header()
             req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
         else:
             # we are doing XML output:
             req.content_type = "text/xml"
             req.send_http_header()
             req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
     elif of.startswith('t') or str(of[0:3]).isdigit():
         # we are doing plain text output:
         # (also taken when the first three characters of OF are digits --
         # presumably a MARC tag used as output format; confirm)
         req.content_type = "text/plain"
         req.send_http_header()
     elif of == "id":
         pass # nothing to do, we shall only return list of recIDs
     elif content_type == 'text/html':
         # we are doing HTML output:
         req.content_type = "text/html"
         req.send_http_header()
 
         if not description:
             description = "%s %s." % (cc, _("Search Results"))
 
         if not keywords:
             keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln), get_coll_i18nname(cc, ln))
 
         # the RSS link is built from the arguments of the current request
         argd = {}
         if req.args:
             argd = cgi.parse_qs(req.args)
         rssurl = websearch_templates.build_rss_url(argd)
 
         navtrail = create_navtrail_links(cc, as, ln)
         navtrail_append_title_p = 1
 
         # FIXME: Find a good point to put this code.
         # This is a nice hack to trigger jsMath only when displaying single
         # records.
         if of.lower() in CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS:
             metaheaderadd = """
   <script type='text/javascript'>
     jsMath = {
         styles: {'#jsMath_button': 'display: none'},
         Controls: {cookie: {printwarn: 0}}
     };
   </script>
   <script src='/jsMath/easy/invenio-jsmath.js' type='text/javascript'></script>
 """
         else:
             metaheaderadd = ''
         # NOTE(review): "(of != '' or of.lower() != 'hd')" is true for every
         # value of OF, so the test below is effectively
         # "tab != '' or of != 'hb'" -- confirm whether "and" was intended
         # before changing it.
         if tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb'):
             # If we are not in information tab in HD format, customize
             # the nav. trail to have a link back to main record. (Due
             # to the way perform_request_search() works, hb
             # (lowercase) is equal to hd)
             # (same always-true sub-expression here: effectively "of != 'hb'")
             if (of != '' or of.lower() != 'hd') and of != 'hb':
                 # Export
                 # the format name defaults to the format code when the
                 # format table has no row for it
                 format_name = of
                 query = "SELECT name FROM format WHERE code=%s"
                 res = run_sql(query, (of,))
                 if res:
                     format_name = res[0][0]
                 navtrail += ' &gt; <a class="navtrail" href="%s/record/%s">%s</a> &gt; %s' % \
                             (CFG_SITE_URL, recID, title_message, format_name)
             else:
                 # Discussion, citations, etc. tabs
                 tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label']
                 navtrail += ' &gt; <a class="navtrail" href="%s/record/%s">%s</a> &gt; %s' % \
                             (CFG_SITE_URL, recID, title_message, _(tab_label))
             # the title is already part of the trail built above
             navtrail_append_title_p = 0
 
         req.write(pageheaderonly(req=req, title=title_message,
                                  navtrail=navtrail,
                                  description=description,
                                  keywords=keywords,
                                  metaheaderadd=metaheaderadd,
                                  uid=uid,
                                  language=ln,
                                  navmenuid='search',
                                  navtrail_append_title_p=\
                                  navtrail_append_title_p,
                                  rssurl=rssurl))
         req.write(websearch_templates.tmpl_search_pagestart(ln=ln))
     #else:
     #    req.send_http_header()
 
 def page_end(req, of="hb", ln=CFG_SITE_LANG):
     """End page according to given output format: e.g. close XML tags, add HTML footer, etc.
 
     Returns an empty list for the 'id' format, None when REQ is false
     (call from CLI), and a newline string otherwise.
     """
     if of == "id":
         # empty recID list
         return []
     if req:
         if of.startswith('h'):
             # pagebody end, then the page footer
             req.write(websearch_templates.tmpl_search_pageend(ln = ln))
             req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req))
         return "\n"
     # we were called from CLI
     return None
 
 def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG):
     """Produce a 'From Date', 'Until Date' kind of selection box, suitable
     for search options.
 
     Returns the HTML of three select elements named NAME + 'd',
     NAME + 'm' and NAME + 'y'.  The years offered are the current year
     and the twenty years before it.  The options matching SELECTED_DAY,
     SELECTED_MONTH and SELECTED_YEAR are marked via is_selected().
     """
     _ = gettext_set_language(ln)
 
     pieces = []
 
     # day
     pieces.append("""<select name="%sd">""" % name)
     pieces.append("""<option value="">%s""" % _("any day"))
     for day in range(1, 32):
         pieces.append("""<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day))
     pieces.append("""</select>""")
 
     # month
     pieces.append("""<select name="%sm">""" % name)
     pieces.append("""<option value="">%s""" % _("any month"))
     months = [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")),
               (5, _("May")), (6, _("June")), (7, _("July")), (8, _("August")),
               (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]
     for month_number, month_name in months:
         pieces.append("""<option value="%02d"%s>%s""" % (month_number, is_selected(month_number, selected_month), month_name))
     pieces.append("""</select>""")
 
     # year
     pieces.append("""<select name="%sy">""" % name)
     pieces.append("""<option value="">%s""" % _("any year"))
     this_year = int(time.strftime("%Y", time.localtime()))
     for year in range(this_year-20, this_year+1):
         pieces.append("""<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year))
     pieces.append("""</select>""")
 
     return "".join(pieces)
 
 def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, as,
                       ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3,
                       m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec,
                       action=""):
 
     """Create search box for 'search again in the results page' functionality."""
 
     # load the right message language
     _ = gettext_set_language(ln)
 
     # some computations
     cc_intl = get_coll_i18nname(cc, ln)
     cc_colID = get_colID(cc)
 
     colls_nicely_ordered = []
     if cfg_nicely_ordered_collection_list:
         colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln)
     else:
         colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln)
 
     colls_nice = []
     for (cx, cx_printable) in colls_nicely_ordered:
         if not cx.startswith("Unnamed collection"):
             colls_nice.append({ 'value' : cx,
                                 'text' : cx_printable
                               })
 
     coll_selects = []
     if colls and colls[0] != CFG_SITE_NAME:
         # some collections are defined, so print these first, and only then print 'add another collection' heading:
         for c in colls:
             if c:
                 temp = []
                 temp.append({ 'value' : '',
                               'text' : '*** %s ***' % _("any collection")
                             })
                 for val in colls_nice:
                     # print collection:
                     if not cx.startswith("Unnamed collection"):
                         temp.append({ 'value' : val['value'],
                                       'text' : val['text'],
                                       'selected' : (c == re.sub("^[\s\-]*","", val['value']))
                                     })
                 coll_selects.append(temp)
         coll_selects.append([{ 'value' : '',
                                'text' : '*** %s ***' % _("add another collection")
                              }] + colls_nice)
     else: # we searched in CFG_SITE_NAME, so print 'any collection' heading
         coll_selects.append([{ 'value' : '',
                                'text' : '*** %s ***' % _("any collection")
                              }] + colls_nice)
 
     sort_fields = [{
                       'value' : '',
                       'text' : _("latest first")
                     }]
     query = """SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                 WHERE cff.type='soo' AND cff.id_field=f.id
                 ORDER BY cff.score DESC, f.name ASC"""
     res = run_sql(query)
     for code, name in res:
         sort_fields.append({
                               'value' : code,
                               'text' : name,
                             })
 
     ## ranking methods
     ranks = [{
                'value' : '',
                'text' : "- %s %s -" % (_("OR").lower (), _("rank by")),
              }]
     for (code, name) in get_bibrank_methods(cc_colID, ln):
         # propose found rank methods:
         ranks.append({
                        'value' : code,
                        'text' : name,
                      })
 
     formats = []
     query = """SELECT code,name FROM format WHERE visibility='1' ORDER BY name ASC"""
     res = run_sql(query)
     if res:
         # propose found formats:
         for code, name in res:
             formats.append({ 'value' : code,
                              'text' : name
                            })
     else:
         formats.append({'value' : 'hb',
                         'text' : _("HTML brief")
                        })
 
     return websearch_templates.tmpl_search_box(
              ln = ln,
              as = as,
              cc_intl = cc_intl,
              cc = cc,
              ot = ot,
              sp = sp,
              action = action,
              fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID),
              f1 = f1,
              f2 = f2,
              f3 = f3,
              m1 = m1,
              m2 = m2,
              m3 = m3,
              p1 = p1,
              p2 = p2,
              p3 = p3,
              op1 = op1,
              op2 = op2,
              rm = rm,
              p = p,
              f = f,
              coll_selects = coll_selects,
              d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d,
              dt = dt,
              sort_fields = sort_fields,
              sf = sf,
              so = so,
              ranks = ranks,
              sc = sc,
              rg = rg,
              formats = formats,
              of = of,
              pl = pl,
              jrec = jrec,
              ec = ec,
            )
 
 def create_navtrail_links(cc=CFG_SITE_NAME, as=0, ln=CFG_SITE_LANG, self_p=1, tab=''):
     """Creates navigation trail links, i.e. links to collection
     ancestors (except Home collection).  If as==1, then links to
     Advanced Search interfaces; otherwise Simple Search.
     """
 
     dads = []
     for dad in get_coll_ancestors(cc):
         if dad != CFG_SITE_NAME: # exclude Home collection
             dads.append ((dad, get_coll_i18nname (dad, ln)))
 
     if self_p and cc != CFG_SITE_NAME:
         dads.append((cc, get_coll_i18nname(cc, ln)))
 
     return websearch_templates.tmpl_navtrail_links(
         as=as, ln=ln, dads=dads)
 
 def get_searchwithin_fields(ln='en', colID=None):
     """Return the entries of the 'search within' selection box for the
     collection with ID colID, as a list of {'value': ..., 'text': ...}
     dictionaries.  The first entry is the 'any field' choice with an
     empty value.  When colID is not given, or no fields are found for
     it, all fields are offered.  Fields with an empty code or the code
     'anyfield' are left out."""
     rows = None
     if colID:
         rows = run_sql_cached("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff
                                  WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id
                               ORDER BY cff.score DESC, f.name ASC""", (colID,))
     if not rows:
         # fall back to the complete list of fields
         rows = run_sql_cached("SELECT code,name FROM field ORDER BY name ASC")
     any_field = { 'value' : '',
                   'text' : get_field_i18nname("any field", ln)
                 }
     named_fields = [{ 'value' : code, 'text' : get_field_i18nname(name, ln) }
                     for code, name in rows
                     if code and code != "anyfield"]
     return [any_field] + named_fields
 
 def create_andornot_box(name='op', value='', ln='en'):
     """Return HTML code for the AND/OR/NOT selection box named NAME, with
     the option matching VALUE ('a', 'o' or 'n') marked via is_selected()
     and the labels translated into language LN."""
 
     _ = gettext_set_language(ln)
 
     substitutions = (name,
                      is_selected('a', value), _("AND"),
                      is_selected('o', value), _("OR"),
                      is_selected('n', value), _("AND NOT"))
 
     return """
     <select name="%s">
     <option value="a"%s>%s
     <option value="o"%s>%s
     <option value="n"%s>%s
     </select>
     """ % substitutions
 
 def create_matchtype_box(name='m', value='', ln='en'):
     """Return HTML code for the 'match type' selection box named NAME,
     with the option matching VALUE ('a', 'o', 'e', 'p' or 'r') marked via
     is_selected() and the labels translated into language LN."""
 
     _ = gettext_set_language(ln)
 
     substitutions = (name,
                      is_selected('a', value), _("All of the words:"),
                      is_selected('o', value), _("Any of the words:"),
                      is_selected('e', value), _("Exact phrase:"),
                      is_selected('p', value), _("Partial phrase:"),
                      is_selected('r', value), _("Regular expression:"))
 
     return """
     <select name="%s">
     <option value="a"%s>%s
     <option value="o"%s>%s
     <option value="e"%s>%s
     <option value="p"%s>%s
     <option value="r"%s>%s
     </select>
     """ % substitutions
 
 def is_selected(var, fld):
     """Return ' selected' when VAR and FLD are considered equal, else ''.
 
     Two integers are compared as integers only.  Otherwise the values
     match when their string forms are equal, or when FLD is a
     three-character value starting with 'w' whose remaining two
     characters equal VAR.  Useful for select boxes.
     """
     marker = " selected"
     if type(var) is int and type(fld) is int:
         # plain integer comparison, without the string fallbacks below
         if var == fld:
             return marker
         return ""
     if str(var) == str(fld):
         return marker
     if fld and len(fld) == 3 and fld[0] == "w" and var == fld[1:]:
         return marker
     return ""
 
 def wash_colls(cc, c, split_colls=0):
     """Wash the current collection CC and the collection selection C
     (e.g. what the user left ticked under 'Narrow search').
        Return the tuple (cc, colls_to_display, colls_to_search); the
     two lists can differ because of the 'split by collection'
     functionality.
        CC may be a single name or a list of names; from a list, the
     first name known to the collection cache is taken.  An empty
     non-list CC is replaced by the Home collection (CFG_SITE_NAME).
     C may be a single name or a list; empty names in it are skipped
     and, when nothing remains, CC itself is used.
        Collections to display: when the sorted selection equals the
     sorted list of non-restricted regular sons of CC (i.e. nothing
     was deselected), displaying CC alone is sufficient; otherwise the
     selected collections are displayed successively.
        Collections to search in depend on SPLIT_COLLS:
          * if equal to 0, they are the collections to display;
          * otherwise every collection to display is replaced by its
            first-level sons as returned by get_coll_sons(), or kept
            as it is when it has no sons.
        Duplicates are removed from both lists; of several equal
     names the last occurrence is the one that stays.
        The function raises InvenioWebSearchUnknownCollectionError if
     a non-empty, non-list CC or a non-empty member of C is not known
     to the collection cache.
     """
     def _keep_last_occurrences(names):
         # drop every name that appears again further down the list
         return [name for pos, name in enumerate(names)
                 if name not in names[pos + 1:]]
 
     # settle on one current collection:
     if type(cc) is list:
         # NOTE(review): when no member of the list is a known
         # collection, cc is left as a list -- confirm this is intended
         for candidate in cc:
             if candidate in collection_reclist_cache:
                 cc = candidate
                 break
     elif cc not in collection_reclist_cache:
         if cc:
             raise InvenioWebSearchUnknownCollectionError(cc)
         cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection
 
     # always work with a list of selected collections:
     if type(c) is list:
         selected = c
     else:
         selected = [c]
 
     # keep the known collections, skip empty names, reject anything else:
     colls = []
     for coll in selected:
         if coll in collection_reclist_cache:
             colls.append(coll)
         elif coll:
             raise InvenioWebSearchUnknownCollectionError(coll)
     if not colls:
         colls = [cc]
 
     # compare the selection with the non-restricted "real" sons of 'cc':
     res = run_sql("""SELECT c.name FROM collection AS c,
                                         collection_collection AS cc,
                                         collection AS ccc
                      WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                        AND ccc.name=%s AND cc.type='r'
                        AND c.restricted IS NULL""", (cc,))
     public_sons = [row[0] for row in res]
     colls.sort()
     public_sons.sort()
     if public_sons == colls:
         # nothing was deselected, so showing 'cc' itself is enough
         colls_out_for_display = [cc]
     else:
         # the selection differs, so every selected collection is shown
         colls_out_for_display = colls
     colls_out_for_display = _keep_last_occurrences(colls_out_for_display)
 
     # decide on collection splitting:
     if split_colls == 0:
         # no sons are wanted
         colls_out = colls_out_for_display
     else:
         # sons (first-level descendants) are wanted
         colls_out = []
         for coll in colls_out_for_display:
             sons = get_coll_sons(coll)
             if sons != []:
                 colls_out = colls_out + sons
             else:
                 colls_out.append(coll)
     colls_out = _keep_last_occurrences(colls_out)
 
     return (cc, colls_out_for_display, colls_out)
 
 def strip_accents(x):
     """Strip accents in the input phrase X (assumed in UTF-8) by replacing
     accented characters with their unaccented cousins (e.g. é by e).
     Return such a stripped X.
     The module-level re_latex_* patterns are applied first to the raw
     input, then X is decoded from UTF-8 and the re_unicode_* patterns
     are applied; the result is returned re-encoded in UTF-8.  When X
     cannot be decoded, the string obtained after the re_latex_*
     substitutions is returned as it is."""
     # every pattern is replaced by the plain letter it stands for; note
     # that lowercase "y" maps to "y" like all the other letters:
     latex_replacements = (
         (re_latex_lowercase_a, "a"),
         (re_latex_lowercase_ae, "ae"),
         (re_latex_lowercase_e, "e"),
         (re_latex_lowercase_i, "i"),
         (re_latex_lowercase_o, "o"),
         (re_latex_lowercase_u, "u"),
         (re_latex_lowercase_y, "y"),
         (re_latex_lowercase_c, "c"),
         (re_latex_lowercase_n, "n"),
         (re_latex_uppercase_a, "A"),
         (re_latex_uppercase_ae, "AE"),
         (re_latex_uppercase_e, "E"),
         (re_latex_uppercase_i, "I"),
         (re_latex_uppercase_o, "O"),
         (re_latex_uppercase_u, "U"),
         (re_latex_uppercase_y, "Y"),
         (re_latex_uppercase_c, "C"),
         (re_latex_uppercase_n, "N"),
     )
     for regexp, plain in latex_replacements:
         x = regexp.sub(plain, x)
 
     # convert input into Unicode string:
     try:
         y = unicode(x, "utf-8")
     except (UnicodeError, TypeError):
         return x # something went wrong, probably the input wasn't UTF-8
 
     unicode_replacements = (
         # lowercase characters:
         (re_unicode_lowercase_a, "a"),
         (re_unicode_lowercase_ae, "ae"),
         (re_unicode_lowercase_e, "e"),
         (re_unicode_lowercase_i, "i"),
         (re_unicode_lowercase_o, "o"),
         (re_unicode_lowercase_u, "u"),
         (re_unicode_lowercase_y, "y"),
         (re_unicode_lowercase_c, "c"),
         (re_unicode_lowercase_n, "n"),
         # uppercase characters:
         (re_unicode_uppercase_a, "A"),
         (re_unicode_uppercase_ae, "AE"),
         (re_unicode_uppercase_e, "E"),
         (re_unicode_uppercase_i, "I"),
         (re_unicode_uppercase_o, "O"),
         (re_unicode_uppercase_u, "U"),
         (re_unicode_uppercase_y, "Y"),
         (re_unicode_uppercase_c, "C"),
         (re_unicode_uppercase_n, "N"),
     )
     for regexp, plain in unicode_replacements:
         y = regexp.sub(plain, y)
     # return UTF-8 representation of the Unicode string:
     return y.encode("utf-8")
 
 def wash_index_term(term, max_char_length=50):
     """
     Return the washed form of the index term TERM, i.e. TERM
     lowercased and cut down to at most MAX_CHAR_LENGTH characters.
     TERM is expected as a UTF-8 encoded binary string and the result
     is a UTF-8 encoded binary string again (hence up to
     4*MAX_CHAR_LENGTH bytes, in principle).
 
     Lowercasing and truncation are done on the decoded Unicode
     string, so that a multi-byte UTF-8 character can never be cut in
     the middle, which could happen when slicing the binary
     representation directly.
 
     According to the original documentation the result is meant for
     storing into the idxWORD* tables, MAX_CHAR_LENGTH corresponding to
     the length of their term column.
     """
     lowered = unicode(term, 'utf-8').lower()
     if len(lowered) > max_char_length:
         # too long, so cut at a character (not byte) boundary:
         lowered = lowered[:max_char_length]
     return lowered.encode('utf-8')
 
 def wash_pattern(p):
     """Wash the pattern P passed by URL and return the washed pattern.
     The wildcard sanity is enforced by dropping wildcards found at the
     beginning of words or attached to extremely short words (1-3
     letters), the special 'today' term is replaced by the current date
     and surrounding whitespace is removed.  TODO: instead of this
     approximative treatment, it would be much better to introduce a
     temporal limit, e.g. to kill a query if it does not finish in 10
     seconds."""
     def _hide_spaces(delimiter):
         # build a substitution callback that puts DELIMITER back around
         # the matched text while masking the blanks inside it
         def _callback(match):
             return delimiter + match.group(1).replace(' ', '__SPACE__') + delimiter
         return _callback
     # FIXME: when available, strip accents all the time:
     # p = strip_accents(p)
     # the wildcard-sanity regexps below expect whitespace around words:
     p = " " + p + " "
     # no wildcards at the beginning of words:
     p = re_pattern_wildcards_at_beginning.sub("\\1", p)
     # temporarily protect the spaces found inside '...', "..." and /.../:
     p = re_pattern_single_quotes.sub(_hide_spaces("'"), p)
     p = re_pattern_double_quotes.sub(_hide_spaces("\""), p)
     p = re_pattern_regexp_quotes.sub(_hide_spaces("/"), p)
     # no wildcards on extremely short words (1-3 letters):
     p = re_pattern_short_words.sub("\\1", p)
     # bring the protected spaces back:
     p = re_pattern_space.sub(" ", p)
     # special terms:
     today = time.strftime("%Y-%m-%d", time.localtime())
     p = re_pattern_today.sub(today, p)
     # finally get rid of the surrounding whitespace:
     return p.strip()
 
 def wash_field(f):
     """Wash field F passed by URL and return the washed field name.
     Surrounding whitespace is removed and an old-style CDS
     Invenio/ALEPH field argument is converted via
     CFG_WEBSEARCH_FIELDS_CONVERT, e.g. 'wau' and 'au' are replaced by
     'author'.  The conversion table is consulted with the lowercased
     name both for the membership test and for the lookup, so 'AU' is
     converted like 'au' instead of raising KeyError.  Names not
     present in the table are returned stripped but otherwise as
     given."""
     # get rid of unnecessary whitespace:
     f = f.strip()
     # wash old-style field argument, falling back to F itself:
     return CFG_WEBSEARCH_FIELDS_CONVERT.get(f.lower(), f)
 
 def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0):
     """
     Take the user-submitted starting date, given either as the full
     datetime string D1 or as the (D1Y, D1M, D1D) year, month, day
     numbers, and the ending date, given as D2 or (D2Y, D2M, D2D), and
     return the tuple (datetext1, datetext2) of datetime strings in the
     YYYY-MM-DD HH:MM:SS format suitable for time restricted searching.
 
     When nothing at all is selected, ("", "") is returned.
 
     When both D1 and (D1Y, D1M, D1D) are present, D1 takes precedence
     and is returned as it is.  Ditto for D2*.
 
     When the numbers are used, missing ones are completed according to
     whether it is the starting date (0000, 01, 01) or the ending date
     (9999, 12, 31); the time part is always 00:00:00.
     """
     def _part(number, template, fallback):
         # format NUMBER via TEMPLATE unless it was left unset
         if number:
             return template % number
         return fallback
     # sanity checking:
     if (d1, d1y, d1m, d1d, d2, d2y, d2m, d2d) == ("", 0, 0, 0, "", 0, 0, 0):
         return ("", "") # nothing selected, so return empty values
     # starting date, the full datetime string taking precedence:
     if d1:
         datetext1 = d1
     else:
         datetext1 = _part(d1y, "%04d", "0000") \
                     + _part(d1m, "-%02d", "-01") \
                     + _part(d1d, "-%02d", "-01") \
                     + " 00:00:00"
     # ending date, the full datetime string taking precedence:
     if d2:
         datetext2 = d2
     else:
         # NOTE: day 31 is used for any month; the maximum day number of
         # the given month is not needed for our querying
         datetext2 = _part(d2y, "%04d", "9999") \
                     + _part(d2m, "-%02d", "-12") \
                     + _part(d2d, "-%02d", "-31") \
                     + " 00:00:00"
     return (datetext1, datetext2)
 
 def get_colID(c):
     """Return the ID of the collection named C as stored in the
     collection table.  Return None if no match is found."""
     rows = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1)
     if not rows:
         return None
     return rows[0][0]
 
 def get_coll_i18nname(c, ln=CFG_SITE_LANG):
     """Return the name of collection C translated into language LN
     (name type 'ln', i.e. 'long name'), or C itself when the cache
     holds no such translation.  The translation cache is rebuilt
     first when the collectionname table was updated after the cache
     timestamp."""
     global collection_i18nname_cache
     global collection_i18nname_cache_timestamp
     # refresh the cache when the collectionname table changed meanwhile:
     cache_is_stale = get_table_update_time('collectionname') > collection_i18nname_cache_timestamp
     if cache_is_stale:
         collection_i18nname_cache = create_collection_i18nname_cache()
     # read the translation from the cache, or fall back to the common name:
     try:
         return collection_i18nname_cache[c][ln]
     except KeyError:
         return c # translation in LN does not exist
 
 def get_field_i18nname(f, ln=CFG_SITE_LANG):
     """Return the name of field F translated into language LN (name
     type 'ln', i.e. 'long name'), or F itself when the cache holds no
     such translation.  The translation cache is rebuilt first when
     the fieldname table was updated after the cache timestamp."""
     global field_i18nname_cache
     global field_i18nname_cache_timestamp
     # refresh the cache when the fieldname table changed meanwhile:
     cache_is_stale = get_table_update_time('fieldname') > field_i18nname_cache_timestamp
     if cache_is_stale:
         field_i18nname_cache = create_field_i18nname_cache()
     # read the translation from the cache, or fall back to the common name:
     try:
         return field_i18nname_cache[f][ln]
     except KeyError:
         return f # translation in LN does not exist
 
 def get_coll_ancestors(coll):
     """Return the list of ancestors of collection 'coll', the most
     distant ancestor first and the direct parent last.  At every
     level only one parent is followed, namely the one with the lowest
     id_dad."""
     lineage = []
     current = coll
     while True:
         res = run_sql("""SELECT c.name FROM collection AS c
                           LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad
                           LEFT JOIN collection AS ccc ON ccc.id=cc.id_son
                           WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""",
                       (current,))
         if not res:
             # no parent any more, the top was reached
             break
         current = res[0][0]
         lineage.append(current)
     # parents were collected bottom-up, so turn the list around:
     lineage.reverse()
     return lineage
 
 def get_coll_sons(coll, type='r', public_only=1):
     """Return the list of names of the sons (first-level descendants)
        of collection 'coll' that are attached with relation type
        'type', ordered by descending score.
        If public_only, then return only non-restricted son collections.
     """
     clauses = ["SELECT c.name FROM collection AS c "
                "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "
                "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "
                "WHERE cc.type=%s AND ccc.name=%s"]
     if public_only:
         clauses.append(" AND c.restricted IS NULL ")
     clauses.append(" ORDER BY cc.score DESC")
     rows = run_sql("".join(clauses), (type, coll))
     return [row[0] for row in rows]
 
 def get_coll_real_descendants(coll):
     """Return the list of all descendants of collection 'coll' that
        have a 'dbquery' defined.  A son without a dbquery (a compound
        collection like "A & B") is not returned itself; it is
        decomposed recursively into its own descendants instead.
     """
     res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c
                      LEFT JOIN collection_collection AS cc ON c.id=cc.id_son
                      LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad
                      WHERE ccc.name=%s ORDER BY cc.score DESC""",
                   (coll,))
     descendants = []
     for son_name, son_dbquery in res:
         if not son_dbquery:
             # 'composed' collection, so look at its sons instead:
             descendants.extend(get_coll_real_descendants(son_name))
         else:
             # 'real' collection, so take it:
             descendants.append(son_name)
     return descendants
 
 def get_collection_reclist(coll):
     """Return hitset of recIDs that belong to the collection 'coll'.
        But firstly check the last updated date of the collection table.
        If it's newer than the cache timestamp, then empty the cache,
        since new records could have been added.
        The reclist is read from the database (with the collection name
        passed as a query parameter) when the cache holds no value for
        'coll' yet; when the collection row is missing or its stored
        reclist cannot be turned into a HitSet, an empty HitSet is
        cached and returned.
        Raise KeyError if 'coll' is not a key of the cache."""
     global collection_reclist_cache
     global collection_reclist_cache_timestamp
     # firstly, check whether the collection table was modified:
     if get_table_update_time('collection') > collection_reclist_cache_timestamp:
         # yes it was, cache clear-up needed:
         collection_reclist_cache = create_collection_reclist_cache()
     # secondly, read reclist from either the cache or the database:
     if not collection_reclist_cache[coll]:
         # not yet in the cache, so calculate it and fill the cache;
         # start from an empty hitset so that there is always a value to
         # store, even when the query returns no row:
         reclist = HitSet()
         res = run_sql("SELECT nbrecs,reclist FROM collection WHERE name=%s",
                       (coll,), 1)
         if res:
             try:
                 reclist = HitSet(res[0][1])
             except Exception:
                 # stored reclist is unusable, keep the empty hitset
                 reclist = HitSet()
         collection_reclist_cache[coll] = reclist
     # finally, return reclist:
     return collection_reclist_cache[coll]
 
 def coll_restricted_p(coll):
     """Predicate telling whether the collection coll is restricted:
     return 1 when its 'restricted' column is not NULL, 0 otherwise
     (also for an empty or unknown collection name)."""
     if coll:
         rows = run_sql("SELECT restricted FROM collection WHERE name=%s", (coll,))
         if rows and rows[0][0] is not None:
             return 1
     return 0
 
 def coll_restricted_group(coll):
     """Return the Apache group to which the collection is restricted,
     i.e. the value of its 'restricted' column.  Return None if it's
     public, or if the collection name is empty or unknown."""
     if coll:
         rows = run_sql("SELECT restricted FROM collection WHERE name=%s", (coll,))
         if rows:
             return rows[0][0]
     return None
 
 def create_collection_reclist_cache():
     """Create and return the cache of records belonging to collections:
     a dictionary having one key per collection name, every value being
     None until get_collection_reclist(coll) fills it during runtime.
     The cache timestamp is set to the current local time, or to zero
     in case of database problems (an empty cache is returned then)."""
     global collection_reclist_cache_timestamp
     try:
         res = run_sql("SELECT name,reclist FROM collection")
     except Error:
         # database problems, set timestamp to zero and return empty cache
         collection_reclist_cache_timestamp = 0
         return {}
     # only the collection names are registered now, reclists come later:
     collrecs = {}
     for name, dummy_reclist in res:
         collrecs[name] = None
     # update timestamp:
     try:
         collection_reclist_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     except NameError:
         collection_reclist_cache_timestamp = 0
     return collrecs
 
 # Module-level set-up of collection_reclist_cache.  The probe below
 # succeeds only when the name is already bound to an object offering
 # has_key(); its result is ignored.  (Presumably this keeps an already
 # existing cache when the module body is executed again -- TODO confirm.)
 try:
     collection_reclist_cache.has_key(CFG_SITE_NAME)
 except:
     # the cache is not usable yet, so create it; if even that fails,
     # start with an empty dictionary:
     try:
         collection_reclist_cache = create_collection_reclist_cache()
     except:
         collection_reclist_cache = {}
 
 def create_collection_i18nname_cache():
     """Create and return the cache of I18N collection names of type
     'ln' (=long name): a dictionary mapping collection name to a
     dictionary mapping language to the translated name; empty
     translations are left out.
     The cache timestamp is set to the current local time, or to zero
     in case of database problems (an empty cache is returned then)."""
     global collection_i18nname_cache_timestamp
     try:
         res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name
     except Error:
         # database problems, set timestamp to zero and return empty cache
         collection_i18nname_cache_timestamp = 0
         return {}
     # populate collection I18N name cache:
     names = {}
     for coll_name, lang, translation in res:
         if not translation:
             continue
         names.setdefault(coll_name, {})[lang] = translation
     # update timestamp:
     try:
         collection_i18nname_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     except NameError:
         collection_i18nname_cache_timestamp = 0
     return names
 
 # Module-level set-up of collection_i18nname_cache.  The probe below
 # succeeds only when the name is already bound to an object offering
 # has_key(); its result is ignored.  (Presumably this keeps an already
 # existing cache when the module body is executed again -- TODO confirm.)
 try:
     collection_i18nname_cache.has_key(CFG_SITE_NAME)
 except:
     # the cache is not usable yet, so create it; if even that fails,
     # start with an empty dictionary:
     try:
         collection_i18nname_cache = create_collection_i18nname_cache()
     except:
         collection_i18nname_cache = {}
 
 def create_field_i18nname_cache():
     """Create and return the cache of I18N field names of type 'ln'
     (=long name): a dictionary mapping field name to a dictionary
     mapping language to the translated name; empty translations are
     left out.
     The cache timestamp is set to the current local time, or to zero
     in case of database problems (an empty cache is returned then)."""
     global field_i18nname_cache_timestamp
     try:
         res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name
     except Error:
         # database problems, set timestamp to zero and return empty cache
         field_i18nname_cache_timestamp = 0
         return {}
     # populate field I18N name cache:
     names = {}
     for field_name, lang, translation in res:
         if not translation:
             continue
         names.setdefault(field_name, {})[lang] = translation
     # update timestamp:
     try:
         field_i18nname_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     except NameError:
         field_i18nname_cache_timestamp = 0
     return names
 
 # Module-level set-up of field_i18nname_cache.  The probe below
 # succeeds only when the name is already bound to an object offering
 # has_key(); its result is ignored, so it does not matter that the
 # site name is not a field name.  (Presumably this keeps an already
 # existing cache when the module body is executed again -- TODO confirm.)
 try:
     field_i18nname_cache.has_key(CFG_SITE_NAME)
 except:
     # the cache is not usable yet, so create it; if even that fails,
     # start with an empty dictionary:
     try:
         field_i18nname_cache = create_field_i18nname_cache()
     except:
         field_i18nname_cache = {}
 
 def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG):
     """Browse either bibliographic phrases or words indexes, and display it.
     The output is written to REQ.  When no field F is given, the
     browsing is delegated to browse_in_bibwords() using the same
     language LN.  Otherwise the terms nearest to pattern P in field F
     are looked up via get_nearest_terms_in_bibxxx(p, f, rg, 1); if
     there are none, P is shortened character by character and the
     lookup repeated, until something is found or P is used up, in
     which case "No values found." is written.
     COLLS is only passed on to the template: the hit counts shown are
     those of get_nbhits_in_bibxxx(), i.e. regardless of collection."""
 
     # load the right message language
     _ = gettext_set_language(ln)
 
     ## do we search in words indexes?
     if not f:
         return browse_in_bibwords(req, p, f, ln=ln)
 
     ## is p enclosed in quotes? (coming from exact search)
     if p.startswith('"') and p.endswith('"'):
         p = p[1:-1]
 
     ## okay, "real browse" follows:
     browsed_phrases = get_nearest_terms_in_bibxxx(p, f, rg, 1)
     while not browsed_phrases:
         if not p:
             # the empty pattern was tried too and cannot be shortened
             # any further, so stop here instead of looping forever:
             req.write(_("No values found."))
             return
         # try again and again with shorter and shorter pattern:
         try:
             p = p[:-1]
             browsed_phrases = get_nearest_terms_in_bibxxx(p, f, rg, 1)
         except Exception:
             # probably there are no hits at all:
             req.write(_("No values found."))
             return
 
     ## restricting the phrases to the given collections is not done
     ## (that code path was disabled), so get nbhits for these phrases
     ## in any collection:
     browsed_phrases_in_colls = []
     for phrase in browsed_phrases:
         browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)])
 
     ## display results now:
     out = websearch_templates.tmpl_browse_pattern(
             f=f,
             fn=get_field_i18nname(f, ln),
             ln=ln,
             browsed_phrases_in_colls=browsed_phrases_in_colls,
             colls=colls,
           )
     req.write(out)
     return
 
 def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG):
     """Browse inside words indexes: write to REQ the box of terms
     nearest to pattern P in field F, rendered in language LN.
     Nothing is written when P is empty."""
     if p:
         _ = gettext_set_language(ln)
         # the links of the box repeat the current request, as a search:
         urlargd = dict(req.argd)
         urlargd['action'] = 'search'
         nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0)
         page = websearch_templates.tmpl_search_in_bibwords(
             p = p,
             f = f,
             ln = ln,
             nearest_box = nearest_box
         )
         req.write(page)
 
 
 def search_special_fields(bsu_p, bsu_f, bsu_m):
     """Search in fields whose answer cannot be found from just one record.
        Example: give records that have been cited 200 times: cites=200
        Return the hitset of matching records; for any field other
        than "cites" the hitset is empty.  BSU_M is not used."""
     if bsu_f != "cites":
         return HitSet([])
     # bsu_p will look like "200" or "0-9" or "5000+"; it is passed on
     # enclosed in double quotes:
     quoted_number = "\"" + bsu_p + "\""
     return HitSet(get_records_with_num_cites(quoted_number))
 
 def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG):
     """Search for complex pattern 'p' within field 'f' according to
        matching type 'm'.  Return hitset of recIDs.
 
        The function uses multi-stage searching algorithm in case of no
        exact match found.  See the Search Internals document for
        detailed description.
 
        The 'ap' argument governs whether alternative patterns are to
        be used in case there is no direct hit for (p,f,m).  For
        example, whether to replace non-alphanumeric characters by
        spaces if it would give some hits.  See the Search Internals
        document for detailed description.  (ap=0 forbids the
        alternative pattern usage, ap=1 permits it.)
 
        The 'of' argument governs whether to print or not some
        information to the user in case of no match found.  (The
        information is printed only for output formats starting with
        'h', otherwise the function is silent.)
 
        The 'verbose' argument controls the level of debugging information
        to be printed (0=least, 9=most).
 
        The 'req' argument is the request object that warnings are
        printed to; 'ln' is the language of the messages.
 
        When 'p' is empty, the hitset of the whole universe is returned.
 
        All the parameters are assumed to have been previously washed.
 
        This function is suitable as a mid-level API.
     """
 
     _ = gettext_set_language(ln)
 
     hitset_empty = HitSet()
     # sanity check:
     if not p:
         hitset_full = HitSet(trailing_bits=1)
         hitset_full.discard(0)
         # no pattern, so return all universe
         return hitset_full
     # search stage 1: break up arguments into basic search units:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     basic_search_units = create_basic_search_units(req, p, f, m, of)
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         print_warning(req, "Search stage 1: basic search units are: %s" % basic_search_units)
         print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1))
     # search stage 2: do search for each search unit and verify hit presence:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     basic_search_units_hitsets = []
     for idx_unit in range(0, len(basic_search_units)):
         # every unit unpacks into (operator, pattern, field, matching type):
         bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
         basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m)
         if not basic_search_unit_hitset:
             #stuff like "search by number of citations" i.e. cites>500 or such goes here
             basic_search_unit_hitset = search_special_fields(bsu_p, bsu_f, bsu_m)
         if verbose >= 9 and of.startswith("h"):
             # (this message is emitted during stage 2 although its text says "stage 1")
             print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (bsu_p, list(basic_search_unit_hitset)))
         if len(basic_search_unit_hitset) > 0 or \
            ap==0 or \
            bsu_o=="|" or \
            ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"):
             # stage 2-1: this basic search unit is retained, since
             # either the hitset is non-empty, or the approximate
             # pattern treatment is switched off, or the search unit
             # was joined by an OR operator to preceding/following
             # units so we do not require that it exists
             basic_search_units_hitsets.append(basic_search_unit_hitset)
         else:
             # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern:
             if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p):
                 if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query?
                     bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p)
                 else: # it is WRD query
                     bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p)
                 if verbose and of.startswith('h') and req:
                     print_warning(req, "trying (%s,%s,%s)" % (bsu_pn, bsu_f, bsu_m))
                 # recursive call for the replacement pattern, run silently
                 # (no req, "id" output format, default ap=0):
                 basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln)
                 if len(basic_search_unit_hitset) > 0:
                     # we retain the new unit instead
                     if of.startswith('h'):
                         print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \
                                       {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>",
                                        'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"})
                     # NOTE(review): item assignment needs every basic search
                     # unit to be a mutable sequence -- confirm against
                     # create_basic_search_units()
                     basic_search_units[idx_unit][1] = bsu_pn
                     basic_search_units_hitsets.append(basic_search_unit_hitset)
                 else:
                     # stage 2-3: no hits found either, propose nearest indexed terms:
                     if of.startswith('h'):
                         if req:
                             if bsu_f == "recid":
                                 print_warning(req, "Requested record does not seem to exist.")
                             else:
                                 print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln))
                     return hitset_empty
             else:
                 # stage 2-3: no hits found either, propose nearest indexed terms:
                 if of.startswith('h'):
                     if req:
                         if bsu_f == "recid":
                             print_warning(req, "Requested record does not seem to exist.")
                         else:
                             print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln))
                 return hitset_empty
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         for idx_unit in range(0, len(basic_search_units)):
             print_warning(req, "Search stage 2: basic search unit %s gave %d hits." %
                           (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit])))
         print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1))
     # search stage 3: apply boolean query for each search unit:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     # let the initial set be the complete universe:
     hitset_in_any_collection = HitSet(trailing_bits=1)
     hitset_in_any_collection.discard(0)
     for idx_unit in range(0, len(basic_search_units)):
         this_unit_operation = basic_search_units[idx_unit][0]
         this_unit_hitset = basic_search_units_hitsets[idx_unit]
         # '+' narrows the result, '-' excludes from it, '|' adds to it:
         if this_unit_operation == '+':
             hitset_in_any_collection.intersection_update(this_unit_hitset)
         elif this_unit_operation == '-':
             hitset_in_any_collection.difference_update(this_unit_hitset)
         elif this_unit_operation == '|':
             hitset_in_any_collection.union_update(this_unit_hitset)
         else:
             if of.startswith("h"):
                 print_warning(req, "Invalid set operation %s." % this_unit_operation, "Error")
     if len(hitset_in_any_collection) == 0:
         # no hits found, propose alternative boolean query:
         if of.startswith('h'):
             nearestterms = []
             for idx_unit in range(0, len(basic_search_units)):
                 bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
                 # a pattern enclosed in %...% is shown enclosed in '...' instead:
                 if bsu_p.startswith("%") and bsu_p.endswith("%"):
                     bsu_p = "'" + bsu_p[1:-1] + "'"
                 bsu_nbhits = len(basic_search_units_hitsets[idx_unit])
 
                 # create a similar query, but with the basic search unit only
                 # NOTE(review): req is dereferenced here without the
                 # `if req` guard used in stage 2; with req=None and an
                 # 'h*' output format this raises AttributeError --
                 # confirm that such callers always pass req
                 argd = {}
                 argd.update(req.argd)
 
                 argd['p'] = bsu_p
                 argd['f'] = bsu_f
 
                 nearestterms.append((bsu_p, bsu_nbhits, argd))
 
             text = websearch_templates.tmpl_search_no_boolean_hits(
                      ln=ln,  nearestterms=nearestterms)
             print_warning(req, text)
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection))
         print_warning(req, "Search stage 3: execution took %.2f seconds." % (t2 - t1))
     return hitset_in_any_collection
 
 def search_unit(p, f=None, m=None):
     """Look up a single basic search unit and return its hitset of recIDs.
 
        'p' is the pattern, 'f' the field and 'm' the matching type.
        All arguments are expected to be washed already.  'p' is taken
        as one ``basic search unit'' and is never broken up here.
 
        Matching types 'a' and 'r' are handed to the bibxxx search;
        every other matching type goes to the word index search.  An
        empty pattern gives an empty hitset.
 
        This function is suitable as a low-level API.
     """
     if not p:
         # sanity check: nothing to search for
         return HitSet()
     if m in ('a', 'r'):
         # direct bibxxx search, phrase search or regexp search:
         return search_unit_in_bibxxx(p, f, m)
     # word index search is the default:
     return search_unit_in_bibwords(p, f)
 
 def search_unit_in_bibwords(word, f, decompress=zlib.decompress):
     """Return hitset of recIDs found for 'word' in the word index table of field 'f'.
 
     With an empty 'f' the "anyfield" index is searched.  When 'f' has
     no word index, an empty hitset is returned.  A '*' inside 'word'
     acts as truncation character and 'a->b' is run as a span query.
     """
     # start from the "anyfield" index and override it when 'f' is given:
     stemming_language = get_index_stemming_language(get_index_id_from_field("anyfield"))
     bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if not index_id:
             return HitSet() # word index f does not exist
         bibwordsX = "idxWORD%02dF" % index_id
         stemming_language = get_index_stemming_language(index_id)
 
     # '*' is the truncation character for the user, '%' the one for SQL:
     word = word.replace('*', '%')
     span_bounds = word.split("->", 1)
     if len(span_bounds) == 2:
         # span query: wash and stem both ends separately
         lower_bound = re_word.sub('', span_bounds[0])
         upper_bound = re_word.sub('', span_bounds[1])
         if stemming_language:
             lower_bound = stem(lower_bound, stemming_language)
             upper_bound = stem(upper_bound, stemming_language)
         res = run_sql("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX,
                       (wash_index_term(lower_bound), wash_index_term(upper_bound)))
     else:
         word = re_word.sub('', word)
         if stemming_language:
             word = stem(word, stemming_language)
         if word.find('%') >= 0:
             # wildcard present, so match with LIKE:
             sql_template = "SELECT term,hitlist FROM %s WHERE term LIKE %%s"
         else:
             sql_template = "SELECT term,hitlist FROM %s WHERE term=%%s"
         res = run_sql(sql_template % bibwordsX, (wash_index_term(word),))
 
     # merge the hitlists of all the matching terms:
     hitset = HitSet()
     first_row_seen = False
     for dummy_term, hitlist in res:
         term_hitset = HitSet(hitlist)
         if first_row_seen:
             hitset.union_update(term_hitset)
         else:
             # take the first hitset as is rather than merging it
             # into an empty one:
             hitset = term_hitset
             first_row_seen = True
     return hitset
 
 def search_unit_in_bibxxx(p, f, type):
     """Searches for pattern 'p' inside bibxxx tables for field 'f' and returns hitset of recIDs found.
 
     The search type is defined by 'type': 'r' runs a REGEXP match on
     'p' as given.  For any other type the truncation character '*' in
     'p' is turned into '%' and the query becomes a BETWEEN query for a
     span pattern ('a->b'), a LIKE query when a wildcard is present, or
     an exact match otherwise.
 
     'f' is either a MARC tag (recognized by its two leading digits) or
     a field code that is resolved into MARC tags via get_field_tags().
     A missing 'f' is treated as an empty field code.  Tag "001" is
     looked up in bibrec.id directly.
     """
     query_addons = "" # will hold additional SQL code for the query
     query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument)
     # wash arguments:
     if f is None:
         # callers such as search_unit() default the field to None
         f = ""
     f = f.replace('*', '%') # replace truncation char '*' in field definition
     if type == 'r':
         query_addons = "REGEXP %s"
         query_params = (p,)
     else:
         p = p.replace('*', '%') # we now use '*' as the truncation character
         ps = p.split("->", 1) # check for span query
         if len(ps) == 2:
             query_addons = "BETWEEN %s AND %s"
             query_params = (ps[0], ps[1])
         elif p.find('%') > -1:
             query_addons = "LIKE %s"
             query_params = (ps[0],)
         else:
             query_addons = "= %s"
             query_params = (ps[0],)
     # construct 'tl' which defines the tag list (MARC tags) to search in:
     if len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
         # 'f' seems to be okay as it starts by two digits; the length
         # check keeps empty or one-character fields from raising
         tl = [f]
     else:
         # convert old ALEPH tag names, if appropriate: (TODO: get rid of this before entering this function)
         f_lowercased = f.lower()
         if f_lowercased in CFG_WEBSEARCH_FIELDS_CONVERT:
             f = CFG_WEBSEARCH_FIELDS_CONVERT[f_lowercased]
         # deduce desired MARC tags on the basis of chosen 'f'; an
         # empty list simply means there is nothing to search in
         tl = get_field_tags(f)
     # okay, start search:
     l = [] # will hold list of recID that matched
     for t in tl:
         # construct and run query:
         if t == "001":
             res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons,
                           query_params)
         else:
             # deduce into which bibxxx table we will search:
             digit1, digit2 = int(t[0]), int(t[1])
             bx = "bib%d%dx" % (digit1, digit2)
             bibx = "bibrec_bib%d%dx" % (digit1, digit2)
             query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \
                     (bx, bibx, query_addons)
             if len(t) != 6 or t[-1:] == '%':
                 # wildcard query, or only the beginning of field 't'
                 # is defined, so add wildcard character:
                 query += " AND bx.tag LIKE %s"
                 res = run_sql(query, query_params + (t + '%',))
             else:
                 # exact query for 't':
                 query += " AND bx.tag=%s"
                 res = run_sql(query, query_params + (t,))
         # fill the result set, skipping the empty ids that the
         # LEFT JOIN gives for values not attached to any record:
         for id_bibrec in res:
             if id_bibrec[0]:
                 l.append(id_bibrec[0])
     # okay, return result set:
     return HitSet(l)
 
 def search_unit_in_bibrec(datetext1, datetext2, type='c'):
     """
     Return hitset of recIDs whose creation date, or modification date
     when 'type' starts with "m", lies between datetext1 and datetext2,
     both bounds included.
     No pattern or collection is taken into account, so the result is
     meant to be intersected later on with the 'real' query.
     """
     # anything not starting with "m" means creation date:
     date_column = "creation_date"
     if type.startswith("m"):
         date_column = "modification_date"
     hits = HitSet()
     rows = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (date_column, date_column),
                    (datetext1, datetext2))
     for row in rows:
         hits += row[0]
     return hits
 
 def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG):
     """Return dict of hitsets, keyed by collection, given by intersecting
        the hitset with the record list of each collection in 'colls'.
 
        When no collection gives any hit, an empty dict is returned and,
        for output formats starting with "h", a warning is printed that
        tells whether the Home collection would have matched.
     """
     _ = gettext_set_language(ln)
 
     # search stage 4: intersect with the collection universe:
     if verbose and of.startswith("h"):
         t1 = os.times()[4]
     results = {}
     results_nbhits = 0
     for coll in colls:
         coll_hitset = hitset_in_any_collection & get_collection_reclist(coll)
         results[coll] = coll_hitset
         results_nbhits += len(coll_hitset)
     if results_nbhits == 0:
         # no hits found, try to search in Home:
         results_in_Home = hitset_in_any_collection & get_collection_reclist(CFG_SITE_NAME)
         if of.startswith("h"):
             if len(results_in_Home) > 0:
                 # some hits found in Home, so propose this search:
                 url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[])
                 coll_names = ', '.join([get_coll_i18nname(coll, ln) for coll in colls])
                 print_warning(req, _("No match found in collection %(x_collection)s. Other public collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\
                               {'x_collection': '<em>' + coll_names + '</em>',
                                'x_url_open': '<a class="nearestterms" href="%s">' % (url),
                                'x_nb_hits': len(results_in_Home),
                                'x_url_close': '</a>'})
             else:
                 # no hits found in Home, recommend different search terms:
                 print_warning(req, _("No public collection matched your query. "
                                      "If you were looking for a non-public document, please choose "
                                      "the desired restricted collection first."))
         # either way there is nothing to return for the chosen collections:
         results = {}
     if verbose and of.startswith("h"):
         t2 = os.times()[4]
         print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits)
         print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1))
     return results
 
 def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"):
     """Intersect every hitset inside 'results' (a dict of hitsets with
        collection as key) with 'hitset' and return 'results'.  The
        hitsets are updated in place.
 
        If nothing is left in any collection, 'aptext' is printed as a
        warning (for output formats starting with "h" only).  Then, if
        'ap' (approximate pattern) is true, a copy of the 'results'
        taken before the intersection is returned; if 'ap' is false, an
        empty dict is returned.
     """
     # what to hand back in case of no hits at all:
     fallback = {}
     if ap:
         fallback = copy.deepcopy(results)
     nb_total = 0
     for coll_hitset in results.values():
         coll_hitset.intersection_update(hitset)
         nb_total += len(coll_hitset)
     if nb_total != 0:
         return results
     if of.startswith("h"):
         print_warning(req, aptext)
     return fallback
 
 def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG):
     """Return a box similar to ``Not satisfied...'' one by proposing
        author searches for similar names.  Namely, take AUTHOR_NAME
        and the first initial of the first name (after comma) and look
        into author index whether authors with e.g. middle names exist.
        Useful mainly for CERN Library that sometimes contains name
        forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the
        same person.
 
        Return empty string if the box is not configured, if
        AUTHOR_NAME has no ``name, initial'' form, or if no similarly
        named authors are found to exist.
     """
     # return nothing if not configured:
     if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0:
         return ""
     # return empty box if there is no initial:
     if re.match(r'[^ ,]+, [^ ]', author_name) is None:
         return ""
     # firstly find name comma initial:
     author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name)
 
     # secondly search for similar name forms, both as given and with
     # accents stripped; skip the second form when it is the same as
     # the first one, since it would only repeat identical queries:
     names_to_search = [author_name_to_search]
     unaccented_name = strip_accents(author_name_to_search)
     if unaccented_name != author_name_to_search:
         names_to_search.append(unaccented_name)
     author_tags = get_field_tags("author")
     similar_author_names = {}
     for name in names_to_search:
         for tag in author_tags:
             # deduce into which bibxxx table we will search:
             bx = "bib%d%dx" % (int(tag[0]), int(tag[1]))
             if len(tag) != 6 or tag[-1:] == '%':
                 # only the beginning of field 't' is defined, so add wildcard character:
                 res = run_sql("""SELECT bx.value FROM %s AS bx
                                   WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx,
                               (name + "%", tag + "%"))
             else:
                 res = run_sql("""SELECT bx.value FROM %s AS bx
                                   WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx,
                               (name + "%", tag))
             for row in res:
                 similar_author_names[row[0]] = 1
     # remove the original name:
     if author_name in similar_author_names:
         del similar_author_names[author_name]
     if not similar_author_names:
         return ""
     # thirdly print the box, listing names in sorted order and
     # keeping only those that have some hits:
     out_authors = list(similar_author_names.keys())
     out_authors.sort()
     tmp_authors = []
     for out_author in out_authors:
         nbhits = get_nbhits_in_bibxxx(out_author, "author")
         if nbhits:
             tmp_authors.append((out_author, nbhits))
     return "" + websearch_templates.tmpl_similar_author_names(
                   authors=tmp_authors, ln=ln)
 
 def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True):
     """Return text box listing the 'n' nearest terms above/below 'p'
        for the field 'f' and the matching type 't' ('w' for the words
        index, anything else for phrases) in language 'ln'.
        Each term comes with a copy of `urlargd' in which the pattern
        was replaced by that term, so that new searches can be proposed.
        If `intro_text_p' is true, then display the introductory message,
        otherwise print only the nearest terms in the box content.
        If no nearest terms can be found, return a short message saying
        that no index is available instead of the box.
     """
     # load the right message language
     _ = gettext_set_language(ln)
 
     if not p: # sanity check
         p = "."
     # look for nearest terms:
     search_in_words = (t == 'w')
     if search_in_words:
         nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n)
         if not nearest_terms:
             return "%s %s." % (_("No words index available for"), get_field_i18nname(f, ln))
     else:
         nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n)
         if not nearest_terms:
             return "%s %s." % (_("No phrase index available for"), get_field_i18nname(f, ln))
 
     terminfo = []
     for term in nearest_terms:
         if search_in_words:
             hits = get_nbhits_in_bibwords(term, f)
         else:
             hits = get_nbhits_in_bibxxx(term, f)
 
         argd = {}
         argd.update(urlargd)
 
         # find the first URL pattern argument that contains the
         # requested pattern and put the proposed term there instead:
         for px, fx in (('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3')):
             if px not in argd:
                 continue
             if f == argd[fx] or f == "anyfield" or f == "":
                 # the field is given apart from the pattern:
                 if argd[px].find(p) > -1:
                     argd[px] = argd[px].replace(p, term)
                     break
             else:
                 # the field is given inside the pattern, as f:p or f:"p"
                 if argd[px].find(f+':'+p) > -1:
                     argd[px] = argd[px].replace(f+':'+p, f+':'+term)
                     break
                 elif argd[px].find(f+':"'+p+'"') > -1:
                     argd[px] = argd[px].replace(f+':"'+p+'"', f+':"'+term+'"')
                     break
 
         terminfo.append((term, hits, argd))
 
     intro = ""
     if intro_text_p: # add full leading introductory text
         # show the pattern without its enclosing '%' characters, if any:
         shown_term = p
         if p.startswith("%") and p.endswith("%") and p[1:-1]:
             shown_term = p[1:-1]
         escaped_term = "<em>" + cgi.escape(shown_term) + "</em>"
         if f:
             intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \
                      {'x_term': escaped_term,
                       'x_index': "<em>" + cgi.escape(get_field_i18nname(f, ln)) + "</em>"}
         else:
             intro = _("Search term %s did not match any record. Nearest terms in any collection are:") % \
                      (escaped_term)
 
     return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo,
                                                      intro=intro)
 
 def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
     """Return sorted list of terms around word `p' in the word index for
        field `f': up to `n_above' terms sorting before `p', then `p'
        itself, then up to `n_below' terms sorting after it.
        With an empty `f' the "anyfield" index is used; return empty
        list when `f' has no word index."""
     # deduce into which bibwordsX table we will search:
     bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if not index_id:
             return []
         bibwordsX = "idxWORD%02dF" % index_id
     # terms before `p' come closest first, so put them back in order:
     res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX,
                   (p, n_above))
     terms_before = [row[0] for row in res]
     terms_before.reverse()
     # terms after `p' come in the right order already:
     res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX,
                   (p, n_below))
     terms_after = [row[0] for row in res]
     return terms_before + [p] + terms_after
 
 def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
     """Browse (-n_above, +n_below) closest bibliographic phrases
        for the given pattern p in the given field f, regardless
        of collection.
        If f is empty and p has the form 'field:pattern', the field is
        taken from p.  f may be a MARC tag (starting with two digits)
        or a field code resolved via get_field_tags().
        Return list of [phrase1, phrase2, ... , phrase_n]."""
     ## determine browse field:
     if not f and p.find(":") > 0: # does 'p' contain ':'?
         f, p = p.split(":", 1)
     ## We are going to take max(n_below, n_above) as the number of
     ## values to fetch from bibXXx.  This is needed to work around
     ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
     ## use MySQL 4.1.x or our own idxPHRASE in the future.
     n_fetch = 2*max(n_below, n_above)
     ## construct 'tl' which defines the tag list (MARC tags) to search in:
     if f and len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
         # 'f' seems to be okay as it starts by two digits; the guards
         # keep an empty or one-character 'f' from raising here
         tl = [f]
     else:
         # deduce desired MARC tags on the basis of chosen 'f'
         tl = get_field_tags(f)
     ## start browsing to fetch list of hits:
     browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique)
     # always add self to the results set, without enclosing '%' if any:
     self_phrase = p
     if p.startswith("%") and p.endswith("%") and p[1:-1]:
         self_phrase = p[1:-1]
     browsed_phrases[self_phrase] = 1
     for t in tl:
         # deduce into which bibxxx table we will search:
         bx = "bib%d%dx" % (int(t[0]), int(t[1]))
         if len(t) != 6 or t[-1:] == '%':
             # only the beginning of field 't' is defined, so add wildcard character:
             tag_operator, tag_value = "LIKE", t + "%"
         else:
             tag_operator, tag_value = "=", t
         # firstly get closest phrases above `p', secondly those equal
         # to or below `p':
         for value_operator, sort_order in (("<", "DESC"), (">=", "ASC")):
             res = run_sql("""SELECT bx.value FROM %s AS bx
                               WHERE bx.value%s%%s AND bx.tag %s %%s
                               ORDER BY bx.value %s LIMIT %%s""" % \
                           (bx, value_operator, tag_operator, sort_order),
                           (p, tag_value, n_fetch))
             for row in res:
                 browsed_phrases[row[0]] = 1
     # select first n words only: (this is needed as we were searching
     # in many different tables and so aren't sure we have more than n
     # words right; this of course won't be needed when we shall have
     # one ACC table only for given field):
     phrases_out = list(browsed_phrases.keys())
     phrases_out.sort(lambda x, y: cmp(strip_accents(x).lower(),
                                       strip_accents(y).lower()))
     # find position of self:
     # NOTE(review): when 'p' is enclosed in '%', it was added above in
     # its stripped form, so this lookup misses and the middle of the
     # list is used -- confirm whether that is intended.
     try:
         idx_p = phrases_out.index(p)
     except ValueError:
         idx_p = len(phrases_out) // 2
     # return n_above and n_below:
     return phrases_out[max(0, idx_p-n_above):idx_p+n_below]
 
 def get_nbhits_in_bibwords(word, f):
     """Return number of hits for word 'word' inside words index for field 'f'.
        With an empty 'f' the "anyfield" index is used.  Return 0 when
        'f' has no word index or when 'word' is empty."""
     # deduce into which bibwordsX table we will search:
     bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
     if f:
         index_id = get_index_id_from_field(f)
         if not index_id:
             return 0
         bibwordsX = "idxWORD%02dF" % index_id
     if not word:
         return 0
     res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX,
                   (word,))
     # add up the sizes of all the hitlists found:
     return sum([len(HitSet(row[0])) for row in res])
 
 def get_nbhits_in_bibxxx(p, f):
     """Return number of distinct records having the exact value 'p'
        inside the bibxxx tables for field 'f'.
        If 'f' is empty and 'p' has the form 'field:pattern', the field
        is taken from 'p'.  'f' may be a MARC tag (starting with two
        digits) or a field code resolved via get_field_tags()."""
     ## determine browse field:
     if not f and p.find(":") > 0: # does 'p' contain ':'?
         f, p = p.split(":", 1)
     ## construct 'tl' which defines the tag list (MARC tags) to search in:
     if f and len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
         # 'f' seems to be okay as it starts by two digits; the guards
         # keep an empty or one-character 'f' from raising here
         tl = [f]
     else:
         # deduce desired MARC tags on the basis of chosen 'f'
         tl = get_field_tags(f)
     # start searching:
     recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., }  (unique recIDs, therefore)
     for t in tl:
         # deduce into which bibxxx table we will search:
         digit1, digit2 = int(t[0]), int(t[1])
         bx = "bib%d%dx" % (digit1, digit2)
         bibx = "bibrec_bib%d%dx" % (digit1, digit2)
         if len(t) != 6 or t[-1:] == '%':
             # only the beginning of field 't' is defined, so add wildcard character:
             tag_operator, tag_value = "LIKE", t + "%"
         else:
             tag_operator, tag_value = "=", t
         res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                           WHERE bx.value=%%s AND bx.tag %s %%s
                             AND bibx.id_bibxxx=bx.id""" % (bibx, bx, tag_operator),
                       (p, tag_value))
         for row in res:
             recIDs[row[0]] = 1
     return len(recIDs)
 
 def get_mysql_recid_from_aleph_sysno(sysno):
     """Return DB's recID of the record whose 970__a value equals the
        ALEPH sysno passed in the argument (e.g. "002379334CER").
        Return None when there is no such record."""
     res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b
                       WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""",
                   (sysno,))
     if not res:
         return None
     # several records may match; the first one found is taken:
     return res[0][0]
 
 def guess_primary_collection_of_a_record(recID):
     """Return name of the primary collection the record recID belongs
        to, guessed from its first 980__a value: the collection whose
        dbquery is 'collection:<that value>' is looked up.
        May lead to bad guesses when a collection is defined dynamically
        via dbquery.  When the record has no 980__a value or no such
        collection exists, return CFG_SITE_NAME."""
     dbcollids = get_fieldvalues(recID, "980__a")
     if not dbcollids:
         return CFG_SITE_NAME
     res = run_sql("SELECT name FROM collection WHERE dbquery=%s",
                   ("collection:" + dbcollids[0],))
     if not res:
         return CFG_SITE_NAME
     return res[0][0]
 
 def get_tag_name(tag_value, prolog="", epilog=""):
     """Return name of the tag with the known value TAG_VALUE, as found
        in the 'tag' table, enclosed between PROLOG and EPILOG.
        Return empty string (without PROLOG and EPILOG) when no such
        tag is found.
        Example: input='100__%', output=first author'."""
     res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,))
     if not res:
         return ""
     return prolog + res[0][0] + epilog
 
 def get_fieldcodes():
     """Return list of the distinct field codes found in the 'field'
        table, i.e. codes that may have been passed as 'search options'
        in URL.
        Example: output=['subject','division']."""
     return [row[0] for row in run_sql("SELECT DISTINCT(code) FROM field")]
 
 def get_field_tags(field):
     """Return list of MARC tags attached to the field code 'field',
        ordered by descending score.
        Return empty list when nothing is found for 'field'.
        Example: field='author', output=['100__%','700__%']."""
     query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
                 WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
                 ORDER BY ft.score DESC"""
     return [row[0] for row in run_sql(query, (field, ))]
 
 def get_fieldvalues(recID, tag):
     """Return list of field values for field TAG inside record RECID.
 
     TAG "001___" gives the record ID itself, as a string.  Any other
     TAG is used as a LIKE pattern against the bibXXx table selected by
     its two leading digits; values come ordered by field number and
     tag.  Return empty list when TAG does not start with two digits.
     """
     if tag == "001___":
         # we have asked for recID that is not stored in bibXXx tables
         return [str(recID)]
     # we are going to look inside bibXXx tables; the two leading
     # characters go into the table names, which cannot be passed as
     # query parameters, so accept nothing but two plain digits:
     digits = tag[0:2]
     if len(digits) != 2 or not digits.isdigit():
         # invalid tag value asked for
         return []
     bx = "bib%sx" % digits
     bibx = "bibrec_bib%sx" % digits
     # recID and tag are passed as query parameters rather than being
     # pasted into the SQL text:
     query = "SELECT bx.value FROM %s AS bx, %s AS bibx " \
             " WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s " \
             " ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx)
     res = run_sql(query, (recID, tag))
     return [row[0] for row in res]
 
 def get_fieldvalues_alephseq_like(recID, tags_in):
     """Return buffer of ALEPH sequential-like textual format with fields found in the list TAGS_IN for record RECID.
 
     TAGS_IN may be a list of tags or a single tag.  When exactly one
     full six-character tag is asked for, its values are returned one
     per line.  Otherwise the fields are printed as "text MARC" lines of
     the form "<recID as 9 digits> <tag+indicators> $$<code><value>...";
     an empty TAGS_IN, or an empty tag, stands for all the fields.
     Tags not starting with two digits are ignored.
     """
     out = ""
     if not isinstance(tags_in, list):
         tags_in = [tags_in,]
     if len(tags_in) == 1 and len(tags_in[0]) == 6:
         ## case A: one concrete subfield asked, so print its value if found
         ##         (use with care: can mislead you if field has multiple occurrences)
         return "\n".join(get_fieldvalues(recID, tags_in[0]))
     ## case B: print our "text MARC" format; works safely all the time
     # find out which tags to output:
     dict_of_tags_out = {}
     tags_wanted = tags_in
     if not tags_wanted:
         # nothing asked for means everything, just like an empty tag:
         tags_wanted = [""]
     for tag in tags_wanted:
         if len(tag) == 0:
             for i in range(0, 10):
                 for j in range(0, 10):
                     dict_of_tags_out["%d%d%%" % (i, j)] = 1
         elif len(tag) == 1:
             for j in range(0, 10):
                 dict_of_tags_out["%s%d%%" % (tag, j)] = 1
         elif len(tag) < 5:
             dict_of_tags_out["%s%%" % tag] = 1
         else:
             # tag and both indicators are known, so drop the subfield code:
             dict_of_tags_out[tag[0:5]] = 1
     tags_out = list(dict_of_tags_out.keys())
     tags_out.sort()
     # search all bibXXx tables as needed:
     for tag in tags_out:
         # the two leading characters go into the table names, which
         # cannot be passed as query parameters, so accept nothing but
         # two plain digits:
         digits = tag[0:2]
         if len(digits) != 2 or not digits.isdigit():
             # invalid tag value asked for
             continue
         if tag.startswith("001") or tag.startswith("00%"):
             # recID is not stored in bibXXx tables, so print it by hand:
             if out:
                 out += "\n"
             out += "%09d %s %d" % (recID, "001__", recID)
         bx = "bib%sx" % digits
         bibx = "bibrec_bib%sx" % digits
         # recID and tag are passed as query parameters rather than
         # being pasted into the SQL text:
         query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                 "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                 "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx)
         res = run_sql(query, (recID, tag + "%"))
         # go through fields:
         field_number_old = -999
         field_old = ""
         for row in res:
             field, value, field_number = row[0], row[1], row[2]
             # start a new line whenever the field instance changes:
             if field_number != field_number_old or field[:-1] != field_old[:-1]:
                 if out:
                     out += "\n"
                 out += "%09d %s " % (recID, field[:5])
                 field_number_old = field_number
                 field_old = field
             # print subfield value; tags starting with "00" and ending
             # with "_" are printed without subfield code
             if field[0:2] == "00" and field[-1:] == "_":
                 out += value
             else:
                 out += "$$%s%s" % (field[-1:], value)
     return out
 
 def record_exists(recID):
     """Return 1 if record RECID exists.
        Return 0 if it doesn't exist.
        Return -1 if it exists but is marked as deleted, i.e. if
        "DELETED" (or, on CERN site, "DUMMY") is among its 980__% values."""
     # recID is passed as a query parameter rather than being pasted
     # into the SQL text:
     res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1)
     if not res:
         return 0
     # record exists; now check whether it isn't marked as deleted:
     dbcollids = get_fieldvalues(recID, "980__%")
     if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids):
         return -1 # exists, but marked as deleted
     return 1 # exists fine
 
 def record_public_p(recID):
     """Tell whether the record is public, i.e. whether recID is found
        in the record list of the Home collection (CFG_SITE_NAME).
        Return the outcome of that membership test.
     """
     public_recids = get_collection_reclist(CFG_SITE_NAME)
     return recID in public_recids
 
 def get_creation_date(recID, fmt="%Y-%m-%d"):
     """Return the creation date of the record 'recID', formatted by
        the database according to 'fmt'.
        Return empty string when the record is not found."""
     rows = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
     if not rows:
         return ""
     return rows[0][0]
 
 def get_modification_date(recID, fmt="%Y-%m-%d"):
     """Return the date of last modification of the record 'recID',
        formatted by the database according to 'fmt'.
        Return empty string when the record is not found."""
     rows = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
     if not rows:
         return ""
     return rows[0][0]
 
 def print_warning(req, msg, type='', prologue='<br />', epilogue='<br />'):
     """Write a warning message to 'req'.

     The text written is produced by websearch_templates.tmpl_print_warning
     from 'msg', 'type', 'prologue' and 'epilogue'.  Nothing is written
     when either 'req' or 'msg' is false.  Always returns None.
     """
     if not (req and msg):
         return
     warning_box = websearch_templates.tmpl_print_warning(
         msg = msg,
         type = type,
         prologue = prologue,
         epilogue = epilogue,
     )
     req.write(warning_box)
 
 def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10,
                       as=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="",
                       sc=1, pl_in_url="",
                       d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="",
                       cpu_time=-1, middle_only=0):
     """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
        Also, prints navigation links (beg/next/prev/end) inside the results set.
        If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links.
        This is suitable for displaying navigation links at the bottom of the search results page."""
 
     out = ""
 
     # sanity check:
     if jrec < 1:
         jrec = 1
     if jrec > nb_found:
         jrec = max(nb_found-rg+1, 1)
 
     return websearch_templates.tmpl_print_search_info(
              ln = ln,
              collection = collection,
              as = as,
              collection_name = get_coll_i18nname(collection, ln),
              collection_id = get_colID(collection),
              middle_only = middle_only,
              rg = rg,
              nb_found = nb_found,
              sf = sf,
              so = so,
              rm = rm,
              of = of,
              ot = ot,
              p = p,
              f = f,
              p1 = p1,
              p2 = p2,
              p3 = p3,
              f1 = f1,
              f2 = f2,
              f3 = f3,
              m1 = m1,
              m2 = m2,
              m3 = m3,
              op1 = op1,
              op2 = op2,
              pl_in_url = pl_in_url,
              d1y = d1y,
              d1m = d1m,
              d1d = d1d,
              d2y = d2y,
              d2m = d2m,
              d2d = d2d,
              dt = dt,
              jrec = jrec,
              sc = sc,
              sp = sp,
              all_fieldcodes = get_fieldcodes(),
              cpu_time = cpu_time,
            )
 
 def print_results_overview(req, colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=None):
     """Return the results overview box with links to particular collections below.

     The box is produced by websearch_templates.tmpl_print_results_overview.

     Parameters:
       req - not used by this function; kept in the signature for callers.
       colls - collection names; each one is turned into a dictionary
               with its 'id' (get_colID), its 'code' (the name as given)
               and its 'name' (get_coll_i18nname in language 'ln').
       results_final_nb_total, results_final_nb, cpu_time - passed to
               the template unchanged.
       ln - language.
       ec - passed to the template unchanged; defaults to a fresh empty
            list on every call.
     """
     # Use a None sentinel rather than a literal [] default: a list
     # default is created once and shared by every call that omits 'ec'.
     if ec is None:
         ec = []

     new_colls = [{
                   'id': get_colID(coll),
                   'code': coll,
                   'name': get_coll_i18nname(coll, ln),
                  } for coll in colls]

     return websearch_templates.tmpl_print_results_overview(
              ln = ln,
              results_final_nb_total = results_final_nb_total,
              results_final_nb = results_final_nb,
              cpu_time = cpu_time,
              colls = new_colls,
              ec = ec,
            )
 
 def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG):
     """Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'.
        If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by
        'sort pattern', for example "sort by report number that starts by CERN-PS".
        Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly.
        Several comma-separated fields/tags may be given; their values are concatenated to form the sort key.

        Parameters:
          req - request object, used only to print warnings.
          recIDs - list of record IDs to sort.
          sort_field - field code(s) or MARC tag(s), comma-separated.
          sort_order - 'a' reverses the key-sorted list, any other value
                       leaves it as sorted.  (print_records in this file
                       reads its input from tail to head.)
          sort_pattern - preferred prefix of the value to sort by.
          verbose - when >= 3, the tags and pattern used are printed.
          of - output format; warnings are printed only for formats
               starting with 'h'.
          ln - language of the warnings.

        Returns 'recIDs' itself when no sort field is given or when there
        are more than CFG_WEBSEARCH_NB_RECORDS_TO_SORT records, otherwise
        a new sorted list."""

     _ = gettext_set_language(ln)

     ## check arguments:
     if not sort_field:
         return recIDs
     if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT:
         if of.startswith('h'):
             print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning")
         return recIDs

     ## first deduce sorting MARC tag out of the 'sort_field' argument:
     tags = []
     for field_code in sort_field.split(","):
         if field_code and str(field_code[0:2]).isdigit():
             # field_code starts by two digits, so this is probably a MARC tag already
             tags.append(field_code)
             continue
         # let us check the 'field' table; the field code comes from the
         # caller, so it is bound as a query parameter and never pasted
         # into the SQL text:
         query = "SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f " \
                 "WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag " \
                 "ORDER BY ft.score DESC"
         res = run_sql(query, (field_code,))
         if res:
             tags.extend([row[0] for row in res])
         else:
             if of.startswith('h'):
                 print_warning(req, _("Sorry, %s does not seem to be a valid sort option. Choosing title sort instead.") % field_code, "Error")
             tags.append("245__a")
     if verbose >= 3:
         print_warning(req, "Sorting by tags %s." % tags)
         if sort_pattern:
             print_warning(req, "Sorting preferentially by %s." % sort_pattern)

     ## check if we have sorting tag defined:
     if not tags:
         # good, no sort needed
         return recIDs

     # fetch the necessary field values and group the records by sort key;
     # records sharing a key keep their input order:
     sort_pattern_lower = sort_pattern.lower()
     recIDs_dict = {}
     for recID in recIDs:
         vals = [] # will hold all values found in sorting tags for recID
         for tag in tags:
             vals.extend(get_fieldvalues(recID, tag))
         if sort_pattern:
             # try to pick that tag value that corresponds to sort pattern
             for v in vals:
                 if v.lower().startswith(sort_pattern_lower): # bingo!
                     val = v
                     break
             else:
                 # sort_pattern not present, so add other vals after spaces
                 val = sort_pattern + "          " + " ".join(vals)
         else:
             # no sort pattern defined, so join them all together
             val = " ".join(vals)
         val = strip_accents(val.lower()) # sort values regardless of accents and case
         recIDs_dict.setdefault(val, []).append(recID)

     # sort the keys, then create the output array:
     recIDs_dict_keys = recIDs_dict.keys()
     recIDs_dict_keys.sort()
     recIDs_out = []
     for k in recIDs_dict_keys:
         recIDs_out.extend(recIDs_dict[k])
     # ascending or descending?
     if sort_order == 'a':
         recIDs_out.reverse()
     # okay, we are done
     return recIDs_out
 
 def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=CFG_SITE_LANG, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True, print_records_epilogue_p=True, verbose=0, tab=''):

     """
     Prints list of records 'recIDs' formatted according to 'format' in
     groups of 'rg' starting from 'jrec'.  The output is written to
     'req'; nothing is returned.  If 'req' is None, nothing is done.

     Assumes that the input list 'recIDs' is sorted in reverse order,
     so it counts records from tail to head.

     A value of 'rg=-9999' means to print all records: to be used with care.
     Any other 'rg' is used by its absolute value.

     Print also list of RELEVANCES for each record (if defined), in
     between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE.  Relevances are
     only used by the HTML brief ('hb') output and are indexed in
     parallel with 'recIDs'.

     Print prologue and/or epilogue specific to 'format' if
     'print_records_prologue_p' and/or print_records_epilogue_p' are
     True.  These two flags are only consulted for formats starting
     with 'x'.

     'tab' selects what is shown for HTML detailed ('hd') output:
     'usage', 'citations', 'references', or anything else for the
     metadata tab.

     If 'recIDs' is empty, a warning is printed instead.

     Note: the 'decompress' argument is not used in this function's
     body, and the 'relevances' default is a list shared between calls
     (it is only read here).
     """

     # load the right message language
     _ = gettext_set_language(ln)

     # sanity checking:
     if req is None:
         return

     # get user_info (for formatting based on user)
     user_info = collect_user_info(req)

     if len(recIDs):
         nb_found = len(recIDs)

         if rg == -9999: # print all records
             rg = nb_found
         else:
             rg = abs(rg)
         if jrec < 1: # sanity checks
             jrec = 1
         if jrec > nb_found:
             # asked to start past the end: fall back to the last group of rg records
             jrec = max(nb_found-rg+1, 1)

         # will print records from irec_max to irec_min excluded:
         # (jrec is 1-based and counted from the tail of recIDs, so the
         # first record to print sits at index nb_found - jrec)
         irec_max = nb_found - jrec
         irec_min = nb_found - jrec - rg
         if irec_min < 0:
             # -1 as exclusive lower bound lets range() reach index 0
             irec_min = -1
         if irec_max >= nb_found:
             # defensive: jrec >= 1 at this point, so irec_max is already < nb_found
             irec_max = nb_found - 1

         #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max))

         if format.startswith('x'):
             # XML-like formats: the whole window is handed to format_records,
             # which is given req as well

             # print header if needed
             if print_records_prologue_p:
                 print_records_prologue(req, format)

             # print records
             recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)]
             format_records(recIDs_to_print,
                            format,
                            ln=ln,
                            search_pattern=search_pattern,
                            record_separator="\n",
                            user_info=user_info,
                            req=req)
             # print footer if needed
             if print_records_epilogue_p:
                 print_records_epilogue(req, format)

         elif format.startswith('t') or str(format[0:3]).isdigit():
             # we are doing plain text output:
             for irec in range(irec_max, irec_min, -1):
                 x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose)
                 req.write(x)
                 # separate records by a newline, but only after non-empty output
                 if x:
                     req.write('\n')
         elif format == 'excel':
             recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)]
             create_excel(recIDs=recIDs_to_print, req=req, ln=ln)
         else:
             # we are doing HTML output:
             # (the 'hb_'/'hd_' variants are tested first so that the plain
             # 'hb'/'hd' prefix tests below do not catch them)
             if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"):
                 # portfolio and on-the-fly formats:
                 for irec in range(irec_max, irec_min, -1):
                     req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                            user_info=user_info, verbose=verbose))
             elif format.startswith("hb"):
                 # HTML brief format:
                 req.write(websearch_templates.tmpl_record_format_htmlbrief_header(
                     ln = ln))
                 for irec in range(irec_max, irec_min, -1):
                     # row numbers count up from jrec while irec walks down from irec_max
                     row_number = jrec+irec_max-irec
                     recid = recIDs[irec]
                     # relevances is indexed with the same irec as recIDs;
                     # a missing or false relevance is shown as empty
                     if relevances and relevances[irec]:
                         relevance = relevances[irec]
                     else:
                         relevance = ''
                     record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                                   user_info=user_info, verbose=verbose)

                     req.write(websearch_templates.tmpl_record_format_htmlbrief_body(
                         ln = ln,
                         recid = recid,
                         row_number = row_number,
                         relevance = relevance,
                         record = record,
                         relevances_prologue = relevances_prologue,
                         relevances_epilogue = relevances_epilogue,
                         ))
                 req.write(websearch_templates.tmpl_record_format_htmlbrief_footer(
                     ln = ln))

             elif format.startswith("hd"):
                 # HTML detailed format:
                 for irec in range(irec_max, irec_min, -1):
                     # tabs defined for the record's primary collection
                     unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])),
                                                             recIDs[irec], ln=ln)
                     # order the tab ids by their 'order' value
                     ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
                     ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1]))
                     # the language is put in the tab links only when it is not the site default
                     link_ln = ''
                     if ln != CFG_SITE_LANG:
                         link_ln = '?ln=%s' % ln
                     # one (label, url, is-selected-tab, enabled) tuple per visible tab
                     tabs = [(unordered_tabs[tab_id]['label'], \
                              '%s/record/%s/%s%s' % (CFG_SITE_URL, recIDs[irec], tab_id, link_ln), \
                              tab_id == tab,
                              unordered_tabs[tab_id]['enabled']) \
                             for (tab_id, order) in ordered_tabs_id
                             if unordered_tabs[tab_id]['visible'] == True]

                     content = ''
                     # load content
                     if tab == 'usage':
                         # NOTE(review): this "downloads" result is not used, because
                         # the assignment to downloadsimilarity below is commented out
                         r = calculate_reading_similarity_list(recIDs[irec], "downloads")
                         downloadsimilarity = None
                         downloadhistory = None
                         #if r:
                         #    downloadsimilarity = r
                         if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
                             downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln)

                         r = calculate_reading_similarity_list(recIDs[irec], "pageviews")
                         viewsimilarity = None
                         if r: viewsimilarity = r
                         content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec],
                                                                                       ln,
                                                                                       downloadsimilarity=downloadsimilarity,
                                                                                       downloadhistory=downloadhistory,
                                                                                       viewsimilarity=viewsimilarity)
                         req.write(webstyle_templates.detailed_record_container(content,
                                                                                recIDs[irec],
                                                                                tabs,
                                                                                ln))
                     elif tab == 'citations':
                         citinglist = []
                         citationhistory = None
                         recid = recIDs[irec]
                         selfcited = get_self_cited_by(recid)
                         r = calculate_cited_by_list(recid)
                         # the citation history graph is created only when the cited-by list is non-empty
                         if r:
                             citinglist = r
                             citationhistory = create_citation_history_graph_and_box(recid, ln)

                         r = calculate_co_cited_with_list(recid)
                         cociting = None
                         if r:
                             cociting = r

                         content = websearch_templates.tmpl_detailed_record_citations(recid,
                                                                                      ln,
                                                                                      citinglist=citinglist,
                                                                                      citationhistory=citationhistory,
                                                                                      cociting=cociting,
                                              selfcited=selfcited)
                         req.write(webstyle_templates.detailed_record_container(content,
                                                                                recid,
                                                                                tabs,
                                                                                ln))
                     elif tab == 'references':
                         content = format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose)
                         req.write(webstyle_templates.detailed_record_container(content,
                                                                                recIDs[irec],
                                                                                tabs,
                                                                                ln))
                     else:
                         # Metadata tab
                         content = print_record(recIDs[irec], format, ot, ln,
                                                search_pattern=search_pattern,
                                                user_info=user_info, verbose=verbose)

                         # dates are fetched only for records that exist and are
                         # not marked as deleted (record_exists() == 1)
                         creationdate = None
                         modificationdate = None
                         if record_exists(recIDs[irec]) == 1:
                             creationdate = get_creation_date(recIDs[irec])
                             modificationdate = get_modification_date(recIDs[irec])

                         content = websearch_templates.tmpl_detailed_record_metadata(
                             recID = recIDs[irec],
                             ln = ln,
                             format = format,
                             creationdate = creationdate,
                             modificationdate = modificationdate,
                             content = content)

                         req.write(webstyle_templates.detailed_record_container(content,
                                                                                recIDs[irec],
                                                                                tabs,
                                                                                ln=ln,
                                                                                creationdate=creationdate,
                                                                                modificationdate=modificationdate,
                                                                                show_short_rec_p=False))

                         # the mini panel is written only when at least one tab is visible
                         if len(tabs) > 0:
                             # Add the mini box at bottom of the page
                             if CFG_WEBCOMMENT_ALLOW_REVIEWS:
                                 from invenio.webcomment import get_mini_reviews
                                 reviews = get_mini_reviews(recid = recIDs[irec], ln=ln)
                             else:
                                 reviews = ''
                             actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose)
                             files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose)
                             req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec],
                                                                                     ln,
                                                                                     format,
                                                                                     files=files,
                                                                                     reviews=reviews,
                                                                                     actions=actions))
             else:
                 # Other formats
                 for irec in range(irec_max, irec_min, -1):
                     req.write(print_record(recIDs[irec], format, ot, ln,
                                            search_pattern=search_pattern,
                                            user_info=user_info, verbose=verbose))

     else:
         # nothing to print
         print_warning(req, _("Use different search terms."))
 
 def print_records_prologue(req, format):
     """
     Write to 'req' the prologue that opens a list of records in the
     given format.

     Formats starting with 'xm', 'xn' and 'xr' get the MARC, NLM and
     RSS XML prologue respectively, any other format starting with 'x'
     gets the default XML prologue, and all remaining formats (HTML,
     text) get an empty string.
     """
     prologue = ""
     if format.startswith('x'):
         # the character after the leading 'x' picks the XML flavour
         flavour = format[1:2]
         if flavour == 'm':
             prologue = websearch_templates.tmpl_xml_marc_prologue()
         elif flavour == 'n':
             prologue = websearch_templates.tmpl_xml_nlm_prologue()
         elif flavour == 'r':
             prologue = websearch_templates.tmpl_xml_rss_prologue()
         else:
             prologue = websearch_templates.tmpl_xml_default_prologue()
     req.write(prologue)
 
 def print_records_epilogue(req, format):
     """
     Write to 'req' the epilogue that closes a list of records in the
     given format.

     Formats starting with 'xm', 'xn' and 'xr' get the MARC, NLM and
     RSS XML epilogue respectively, any other format starting with 'x'
     gets the default XML epilogue, and all remaining formats (HTML,
     text) get an empty string.
     """
     epilogue = ""
     if format.startswith('x'):
         # the character after the leading 'x' picks the XML flavour
         flavour = format[1:2]
         if flavour == 'm':
             epilogue = websearch_templates.tmpl_xml_marc_epilogue()
         elif flavour == 'n':
             epilogue = websearch_templates.tmpl_xml_nlm_epilogue()
         elif flavour == 'r':
             epilogue = websearch_templates.tmpl_xml_rss_epilogue()
         else:
             epilogue = websearch_templates.tmpl_xml_default_epilogue()
     req.write(epilogue)
 
 def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress,
                  search_pattern=None, user_info=None, verbose=0):
     """Prints record 'recID' formatted accoding to 'format'."""
 
     _ = gettext_set_language(ln)
 
     out = ""
 
     # sanity check:
     record_exist_p = record_exists(recID)
     if record_exist_p == 0: # doesn't exist
         return out
 
     # New Python BibFormat procedure for formatting
     # Old procedure follows further below
     # We must still check some special formats, but these
     # should disappear when BibFormat improves.
     if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \
             or format.lower().startswith('t') \
             or format.lower().startswith('hm') \
             or str(format[0:3]).isdigit() \
             or ot):
 
         # Unspecified format is hd
         if format == '':
             format = 'hd'
 
         if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html':
             # HTML output displays a default value for deleted records.
             # Other format have to deal with it.
             out += _("The record has been deleted.")
         else:
             out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                   user_info=user_info, verbose=verbose)
 
             # at the end of HTML brief mode, print the "Detailed record" functionality:
             if format.lower().startswith('hb') and \
                    format.lower() != 'hb_p':
                 out += websearch_templates.tmpl_print_record_brief_links(
                     ln = ln,
                     recID = recID,
                     )
         return out
 
     # Old PHP BibFormat procedure for formatting
     # print record opening tags, if needed:
     if format == "marcxml" or format == "oai_dc":
         out += "  <record>\n"
         out += "   <header>\n"
         for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
             out += "    <identifier>%s</identifier>\n" % oai_id
         out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID)
         out += "   </header>\n"
         out += "   <metadata>\n"
 
     if format.startswith("xm") or format == "marcxml":
         # look for detailed format existence:
         query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format)
         res = run_sql(query, None, 1)
         if res and record_exist_p == 1:
             # record 'recID' is formatted in 'format', so print it
             out += "%s" % decompress(res[0][0])
         else:
             # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables:
             if format == "marcxml":
                 out += """    <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
                 out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
             elif format.startswith("xm"):
                 out += """    <record>\n"""
                 out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
             if record_exist_p == -1:
                 # deleted record, so display only OAI ID and 980:
                 oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                 if oai_ids:
                     out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                            (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0])
                 out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n"
             else:
                 # controlfields
                 query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                         "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\
                         "ORDER BY bb.field_number, b.tag ASC" % recID
                 res = run_sql(query)
                 for row in res:
                     field, value = row[0], row[1]
                     value = encode_for_xml(value)
                     out += """        <controlfield tag="%s" >%s</controlfield>\n""" % \
                            (encode_for_xml(field[0:3]), value)
                 # datafields
                 i = 1 # Do not process bib00x and bibrec_bib00x, as
                       # they are controlfields. So start at bib01x and
                       # bibrec_bib00x (and set i = 0 at the end of
                       # first loop)
                 for digit1 in range(0, 10):
                     for digit2 in range(i, 10):
                         bx = "bib%d%dx" % (digit1, digit2)
                         bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                         query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                 "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\
                                 "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, recID, str(digit1)+str(digit2))
                         res = run_sql(query)
                         field_number_old = -999
                         field_old = ""
                         for row in res:
                             field, value, field_number = row[0], row[1], row[2]
                             ind1, ind2 = field[3], field[4]
                             if ind1 == "_" or ind1 == "":
                                 ind1 = " "
                             if ind2 == "_" or ind2 == "":
                                 ind2 = " "
                             # print field tag
                             if field_number != field_number_old or field[:-1] != field_old[:-1]:
                                 if field_number_old != -999:
                                     out += """        </datafield>\n"""
                                 out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                            (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2))
                                 field_number_old = field_number
                                 field_old = field
                             # print subfield value
                             value = encode_for_xml(value)
                             out += """            <subfield code="%s">%s</subfield>\n""" % \
                                    (encode_for_xml(field[-1:]), value)
 
                         # all fields/subfields printed in this run, so close the tag:
                         if field_number_old != -999:
                             out += """        </datafield>\n"""
                     i = 0 # Next loop should start looking at bib%0 and bibrec_bib00x
             # we are at the end of printing the record:
             out += "    </record>\n"
 
     elif format == "xd" or format == "oai_dc":
         # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
         out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                          xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                              http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
         if record_exist_p == -1:
             out += ""
         else:
             for f in get_fieldvalues(recID, "041__a"):
                 out += "        <language>%s</language>\n" % f
 
             for f in get_fieldvalues(recID, "100__a"):
                 out += "        <creator>%s</creator>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "700__a"):
                 out += "        <creator>%s</creator>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "245__a"):
                 out += "        <title>%s</title>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "65017a"):
                 out += "        <subject>%s</subject>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "8564_u"):
                 out += "        <identifier>%s</identifier>\n" % encode_for_xml(f)
 
             for f in get_fieldvalues(recID, "520__a"):
                 out += "        <description>%s</description>\n" % encode_for_xml(f)
 
             out += "        <date>%s</date>\n" % get_creation_date(recID)
         out += "    </dc>\n"
 
     elif str(format[0:3]).isdigit():
         # user has asked to print some fields only
         if format == "001":
             out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format)
         else:
             vals = get_fieldvalues(recID, format)
             for val in vals:
                 out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format)
 
     elif format.startswith('t'):
         ## user directly asked for some tags to be displayed only
         if record_exist_p == -1:
             out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"])
         else:
             out += get_fieldvalues_alephseq_like(recID, ot)
 
     elif format == "hm":
         if record_exist_p == -1:
             out += "<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"])) + "</pre>"
         else:
             out += "<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot)) + "</pre>"
 
     elif format.startswith("h") and ot:
         ## user directly asked for some tags to be displayed only
         if record_exist_p == -1:
             out += "<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"]) + "</pre>"
         else:
             out += "<pre>" + get_fieldvalues_alephseq_like(recID, ot) + "</pre>"
 
     elif format == "hd":
         # HTML detailed format
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             # look for detailed format existence:
             query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format)
             res = run_sql(query, None, 1)
             if res:
                 # record 'recID' is formatted in 'format', so print it
                 out += "%s" % decompress(res[0][0])
             else:
                 # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format:
                 out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                       user_info=user_info, verbose=verbose)
                 if out_record_in_format:
                     out += out_record_in_format
                 else:
                     out += websearch_templates.tmpl_print_record_detailed(
                              ln = ln,
                              recID = recID,
                            )
 
     elif format.startswith("hb_") or format.startswith("hd_"):
         # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                   user_info=user_info, verbose=verbose)
 
     elif format.startswith("hx"):
         # BibTeX format, called on the fly:
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                   user_info=user_info, verbose=verbose)
 
     elif format.startswith("hs"):
         # for citation/download similarity navigation links:
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             out += '<a href="%s">' % websearch_templates.build_search_url(recid=recID, ln=ln)
             # firstly, title:
             titles = get_fieldvalues(recID, "245__a")
             if titles:
                 for title in titles:
                     out += "<strong>%s</strong>" % title
             else:
                 # usual title not found, try conference title:
                 titles = get_fieldvalues(recID, "111__a")
                 if titles:
                     for title in titles:
                         out += "<strong>%s</strong>" % title
                 else:
                     # just print record ID:
                     out += "<strong>%s %d</strong>" % (get_field_i18nname("record ID", ln), recID)
             out += "</a>"
             # secondly, authors:
             authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
             if authors:
                 out += " - %s" % authors[0]
                 if len(authors) > 1:
                     out += " <em>et al</em>"
             # thirdly publication info:
             publinfos = get_fieldvalues(recID, "773__s")
             if not publinfos:
                 publinfos = get_fieldvalues(recID, "909C4s")
                 if not publinfos:
                     publinfos = get_fieldvalues(recID, "037__a")
                     if not publinfos:
                         publinfos = get_fieldvalues(recID, "088__a")
             if publinfos:
                 out += " - %s" % publinfos[0]
             else:
                 # fourthly publication year (if not publication info):
                 years = get_fieldvalues(recID, "773__y")
                 if not years:
                     years = get_fieldvalues(recID, "909C4y")
                     if not years:
                         years = get_fieldvalues(recID, "260__c")
                 if years:
                     out += " (%s)" % years[0]
     else:
         # HTML brief format by default
         if record_exist_p == -1:
             out += _("The record has been deleted.")
         else:
             query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format)
             res = run_sql(query)
             if res:
                 # record 'recID' is formatted in 'format', so print it
                 out += "%s" % decompress(res[0][0])
             else:
                 # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly: or use default format:
                 if CFG_WEBSEARCH_CALL_BIBFORMAT:
                     out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                           user_info=user_info, verbose=verbose)
                     if out_record_in_format:
                         out += out_record_in_format
                     else:
                         out += websearch_templates.tmpl_print_record_brief(
                                  ln = ln,
                                  recID = recID,
                                )
                 else:
                     out += websearch_templates.tmpl_print_record_brief(
                              ln = ln,
                              recID = recID,
                            )
 
             # at the end of HTML brief mode, print the "Detailed record" functionality:
             if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"):
                 pass # do nothing for portfolio and on-the-fly formats
             else:
                 out += websearch_templates.tmpl_print_record_brief_links(
                          ln = ln,
                          recID = recID,
                        )
 
     # print record closing tags, if needed:
     if format == "marcxml" or format == "oai_dc":
         out += "   </metadata>\n"
         out += "  </record>\n"
 
     return out
 
 def encode_for_xml(s):
     "Encode special chars in string so that it would be XML-compliant."
     s = string.replace(s, '&', '&amp;')
     s = string.replace(s, '<', '&lt;')
     return s
 
 def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0):
     """
     Calls BibFormat and returns formatted record.
 
     BibFormat will decide by itself if old or new BibFormat must be used.
     """
 
     keywords = []
     if search_pattern is not None:
         units = create_basic_search_units(None, str(search_pattern), None)
         keywords = [unit[1] for unit in units if unit[0] != '-']
 
     return format_record(recID,
                          of=format,
                          ln=ln,
                          search_pattern=keywords,
                          user_info=user_info,
                          verbose=verbose)
 
 def log_query(hostname, query_args, uid=-1):
     """
     Log query into the query and user_query tables.
     Return id_query or None in case of problems.
     """
     id_query = None
     if uid > 0:
         # log the query only if uid is reasonable
         res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1)
         try:
             id_query = res[0][0]
         except:
             id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,))
         if id_query:
             run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)",
                     (uid, id_query, hostname,
                      time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
     return id_query
 
 def log_query_info(action, p, f, colls, nb_records_found_total=-1):
     """Write some info to the log file for later analysis."""
     try:
         log = open(CFG_LOGDIR + "/search.log", "a")
         log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime()))
         log.write(action+"#")
         log.write(p+"#")
         log.write(f+"#")
         for coll in colls[:-1]:
             log.write("%s," % coll)
         log.write("%s#" % colls[-1])
         log.write("%d" % nb_records_found_total)
         log.write("\n")
         log.close()
     except:
         pass
     return
 
 def wash_url_argument(var, new_type):
     """Wash list argument into 'new_type', that can be 'list',
        'str', or 'int'.  Useful for washing mod_python passed
        arguments, that are all lists of strings (URL args may be
        multiple), but we sometimes want only to take the first value,
        and sometimes to represent it as string or numerical value."""
     out = []
     if new_type == 'list':  # return lst
         if type(var) is list:
             out = var
         else:
             out = [var]
     elif new_type == 'str':  # return str
         if type(var) is list:
             try:
                 out = "%s" % var[0]
             except:
                 out = ""
         elif type(var) is str:
             out = var
         else:
             out = "%s" % var
     elif new_type == 'int': # return int
         if type(var) is list:
             try:
                 out = string.atoi(var[0])
             except:
                 out = 0
         elif type(var) is int:
             out = var
         elif type(var) is str:
             try:
                 out = string.atoi(var)
             except:
                 out = 0
         else:
             out = 0
     return out
 
 ### CALLABLES
 
 def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=10, sf="", so="d", sp="", rm="", of="id", ot="", as=0,
                            p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0,
                            recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="",
                            d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab=""):
     """Perform search or browse request, without checking for
        authentication.  Return list of recIDs found, if of=id.
        Otherwise create web page.
 
        The arguments are as follows:
 
          req - mod_python Request class instance.
 
           cc - current collection (e.g. "ATLAS").  The collection the
                user started to search/browse from.
 
            c - collection list (e.g. ["Theses", "Books"]).  The
                collections user may have selected/deselected when
                starting to search from 'cc'.
 
            p - pattern to search for (e.g. "ellis and muon or kaon").
 
            f - field to search within (e.g. "author").
 
           rg - records in groups of (e.g. "10").  Defines how many hits
                per collection in the search results page are
                displayed.
 
           sf - sort field (e.g. "title").
 
           so - sort order ("a"=ascending, "d"=descending).
 
           sp - sort pattern (e.g. "CERN-") -- in case there are more
                values in a sort field, this argument tells which one
                to prefer
 
           rm - ranking method (e.g. "jif").  Defines whether results
                should be ranked by some known ranking method.
 
           of - output format (e.g. "hb").  Usually starting "h" means
                HTML output (and "hb" for HTML brief, "hd" for HTML
                detailed), "x" means XML output, "t" means plain text
                output, "id" means no output at all but to return list
                of recIDs found.  (Suitable for high-level API.)
 
           ot - output only these MARC tags (e.g. "100,700,909C0b").
                Useful if only some fields are to be shown in the
                output, e.g. for library to control some fields.
 
           as - advanced search ("0" means no, "1" means yes).  Whether
                search was called from within the advanced search
                interface.
 
           p1 - first pattern to search for in the advanced search
                interface.  Much like 'p'.
 
           f1 - first field to search within in the advanced search
                interface.  Much like 'f'.
 
           m1 - first matching type in the advanced search interface.
                ("a" all of the words, "o" any of the words, "e" exact
                phrase, "p" partial phrase, "r" regular expression).
 
          op1 - first operator, to join the first and the second unit
                in the advanced search interface.  ("a" add, "o" or,
                "n" not).
 
           p2 - second pattern to search for in the advanced search
                interface.  Much like 'p'.
 
           f2 - second field to search within in the advanced search
                interface.  Much like 'f'.
 
           m2 - second matching type in the advanced search interface.
                ("a" all of the words, "o" any of the words, "e" exact
                phrase, "p" partial phrase, "r" regular expression).
 
          op2 - second operator, to join the second and the third unit
                in the advanced search interface.  ("a" add, "o" or,
                "n" not).
 
           p3 - third pattern to search for in the advanced search
                interface.  Much like 'p'.
 
           f3 - third field to search within in the advanced search
                interface.  Much like 'f'.
 
           m3 - third matching type in the advanced search interface.
                ("a" all of the words, "o" any of the words, "e" exact
                phrase, "p" partial phrase, "r" regular expression).
 
           sc - split by collection ("0" no, "1" yes).  Governs whether
                we want to present the results in a single huge list,
                or splitted by collection.
 
         jrec - jump to record (e.g. "234").  Used for navigation
                inside the search results.
 
        recid - display record ID (e.g. "20000").  Do not
                search/browse but go straight away to the Detailed
                record page for the given recID.
 
       recidb - display record ID bis (e.g. "20010").  If greater than
                'recid', then display records from recid to recidb.
                Useful for example for dumping records from the
                database for reformatting.
 
        sysno - display old system SYS number (e.g. "").  If you
                migrate to CDS Invenio from another system, and store your
                old SYS call numbers, you can use them instead of recid
                if you wish so.
 
           id - the same as recid, in case recid is not set.  For
                backwards compatibility.
 
          idb - the same as recid, in case recidb is not set.  For
                backwards compatibility.
 
        sysnb - the same as sysno, in case sysno is not set.  For
                backwards compatibility.
 
       action - action to do.  "SEARCH" for searching, "Browse" for
                browsing.  Default is to search.
 
           d1 - first datetime in full YYYY-mm-dd HH:MM:DD format
                (e.g. "1998-08-23 12:34:56"). Useful for search limits
                on creation/modification date (see 'dt' argument
                below).  Note that 'd1' takes precedence over d1y, d1m,
                d1d if these are defined.
 
          d1y - first date's year (e.g. "1998").  Useful for search
                limits on creation/modification date.
 
          d1m - first date's month (e.g. "08").  Useful for search
                limits on creation/modification date.
 
          d1d - first date's day (e.g. "23").  Useful for search
                limits on creation/modification date.
 
           d2 - second datetime in full YYYY-mm-dd HH:MM:DD format
                (e.g. "1998-09-02 12:34:56"). Useful for search limits
                on creation/modification date (see 'dt' argument
                below).  Note that 'd2' takes precedence over d2y, d2m,
                d2d if these are defined.
 
          d2y - second date's year (e.g. "1998").  Useful for search
                limits on creation/modification date.
 
          d2m - second date's month (e.g. "09").  Useful for search
                limits on creation/modification date.
 
          d2d - second date's day (e.g. "02").  Useful for search
                limits on creation/modification date.
 
           dt - first and second date's type (e.g. "c").  Specifies
                whether to search in creation dates ("c") or in
                modification dates ("m").  When dt is not set and d1*
                and d2* are set, the default is "c".
 
      verbose - verbose level (0=min, 9=max).  Useful to print some
                internal information on the searching process in case
                something goes wrong.
 
           ap - alternative patterns (0=no, 1=yes).  In case no exact
                match is found, the search engine can try alternative
                patterns e.g. to replace non-alphanumeric characters by
                a boolean query.  ap defines if this is wanted.
 
           ln - language of the search interface (e.g. "en").  Useful
                for internationalization.
 
           ec - list of external search engines to search as well
                (e.g. "SPIRES HEP").
     """
     selected_external_collections_infos = None
 
     # wash all arguments requiring special care
     try:
         (cc, colls_to_display, colls_to_search) = wash_colls(cc, c, sc) # which colls to search and to display?
     except InvenioWebSearchUnknownCollectionError, exc:
         colname = exc.colname
         if of.startswith("h"):
             page_start(req, of, cc, as, ln, getUid(req),
                        websearch_templates.tmpl_collection_not_found_page_title(colname, ln))
             req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln))
             return page_end(req, of, ln)
         elif of == "id":
             return []
         elif of.startswith("x"):
             # Print empty, but valid XML
             print_records_prologue(req, of)
             print_records_epilogue(req, of)
         else:
             return page_end(req, of, ln)
 
     p = wash_pattern(p)
     f = wash_field(f)
     p1 = wash_pattern(p1)
     f1 = wash_field(f1)
     p2 = wash_pattern(p2)
     f2 = wash_field(f2)
     p3 = wash_pattern(p3)
     f3 = wash_field(f3)
     datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d)
 
     _ = gettext_set_language(ln)
 
     # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable)
     if sysnb != "" and sysno == "":
         sysno = sysnb
     if id > 0 and recid == -1:
         recid = id
     if idb > 0 and recidb == -1:
         recidb = idb
     # TODO deduce passed search limiting criterias (if applicable)
     pl, pl_in_url = "", "" # no limits by default
     if action != "browse" and req and req.args: # we do not want to add options while browsing or while calling via command-line
         fieldargs = cgi.parse_qs(req.args)
         for fieldcode in get_fieldcodes():
             if fieldargs.has_key(fieldcode):
                 for val in fieldargs[fieldcode]:
                     pl += "+%s:\"%s\" " % (fieldcode, val)
                     pl_in_url += "&amp;%s=%s" % (urllib.quote(fieldcode), urllib.quote(val))
     # deduce recid from sysno argument (if applicable):
     if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record:
         recid = get_mysql_recid_from_aleph_sysno(sysno)
         if recid is None:
             recid = 0 # use recid 0 to indicate that this sysno does not exist
     # deduce collection we are in (if applicable):
     if recid > 0:
         cc = guess_primary_collection_of_a_record(recid)
     # deduce user id (if applicable):
     try:
         uid = getUid(req)
     except:
         uid = 0
     ## 0 - start output
     if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist
         ## 1 - detailed record display
         title, description, keywords = \
                websearch_templates.tmpl_record_page_header_content(req, recid, ln)
 
         page_start(req, of, cc, as, ln, uid, title, description, keywords, recid, tab)
         # Default format is hb but we are in detailed -> change 'of'
         if of == "hb":
             of = "hd"
         if record_exists(recid):
             if recidb <= recid: # sanity check
                 recidb = recid + 1
             if of == "id":
                 return [recidx for recidx in range(recid, recidb) if record_exists(recidx)]
             else:
                 print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose, tab=tab)
             if req and of.startswith("h"): # register detailed record page view event
                 client_ip_address = str(req.get_remote_host(apache.REMOTE_NOLOOKUP))
                 register_page_view_event(recid, uid, client_ip_address)
         else: # record does not exist
             if of == "id":
                 return []
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
             elif of.startswith("h"):
                 print_warning(req, _("Requested record does not seem to exist."))
 
     elif action == "browse":
         ## 2 - browse needed
         page_start(req, of, cc, as, ln, uid, _("Browse"))
         if of.startswith("h"):
             req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1,
                                         p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action))
         try:
             if as == 1 or (p1 or p2 or p3):
                 browse_pattern(req, colls_to_search, p1, f1, rg, ln)
                 browse_pattern(req, colls_to_search, p2, f2, rg, ln)
                 browse_pattern(req, colls_to_search, p3, f3, rg, ln)
             else:
                 browse_pattern(req, colls_to_search, p, f, rg, ln)
         except:
             if of.startswith("h"):
                 req.write(create_error_box(req, verbose=verbose, ln=ln))
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
             return page_end(req, of, ln)
 
     elif rm and p.startswith("recid:"):
         ## 3-ter - similarity search needed
         page_start(req, of, cc, as, ln, uid, _("Search Results"))
         if of.startswith("h"):
             req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1,
                                         p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action))
         if record_exists(p[6:]) != 1:
             # record does not exist
             if of.startswith("h"):
                 print_warning(req, "Requested record does not seem to exist.")
             if of == "id":
                 return []
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
         else:
             # record well exists, so find similar ones to it
             t1 = os.times()[4]
             results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \
                                     rank_records(rm, 0, get_collection_reclist(cc), string.split(p), verbose)
             if results_similar_recIDs:
                 t2 = os.times()[4]
                 cpu_time = t2 - t1
                 if of.startswith("h"):
                     req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs),
                                                 jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                 sc, pl_in_url,
                                                 d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                     print_warning(req, results_similar_comments)
                     print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln,
                                   results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose)
                 elif of=="id":
                     return results_similar_recIDs
                 elif of.startswith("x"):
                     print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln,
                                   results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose)
             else:
                 # rank_records failed and returned some error message to display:
                 if of.startswith("h"):
                     print_warning(req, results_similar_relevances_prologue)
                     print_warning(req, results_similar_relevances_epilogue)
                     print_warning(req, results_similar_comments)
                 if of == "id":
                     return []
                 elif of.startswith("x"):
                     # Print empty, but valid XML
                     print_records_prologue(req, of)
                     print_records_epilogue(req, of)
 
     elif p.startswith("cocitedwith:"):  #WAS EXPERIMENTAL
         ## 3-terter - cited by search needed
         page_start(req, of, cc, as, ln, uid, _("Search Results"))
         if of.startswith("h"):
             req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1,
                                         p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action))
         recID = p[12:]
         if record_exists(recID) != 1:
             # record does not exist
             if of.startswith("h"):
                 print_warning(req, "Requested record does not seem to exist.")
             if of == "id":
                 return []
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
         else:
             # record well exists, so find co-cited ones:
             t1 = os.times()[4]
             results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID)))
             if results_cocited_recIDs:
                 t2 = os.times()[4]
                 cpu_time = t2 - t1
                 if of.startswith("h"):
                     req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs),
                                                 jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                 sc, pl_in_url,
                                                 d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                     print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose)
                 elif of=="id":
                     return results_cocited_recIDs
                 elif of.startswith("x"):
                     print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose)
 
             else:
                 # cited rank_records failed and returned some error message to display:
                 if of.startswith("h"):
                     print_warning(req, "nothing found")
                 if of == "id":
                     return []
                 elif of.startswith("x"):
                     # Print empty, but valid XML
                     print_records_prologue(req, of)
                     print_records_epilogue(req, of)
     else:
         ## 3 - common search needed
         page_start(req, of, cc, as, ln, uid, _("Search Results"))
         if of.startswith("h"):
             req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1,
                                         p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action))
         t1 = os.times()[4]
         results_in_any_collection = HitSet()
         if as == 1 or (p1 or p2 or p3):
             ## 3A - advanced search
             try:
                 results_in_any_collection = search_pattern(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln)
                 if len(results_in_any_collection) == 0:
                     if of.startswith("h"):
                         perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                     elif of.startswith("x"):
                         # Print empty, but valid XML
                         print_records_prologue(req, of)
                         print_records_epilogue(req, of)
                     return page_end(req, of, ln)
                 if p2:
                     results_tmp = search_pattern(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln)
                     if op1 == "a": # add
                         results_in_any_collection.intersection_update(results_tmp)
                     elif op1 == "o": # or
                         results_in_any_collection.union_update(results_tmp)
                     elif op1 == "n": # not
                         results_in_any_collection.difference_update(results_tmp)
                     else:
                         if of.startswith("h"):
                             print_warning(req, "Invalid set operation %s." % op1, "Error")
                     if len(results_in_any_collection) == 0:
                         if of.startswith("h"):
                             perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                         elif of.startswith("x"):
                             # Print empty, but valid XML
                             print_records_prologue(req, of)
                             print_records_epilogue(req, of)
                         return page_end(req, of, ln)
                 if p3:
                     results_tmp = search_pattern(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln)
                     if op2 == "a": # add
                         results_in_any_collection.intersection_update(results_tmp)
                     elif op2 == "o": # or
                         results_in_any_collection.union_update(results_tmp)
                     elif op2 == "n": # not
                         results_in_any_collection.difference_update(results_tmp)
                     else:
                         if of.startswith("h"):
                             print_warning(req, "Invalid set operation %s." % op2, "Error")
             except:
                 if of.startswith("h"):
                     req.write(create_error_box(req, verbose=verbose, ln=ln))
                     perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                 elif of.startswith("x"):
                     # Print empty, but valid XML
                     print_records_prologue(req, of)
                     print_records_epilogue(req, of)
 
                 return page_end(req, of, ln)
         else:
             ## 3B - simple search
             try:
                 results_in_any_collection = search_pattern(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln)
             except:
                 if of.startswith("h"):
                     req.write(create_error_box(req, verbose=verbose, ln=ln))
                     perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                 return page_end(req, of, ln)
 
         if len(results_in_any_collection) == 0:
             if of.startswith("h"):
                 perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
             return page_end(req, of, ln)
 
 #             search_cache_key = p+"@"+f+"@"+string.join(colls_to_search,",")
 #             if search_cache.has_key(search_cache_key): # is the result in search cache?
 #                 results_final = search_cache[search_cache_key]
 #             else:
 #                 results_final = search_pattern(req, p, f, colls_to_search)
 #                 search_cache[search_cache_key] = results_final
 #             if len(search_cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE: # is the cache full? (sanity cleaning)
 #                 search_cache.clear()
 
         # search stage 4: intersection with collection universe:
         try:
             results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln)
         except:
             if of.startswith("h"):
                 req.write(create_error_box(req, verbose=verbose, ln=ln))
                 perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
             return page_end(req, of, ln)
 
         if results_final == {}:
             if of.startswith("h"):
                 perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
             if of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
             return page_end(req, of, ln)
 
         # search stage 5: apply search option limits and restrictions:
         if datetext1 != "":
             if verbose and of.startswith("h"):
                 print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2))
             try:
                 results_final = intersect_results_with_hitset(req,
                                                               results_final,
                                                               search_unit_in_bibrec(datetext1, datetext2, dt),
                                                               ap,
                                                               aptext= _("No match within your time limits, "
                                                                         "discarding this condition..."),
                                                               of=of)
             except:
                 if of.startswith("h"):
                     req.write(create_error_box(req, verbose=verbose, ln=ln))
                     perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                 return page_end(req, of, ln)
             if results_final == {}:
                 if of.startswith("h"):
                     perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                 return page_end(req, of, ln)
 
 
 
         if pl:
             pl = wash_pattern(pl)
             if verbose and of.startswith("h"):
                 print_warning(req, "Search stage 5: applying search pattern limit %s..." % (pl,))
             try:
                 results_final = intersect_results_with_hitset(req,
                                                               results_final,
                                                               search_pattern(req, pl, ap=0, ln=ln),
                                                               ap,
                                                               aptext=_("No match within your search limits, "
                                                                        "discarding this condition..."),
                                                               of=of)
             except:
                 if of.startswith("h"):
                     req.write(create_error_box(req, verbose=verbose, ln=ln))
                     perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                 return page_end(req, of, ln)
             if results_final == {}:
                 if of.startswith("h"):
                     perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                 if of.startswith("x"):
                     # Print empty, but valid XML
                     print_records_prologue(req, of)
                     print_records_epilogue(req, of)
                 return page_end(req, of, ln)
 
         t2 = os.times()[4]
         cpu_time = t2 - t1
         ## search stage 6: display results:
         results_final_nb_total = 0
         results_final_nb = {} # will hold number of records found in each collection
                               # (in simple dict to display overview more easily)
         for coll in results_final.keys():
             results_final_nb[coll] = len(results_final[coll])
             #results_final_nb_total += results_final_nb[coll]
 
         # Now let us calculate results_final_nb_total more precisely,
         # in order to get the total number of "distinct" hits across
         # searched collections; this is useful because a record might
         # have been attributed to more than one primary collection; so
         # we have to avoid counting it multiple times.  The price to
         # pay for this accuracy of results_final_nb_total is somewhat
         # increased CPU time.
         if results_final.keys() == 1:
             # only one collection; no need to union them
             results_final_for_all_selected_colls = results_final.values()[0]
             results_final_nb_total = results_final_nb.values()[0]
         else:
             # okay, some work ahead to union hits across collections:
             results_final_for_all_selected_colls = HitSet()
             for coll in results_final.keys():
                 results_final_for_all_selected_colls.union_update(results_final[coll])
             results_final_nb_total = len(results_final_for_all_selected_colls)
 
         if results_final_nb_total == 0:
             if of.startswith('h'):
                 print_warning(req, "No match found, please enter different search terms.")
             elif of.startswith("x"):
                 # Print empty, but valid XML
                 print_records_prologue(req, of)
                 print_records_epilogue(req, of)
         else:
             # yes, some hits found: good!
             # collection list may have changed due to not-exact-match-found policy so check it out:
             for coll in results_final.keys():
                 if coll not in colls_to_search:
                     colls_to_search.append(coll)
             # print results overview:
             if of == "id":
                 # we have been asked to return list of recIDs
                 recIDs = list(results_final_for_all_selected_colls)
                 if sf: # do we have to sort?
                     recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of)
                 elif rm: # do we have to rank?
                     results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_selected_colls,
                                                                                    string.split(p) + string.split(p1) +
                                                                                    string.split(p2) + string.split(p3), verbose)
                     if results_final_for_all_colls_rank_records_output[0]:
                         recIDs = results_final_for_all_colls_rank_records_output[0]
                 return recIDs
             elif of.startswith("h"):
                 req.write(print_results_overview(req, colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln, ec))
                 selected_external_collections_infos = print_external_results_overview(req, cc, [p, p1, p2, p3], f, ec, verbose, ln)
             # print number of hits found for XML outputs:
             if of.startswith("x"):
                 req.write("<!-- Search-Engine-Total-Number-Of-Results: %s -->\n" % results_final_nb_total)
             # print records:
             if len(colls_to_search)>1:
                 cpu_time = -1 # we do not want to have search time printed on each collection
             print_records_prologue(req, of)
             for coll in colls_to_search:
                 if results_final.has_key(coll) and len(results_final[coll]):
                     if of.startswith("h"):
                         req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll],
                                                     jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                     sc, pl_in_url,
                                                     d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                     results_final_recIDs = list(results_final[coll])
                     results_final_relevances = []
                     results_final_relevances_prologue = ""
                     results_final_relevances_epilogue = ""
                     if sf: # do we have to sort?
                         results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of)
                     elif rm: # do we have to rank?
                         results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \
                                                      rank_records(rm, 0, results_final[coll],
                                                                   string.split(p) + string.split(p1) +
                                                                   string.split(p2) + string.split(p3), verbose)
                         if of.startswith("h"):
                             print_warning(req, results_final_comments)
                         if results_final_recIDs_ranked:
                             results_final_recIDs = results_final_recIDs_ranked
                         else:
                             # rank_records failed and returned some error message to display:
                             print_warning(req, results_final_relevances_prologue)
                             print_warning(req, results_final_relevances_epilogue)
                     print_records(req, results_final_recIDs, jrec, rg, of, ot, ln,
                                   results_final_relevances,
                                   results_final_relevances_prologue,
                                   results_final_relevances_epilogue,
                                   search_pattern=p,
                                   print_records_prologue_p=False,
                                   print_records_epilogue_p=False,
                                   verbose=verbose)
                     if of.startswith("h"):
                         req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll],
                                                     jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                     sc, pl_in_url,
                                                     d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
             print_records_epilogue(req, of)
             if f == "author" and of.startswith("h"):
                 req.write(create_similarly_named_authors_link_box(p, ln))
             # log query:
             try:
                 id_query = log_query(req.get_remote_host(), req.args, uid)
                 if of.startswith("h") and id_query:
                     # Alert/RSS teaser:
                     req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, ln=ln))
             except:
                 # do not log query if req is None (used by CLI interface)
                 pass
             log_query_info("ss", p, f, colls_to_search, results_final_nb_total)
 
     # External searches
     if of.startswith("h"):
         perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
 
     return page_end(req, of, ln)
 
 def perform_request_cache(req, action="show"):
     """Write an HTML page describing the search engine caches to REQ.
 
     The page lists the collection reclist cache, the search cache and
     the field/collection I18N name caches.
 
     Parameters:
       req    - request object; its content_type is set to text/html and
                the page is emitted through req.write().
       action - when equal to "clear", the search cache is emptied and
                the collection reclist cache is rebuilt before the page
                is produced; any other value only displays the caches.
 
     Returns a string holding a single newline.
     """
     # local import: only needed here to escape cached search terms
     import cgi
     global search_cache
     global collection_reclist_cache
     global collection_reclist_cache_timestamp
     global field_i18nname_cache
     global field_i18nname_cache_timestamp
     global collection_i18nname_cache
     global collection_i18nname_cache_timestamp
     req.content_type = "text/html"
     req.send_http_header()
     out = ""
     out += "<h1>Search Cache</h1>"
     # clear cache if requested:
     if action == "clear":
         search_cache = {}
         collection_reclist_cache = create_collection_reclist_cache()
     # show collection reclist cache:
     out += "<h3>Collection reclist cache</h3>"
     out += "- collection table last updated: %s" % get_table_update_time('collection')
     out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache_timestamp
     out += "<br />- reclist cache contents:"
     out += "<blockquote>"
     for coll in collection_reclist_cache.keys():
         # only collections with a non-empty cache entry are listed
         if collection_reclist_cache[coll]:
             out += "%s (%d)<br />" % (coll, len(get_collection_reclist(coll)))
     out += "</blockquote>"
     # show search cache:
     out += "<h3>Search Cache</h3>"
     out += "<blockquote>"
     if len(search_cache):
         out += """<table border="1">"""
         out += "<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % \
                ("Pattern", "Field", "Collection", "Number of Hits")
         for search_cache_key in search_cache.keys():
             # the key is split as pattern@field@collections
             p, f, c = search_cache_key.split("@", 2)
             # find out about length of cached data:
             nbhits = 0
             for coll in search_cache[search_cache_key]:
                 nbhits += len(search_cache[search_cache_key][coll])
             # key parts originate from search queries, so escape them
             # before putting them into the page:
             out += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>" % \
                    (cgi.escape(p), cgi.escape(f), cgi.escape(c), nbhits)
         out += "</table>"
     else:
         out += "<p>Search cache is empty."
     out += "</blockquote>"
     out += """<p><a href="%s/search/cache?action=clear">clear cache</a>""" % CFG_SITE_URL
     # show field i18nname cache:
     out += "<h3>Field I18N names cache</h3>"
     out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
     out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache_timestamp
     out += "<br />- i18nname cache contents:"
     out += "<blockquote>"
     for field in field_i18nname_cache.keys():
         for ln in field_i18nname_cache[field].keys():
             out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache[field][ln])
     out += "</blockquote>"
     # show collection i18nname cache:
     out += "<h3>Collection I18N names cache</h3>"
     out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
     out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache_timestamp
     out += "<br />- i18nname cache contents:"
     out += "<blockquote>"
     for coll in collection_i18nname_cache.keys():
         for ln in collection_i18nname_cache[coll].keys():
             out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache[coll][ln])
     out += "</blockquote>"
     req.write("<html>")
     req.write(out)
     req.write("</html>")
     return "\n"
 
 def perform_request_log(req, date=""):
     """Write an HTML page with search log information to REQ.
 
     Parameters:
       req  - request object; its content_type is set to text/html and
              the page is emitted through req.write().
       date - when non-empty, a YYYYMMDD string: every log line of that
              day is listed.  When empty, the number of logged queries
              is shown for each day from the first of the current month
              until today.
 
     The log is read by running grep on CFG_LOGDIR/search.log.
 
     Returns a string holding a single newline.  Raises ValueError when
     DATE is non-empty but not an integer.
     """
     # local import: only needed here to escape logged search terms
     import cgi
     req.content_type = "text/html"
     req.send_http_header()
     req.write("<html>")
     req.write("<h1>Search Log</h1>")
     if date: # case A: display stats for a day
         # converting to int also keeps the value safe for the shell command
         yyyymmdd = int(date)
         req.write("<p><big><strong>Date: %d</strong></big><p>" % yyyymmdd)
         req.write("""<table border="1">""")
         req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
         # read file:
         log_pipe = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
         try:
             lines = log_pipe.readlines()
         finally:
             log_pipe.close()
         # process lines:
         i = 0
         for line in lines:
             try:
                 # "as" is a reserved word, hence the dummy_ name for
                 # the second (unused) log field
                 timestamp, dummy_as, p, f, c, nbhits = line.split("#")
             except ValueError:
                 continue # ignore eventual wrong log lines
             i += 1
             # logged values come from user queries: escape them
             req.write("<tr><td align=\"right\">#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                       % (i, timestamp[8:10], timestamp[10:12], timestamp[12:],
                          cgi.escape(p), cgi.escape(f), cgi.escape(c), cgi.escape(nbhits)))
         req.write("</table>")
     else: # case B: display summary stats per day
         yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
         yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
         req.write("""<table border="1">""")
         req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Day", "Number of Queries"))
         for day in range(yyyymm01, yyyymmdd + 1):
             # grep -c prints the number of log lines starting with DAY
             log_pipe = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
             try:
                 for line in log_pipe.readlines():
                     req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                               (day, CFG_SITE_URL, day, line))
             finally:
                 log_pipe.close()
         req.write("</table>")
     req.write("</html>")
     return "\n"
 
 
 def get_values_for_code_dict(recids, tag):
     """Count how often each value of TAG occurs in the records RECIDS.
 
     Every record in RECIDS is looked up with get_fieldvalues(recid, tag).
     Returns a dictionary mapping each value found to the number of
     times it was seen; the dictionary is empty when RECIDS is empty.
     """
     valuefreqdict = {}
     for recid in recids:
         for value in get_fieldvalues(recid, tag):
             # dict.get replaces the deprecated has_key() test
             valuefreqdict[value] = valuefreqdict.get(value, 0) + 1
     return valuefreqdict
-              
+
 def get_most_popular_values_for_code(recids, tag):
     """Return the values of TAG found in RECIDS, most frequent first.
 
     Frequencies come from get_values_for_code_dict(); values with the
     same frequency are ordered by descending value.
     """
     frequencies = get_values_for_code_dict(recids, tag)
     # put the count first so that sorting orders by frequency
     ranked = [(count, value) for value, count in frequencies.items()]
     ranked.sort()
     ranked.reverse()
     # drop the counts, keep the values only
     return [value for (count, value) in ranked]
 
 def profile(p="", f="", c=CFG_SITE_NAME):
     """Profile search time.
 
     Runs perform_request_search(p=P, f=F, c=C) under the profiler,
     stores the raw data in the file "perform_request_search_profile"
     and prints the statistics sorted by cumulative time.  Returns 0.
     """
     import profile
     import pstats
     # %r builds properly quoted literals, so patterns that contain
     # quotes or backslashes do not break the profiled statement
     profile.run("perform_request_search(p=%r, f=%r, c=%r)" % (p, f, c), "perform_request_search_profile")
     stats = pstats.Stats("perform_request_search_profile")
     stats.strip_dirs().sort_stats("cumulative").print_stats()
     return 0
 
 ## test cases:
 #print wash_colls(CFG_SITE_NAME,"Library Catalogue", 0)
 #print wash_colls("Periodicals & Progress Reports",["Periodicals","Progress Reports"], 0)
 #print wash_field("wau")
 #print print_record(20,"tm","001,245")
 #print create_opft_search_units(None, "PHE-87-13","reportnumber")
 #print ":"+wash_pattern("* and % doo * %")+":\n"
 #print ":"+wash_pattern("*")+":\n"
 #print ":"+wash_pattern("ellis* ell* e*%")+":\n"
 #print run_sql("SELECT name,dbquery from collection")
 #print get_index_id("author")
 #print get_coll_ancestors("Theses")
 #print get_coll_sons("Articles & Preprints")
 #print get_coll_real_descendants("Articles & Preprints")
 #print get_collection_reclist("Theses")
 #print log(sys.stdin)
 #print search_unit_in_bibrec('2002-12-01','2002-12-12')
 #print type(wash_url_argument("-1",'int'))
 #print get_nearest_terms_in_bibxxx("ellis", "author", 5, 5)
 #print call_bibformat(68, "HB_FLY")
 #print create_collection_i18nname_cache()
 #print get_fieldvalues(10, "980__a")
 #print get_fieldvalues_alephseq_like(10,"001___")
 #print get_fieldvalues_alephseq_like(10,"980__a")
 #print get_fieldvalues_alephseq_like(10,"foo")
 #print get_fieldvalues_alephseq_like(10,"-1")
 #print get_fieldvalues_alephseq_like(10,"99")
 #print get_fieldvalues_alephseq_like(10,["001", "980"])
 
 ## profiling:
 #profile("of the this")
 #print perform_request_search(p="ellis")
diff --git a/modules/websearch/lib/search_engine_summarizer.py b/modules/websearch/lib/search_engine_summarizer.py
index 62ba11010..bc95b6274 100644
--- a/modules/websearch/lib/search_engine_summarizer.py
+++ b/modules/websearch/lib/search_engine_summarizer.py
@@ -1,123 +1,123 @@
 # -*- coding: utf-8 -*-
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 
-"""CDS Invenio Search Engine Summarizer: Produce summary formats, 
+"""CDS Invenio Search Engine Summarizer: Produce summary formats,
    like CiteSummary"""
 
 __lastupdated__ = """ """
 
 __revision__ = " "
 
 from invenio.bibrank_citation_searcher import get_cited_by_list
 import invenio.template
 websearch_templates = invenio.template.load('websearch')
 
 def summarize(recids, of, ln):
     """Return a summary report for RECIDS in output format OF.
 
     'hbcs' gives the HTML citation summary (in language LN) and 'xcs'
     the XML citation summary; for any other format nothing is
     computed and None is returned.
     """
     if of not in ('hbcs', 'xcs'):
         return None
     # both summary flavours are built from the same cited-by list
     citedbylist = get_cited_by_list(recids)
     if of == 'hbcs':
         return print_citation_summary_html(citedbylist, ln)
     return print_citation_summary_xml(citedbylist)
-   
+
 
 # Citation classes used by the citation summary.  Each entry is
 # (lowest citation count, highest citation count, label); a record
 # belongs to a class when its count lies within the bounds, both
 # included.
 # NOTE(review): the (1, 9) class is labelled "(0-9)" while records
 # without citations go to the (0, 0) class -- confirm the label.
 tresholdsNames = [    (500, 1000000,'Renowned papers (500+)'),
 		      (250, 499,'Famous papers (250-499)'),
 		      (100, 249,'Very well-known papers (100-249)'),
 		      (50, 99,'Well-known papers (50-99)'),
 		      (10, 49,'Known papers (10-49)'),
                       (1, 9,'Less known papers (0-9)'),
 		      (0, 0,'Unknown papers (0)')
 ]
-    
-    
-    
+
+
+
 #for citation summary, code xcs/hbcs (unless changed)
 def print_citation_summary_xml(citedbylist):
     """Return the citation summary of CITEDBYLIST as an XML string.
 
     The root <citationsummary> element carries the number of entries
     of CITEDBYLIST ("records") and the total number of citations
     ("citations").  One <citationclass> element is added, in the
     order of tresholdsNames, for every class that
     calculate_citations() reports; its <records> child holds str()
     of the value stored for that class.
     """
     alldict = calculate_citations(tresholdsNames, citedbylist)
     reciddict = alldict['reciddict']
     parts = ["<citationsummary records=\"" + str(len(citedbylist)),
              "\" citations=\"" + str(alldict['totalcites']) + "\">"]
     for dummy_low, dummy_high, name in tresholdsNames:
         # classes without any record are absent from reciddict
         if name in reciddict:
             parts.append("<citationclass>" + name)
             parts.append("<records>" + str(reciddict[name]) + "</records>")
             parts.append("</citationclass>\n")
     parts.append("</citationsummary>")
     return "".join(parts)
-    
-    
+
+
 def print_citation_summary_html(citedbylist, ln, criteria=""):
     """Return the citation summary of CITEDBYLIST as HTML in language LN.
 
     The figures come from calculate_citations() and are rendered by
     websearch_templates.tmpl_citesummary_html().
     NOTE(review): CRITERIA is accepted but not used in this function.
     """
     stats = calculate_citations(tresholdsNames, citedbylist)
     # keep at most the first four characters of the average so that
     # it does not span 10 digits
     short_average = str(stats['avgcites'])[:4]
     return websearch_templates.tmpl_citesummary_html(ln,
                                                      str(stats['totalcites']),
                                                      short_average,
                                                      stats['reciddict'])
-                    
-    
-    
+
+
+
 def calculate_citations(tresholdsNames, citedbylist):
     """Sort records into citation classes and compute citation totals.
 
     Parameters:
       tresholdsNames - sequence of (low, high, name) tuples; a record
                        belongs to class NAME when its number of
                        citations lies between LOW and HIGH, both
                        included.
       citedbylist    - sequence of (recid, cites) pairs; len(cites) is
                        taken as the number of citations of RECID.
 
     Returns a dictionary with the keys:
       'records'    - number of entries in CITEDBYLIST
       'totalcites' - sum of the citation counts
       'avgcites'   - totalcites / records as a float, or 0 when
                      CITEDBYLIST is empty
       'reciddict'  - maps each class name that received at least one
                      record to the list of its record IDs, in input
                      order
     """
     totalcites = 0
     reciddict = {}
     for recid, cites in citedbylist:
         numcites = len(cites)
         totalcites += numcites
         # a record is added to every class whose range contains its count
         for low, high, name in tresholdsNames:
             if low <= numcites <= high:
                 reciddict.setdefault(name, []).append(recid)
     if len(citedbylist) == 0:
         avgcites = 0
     else:
         # multiply by 1.0 to force a floating point division
         avgcites = totalcites * 1.0 / len(citedbylist)
     # create a dictionary that contains all the values
     return {'records': len(citedbylist),
             'totalcites': totalcites,
             'avgcites': avgcites,
             'reciddict': reciddict}
diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py
index c1897cf5c..a48d886a3 100644
--- a/modules/websearch/lib/websearch_templates.py
+++ b/modules/websearch/lib/websearch_templates.py
@@ -1,3027 +1,3027 @@
 # -*- coding: utf-8 -*-
 ## $Id$
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 # pylint: disable-msg=C0301
 
 __revision__ = "$Id$"
 
 import time
 import cgi
 import gettext
 import string
 import locale
 from urllib import quote, urlencode
 
 from invenio.config import \
      CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, \
      CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD, \
      CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \
      CFG_BIBRANK_SHOW_READING_STATS, \
      CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \
      CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \
      CFG_BIBRANK_SHOW_CITATION_LINKS, \
      CFG_BIBRANK_SHOW_CITATION_STATS, \
      CFG_BIBRANK_SHOW_CITATION_GRAPHS, \
      CFG_WEBSEARCH_RSS_TTL, \
      CFG_SITE_LANG, \
      CFG_SITE_NAME, \
      CFG_SITE_NAME_INTL, \
      CFG_VERSION, \
      CFG_SITE_URL, \
      CFG_SITE_SUPPORT_EMAIL
 from invenio.dbquery import run_sql
 from invenio.messages import gettext_set_language
 #from invenio.search_engine_config import CFG_EXPERIMENTAL_FEATURES
 from invenio.urlutils import make_canonical_urlargd, drop_default_urlargd, create_html_link, create_url
 from invenio.htmlutils import nmtoken_from_string
 from invenio.webinterface_handler import wash_urlargd
 
 from invenio.websearch_external_collections import external_collection_get_state
 
 def get_fieldvalues(recID, tag):
     """Return list of field values for field TAG inside record RECID.
 
     For TAG "001___" the record ID itself is returned as a one-element
     list of strings; otherwise the values are read from the bibNNx /
     bibrec_bibNNx tables, NN being the first two characters of TAG.
     TAG is matched with SQL LIKE.
 
     FIXME: should be imported commonly for search_engine too.
     """
     out = []
     if tag == "001___":
         # we have asked for recID that is not stored in bibXXx tables
         out.append(str(recID))
     else:
         # we are going to look inside bibXXx tables
         digit = tag[0:2]
         bx = "bib%sx" % digit
         bibx = "bibrec_bib%sx" % digit
         # Table names cannot be bound as parameters, so they are
         # formatted in; record ID and tag are passed separately to
         # run_sql so that the database layer quotes them.
         query = "SELECT bx.value FROM %s AS bx, %s AS bibx " \
                 "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s " \
                 "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx)
         res = run_sql(query, (recID, tag))
         for row in res:
             out.append(row[0])
     return out
 
 class Template:
 
     # This dictionary maps CDS Invenio language code to locale codes (ISO 639)
     tmpl_localemap = {
         'bg': 'bg_BG',
         'ca': 'ca_ES',
         'de': 'de_DE',
         'el': 'el_GR',
         'en': 'en_US',
         'es': 'es_ES',
         'pt': 'pt_BR',
         'fr': 'fr_FR',
         'it': 'it_IT',
         'ru': 'ru_RU',
         'sk': 'sk_SK',
         'cs': 'cs_CZ',
         'no': 'no_NO',
         'sv': 'sv_SE',
         'uk': 'uk_UA',
         'ja': 'ja_JA',
         'pl': 'pl_PL',
         'hr': 'hr_HR',
         'zh_CN': 'zh_CN',
         'zh_TW': 'zh_TW',
         }
     tmpl_default_locale = "en_US" # which locale to use by default, useful in case of failure
 
     # Type of the allowed parameters for the web interface for search results
     search_results_default_urlargd = {
         'cc': (str, CFG_SITE_NAME),
         'c': (list, []),
         'p': (str, ""), 'f': (str, ""),
         'rg': (int, 10),
         'sf': (str, ""),
         'so': (str, "d"),
         'sp': (str, ""),
         'rm': (str, ""),
         'of': (str, "hb"),
         'ot': (list, []),
         'as': (int, 0),
         'p1': (str, ""), 'f1': (str, ""), 'm1': (str, ""), 'op1':(str, ""),
         'p2': (str, ""), 'f2': (str, ""), 'm2': (str, ""), 'op2':(str, ""),
         'p3': (str, ""), 'f3': (str, ""), 'm3': (str, ""),
         'sc': (int, 0),
         'jrec': (int, 0),
         'recid': (int, -1), 'recidb': (int, -1), 'sysno': (str, ""),
         'id': (int, -1), 'idb': (int, -1), 'sysnb': (str, ""),
         'action': (str, "search"),
         'action_search': (str, ""),
         'action_browse': (str, ""),
         'd1': (str, ""),
         'd1y': (int, 0), 'd1m': (int, 0), 'd1d': (int, 0),
         'd2': (str, ""),
         'd2y': (int, 0), 'd2m': (int, 0), 'd2d': (int, 0),
         'dt': (str, ""),
         'ap': (int, 1),
         'verbose': (int, 0),
         'ec': (list, []),
         }
 
     # ...and for search interfaces
     search_interface_default_urlargd = {
         'as': (int, 0),
         'verbose': (int, 0)}
 
     # Arguments accepted by the RSS feeds, given as
     # name -> (type, default value)
     rss_default_urlargd = {
         # collections
         'c': (list, []),
         'cc': (str, ''),
         # simple search pattern and field
         'p': (str, ''), 'f': (str, ''),
         # advanced search rows
         'p1': (str, ''), 'f1': (str, ''), 'm1': (str, ''), 'op1': (str, ''),
         'p2': (str, ''), 'f2': (str, ''), 'm2': (str, ''), 'op2': (str, ''),
         'p3': (str, ''), 'f3': (str, ''), 'm3': (str, ''),
     }
 
     # OpenURL arguments accepted by tmpl_openurl2invenio(), given as
     # name -> (type, default value).  Each key appears exactly once:
     # in a dict literal a repeated key silently overrides the earlier
     # entry.
     tmpl_openurl_accepted_args = {
             # arguments without the 'rft.' prefix
             'id' : (list, []),
             'genre' : (str, ''),
             'aulast' : (str, ''),
             'aufirst' : (str, ''),
             'auinit' : (str, ''),
             'auinit1' : (str, ''),
             'auinitm' : (str, ''),
             'issn' : (str, ''),
             'eissn' : (str, ''),
             'coden' : (str, ''),
             'isbn' : (str, ''),
             'sici' : (str, ''),
             'bici' : (str, ''),
             'title' : (str, ''),
             'stitle' : (str, ''),
             'atitle' : (str, ''),
             'volume' : (str, ''),
             'part' : (str, ''),
             'issue' : (str, ''),
             'spage' : (str, ''),
             'epage' : (str, ''),
             'pages' : (str, ''),
             'artnum' : (str, ''),
             'date' : (str, ''),
             'ssn' : (str, ''),
             'quarter' : (str, ''),
             # context arguments and identifiers
             'url_ver' : (str, ''),
             'ctx_ver' : (str, ''),
             'rft_val_fmt' : (str, ''),
             'rft_id' : (list, []),
             # arguments with the 'rft.' prefix
             'rft.atitle' : (str, ''),
             'rft.title' : (str, ''),
             'rft.jtitle' : (str, ''),
             'rft.stitle' : (str, ''),
             'rft.date' : (str, ''),
             'rft.volume' : (str, ''),
             'rft.issue' : (str, ''),
             'rft.spage' : (str, ''),
             'rft.epage' : (str, ''),
             'rft.pages' : (str, ''),
             'rft.artnumber' : (str, ''),
             'rft.issn' : (str, ''),
             'rft.eissn' : (str, ''),
             'rft.aulast' : (str, ''),
             'rft.aufirst' : (str, ''),
             'rft.auinit' : (str, ''),
             'rft.auinit1' : (str, ''),
             'rft.auinitm' : (str, ''),
             'rft.ausuffix' : (str, ''),
             'rft.au' : (list, []),
             'rft.aucorp' : (str, ''),
             'rft.isbn' : (str, ''),
             'rft.coden' : (str, ''),
             'rft.sici' : (str, ''),
             'rft.genre' : (str, 'unknown'),
             'rft.chron' : (str, ''),
             'rft.ssn' : (str, ''),
             # NOTE(review): int type with a '' default, whereas the plain
             # 'quarter' argument is (str, '') -- looks unintended; left
             # unchanged, please confirm.
             'rft.quarter' : (int, ''),
             'rft.part' : (str, ''),
             'rft.btitle' : (str, ''),
             'rft.place' : (str, ''),
             'rft.pub' : (str, ''),
             'rft.edition' : (str, ''),
             'rft.tpages' : (str, ''),
             'rft.series' : (str, ''),
     }
 
     def tmpl_openurl2invenio(self, openurl_data):
         """Return an Invenio search URL corresponding to the data
         included in the openurl form map.

         Parameters:

           - 'openurl_data' *dict* - the OpenURL arguments; every key read
             here is declared in tmpl_openurl_accepted_args

         Candidate queries are tried in this order: DOI (currently
         disabled), ISBN, CODEN, title and author, title.  The URL of the
         first query that returns records is returned.  If none matches,
         a plain search on the title is returned, or a search that
         matches nothing when no title is known.
         """
 
         from invenio.search_engine import perform_request_search
 
         ## Identifiers passed as prefixed values in 'id' and 'rft_id'.
         ## For every value the first matching prefix wins; a later value
         ## overrides an earlier one and 'rft_id' overrides 'id'.
         identifiers = {'doi': '', 'pmid': '', 'bibcode': '', 'oai': '',
                        'issn': '', 'isbn': ''}
         known_prefixes = (
             ('id', (('doi:', 'doi'),
                     ('pmid:', 'pmid'),
                     ('bibcode:', 'bibcode'),
                     ('oai:', 'oai'))),
             ('rft_id', (('info:doi/', 'doi'),
                         ('info:pmid/', 'pmid'),
                         ('info:bibcode/', 'bibcode'),
                         ('info:oai/', 'oai'),
                         ('urn:ISBN:', 'isbn'),
                         ('urn:ISSN:', 'issn'))))
         for argument, prefixes in known_prefixes:
             for elem in openurl_data[argument]:
                 for prefix, kind in prefixes:
                     if elem.startswith(prefix):
                         identifiers[kind] = elem[len(prefix):]
                         break
         ## NOTE: pmid, bibcode, oai and issn identifiers, as well as the
         ## journal title and ISSN arguments, are accepted but no query
         ## is built from them yet.
 
         def fielded_query(field, value):
             """Return field:"value", or '' when value is empty."""
             if value:
                 return '%s:"%s"' % (field, value)
             return ''
 
         ## Building author query
         aulast = openurl_data['rft.aulast'] or openurl_data['aulast']
         aufirst = openurl_data['rft.aufirst'] or openurl_data['aufirst']
         ## The "first + middle initial" combinations are stripped so that
         ## a lone separator does not count as a value and the next
         ## fallback is reached.
         auinit = openurl_data['rft.auinit'] or \
                  openurl_data['auinit'] or \
                  (openurl_data['rft.auinit1'] + ' ' + openurl_data['rft.auinitm']).strip() or \
                  (openurl_data['auinit1'] + ' ' + openurl_data['auinitm']).strip() or \
                  aufirst[:1]
         auinit = auinit.upper()
         if aulast and aufirst:
             author_query = 'author:"%s, %s" or author:"%s, %s"' % (aulast, aufirst, aulast, auinit)
         elif aulast and auinit:
             author_query = 'author:"%s, %s"' % (aulast, auinit)
         else:
             author_query = ''
 
         ## Building title query
         title = openurl_data['rft.atitle'] or \
                 openurl_data['atitle'] or \
                 openurl_data['rft.btitle'] or \
                 openurl_data['rft.title'] or \
                 openurl_data['title']
         title_query = fielded_query('title', title)
 
         ## Building isbn query (spaces and dashes removed)
         isbn = identifiers['isbn'] or openurl_data['rft.isbn'] or \
                openurl_data['isbn']
         isbn = isbn.replace(' ', '').replace('-', '')
         isbn_query = fielded_query('isbn', isbn)
 
         ## Building coden query
         coden = openurl_data['rft.coden'] or openurl_data['coden']
         coden_query = fielded_query('coden', coden)
 
         ## Building doi query
         ## FIXME Temporarily disabled until doi field is properly setup;
         ## should then be fielded_query('doi', identifiers['doi'])
         doi_query = ''
 
         ## Building combined title and author query
         if author_query and title_query:
             title_author_query = '%s and %s' % (title_query, author_query)
         else:
             title_author_query = ''
 
         ## Trying possible searches: (query, output format) pairs
         candidates = ((doi_query, 'hd'),
                       (isbn_query, 'hd'),
                       (coden_query, 'hd'),
                       (title_author_query, 'hd'),
                       (title_query, 'hb'))
         for query, output_format in candidates:
             if query and perform_request_search(p=query):
                 return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : query,
                     'sc' : 1,
                     'of' : output_format}))
 
         ## Nothing worked, let's return a search that the user can improve
         if title:
             return '%s/search?%s' % (CFG_SITE_URL, urlencode({
                     'p' : title,
                     'sc' : 1,
                     'of' : 'hb'}))
 
         ## Mmh. Too little information provided.
         return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({
                     'p' : 'recid:-1',
                     'sc' : 1,
                     'of' : 'hb'}, {}))
 
     def build_search_url(self, known_parameters={}, **kargs):
         """Build the canonical, absolute URL of a search.

         'known_parameters' holds the query parameters inherited from
         the current query; keyword arguments override or extend them:

            build_search_url(known_parameters, of="xm")

         Parameters still at their default value (as declared in
         search_results_default_urlargd) are left out.  When a 'recid'
         remains, the URL points to /record/<recid> instead of /search.
         """
         defaults = self.search_results_default_urlargd
 
         # Merge into a fresh dict so that the caller's one is not modified
         merged = dict(known_parameters)
         merged.update(kargs)
 
         # Keep only the arguments which differ from their default value
         merged = drop_default_urlargd(merged, defaults)
 
         if 'recid' not in merged:
             return "%s/search%s" % (CFG_SITE_URL, make_canonical_urlargd(merged, defaults))
 
         # Asking for a recid: the record goes into the path, the other
         # arguments into the query string
         recid = merged.pop('recid')
         record_url = "%s/record/%d" % (CFG_SITE_URL, recid)
         return record_url + make_canonical_urlargd(merged, defaults)
 
     def build_search_interface_url(self, known_parameters={}, **kargs):
         """Build the canonical URL of a search interface page.

         'known_parameters' and the keyword arguments are merged (keyword
         arguments win).  The mandatory 'c' entry selects the collection:
         it becomes a /collection/<c> path unless it is empty or equal to
         CFG_SITE_NAME.  The remaining arguments which differ from their
         default in search_results_default_urlargd are handed to
         create_url() together with the base URL.
         """
         merged = dict(known_parameters)
         merged.update(kargs)
 
         # The collection goes into the path, not into the arguments
         collection = merged.pop('c')
 
         base = CFG_SITE_URL
         if collection and collection != CFG_SITE_NAME:
             base += '/collection/' + quote(collection)
 
         # Only the arguments which have _not_ their default value are kept
         non_default = drop_default_urlargd(merged, self.search_results_default_urlargd)
         return create_url(base, non_default)
 
     def build_rss_url(self, known_parameters,  **kargs):
         """Helper for generating a canonical RSS URL.

         'known_parameters' and the keyword arguments are merged (keyword
         arguments win) and reduced to the arguments declared in
         rss_default_urlargd.  The collections in 'c' are appended by
         hand as repeated, quoted 'c=' arguments.

         Returns CFG_SITE_URL + '/rss' followed by the query string, if any.
         """
 
         parameters = {}
         parameters.update(known_parameters)
         parameters.update(kargs)
 
         # Keep only interesting parameters
         argd = wash_urlargd(parameters, self.rss_default_urlargd)
 
         # Handle 'c' differently since it is a list
         collections = argd.pop('c', [])
 
         # Create query, and drop empty params
         args = make_canonical_urlargd(argd, self.rss_default_urlargd)
 
         if collections:
             # Add collections.  The canonical query string is used in this
             # class as a complete '?...' suffix and is presumably empty
             # when no argument is left: start the query string ourselves
             # in that case instead of emitting '/rss&amp;c=...'.
             if args:
                 args += '&amp;'
             else:
                 args = '?'
             args += 'c=' + '&amp;c='.join([quote(coll) for coll in collections])
 
         return CFG_SITE_URL + '/rss' + args
 
     def tmpl_record_page_header_content(self, req, recid, ln):
         """Provide extra information in the header of /record pages.

         Parameters:

           - 'req' - the request object (not used here)

           - 'recid' *int* - the ID of the displayed record

           - 'ln' *string* - the language to display

         Returns the list [title, description, keywords], each item being
         HTML-escaped (quotes included):

           - title: "Record #<recid>", followed by the first 245__a
             value when there is one

           - description: the 520__a values, then on a new line the
             100__a and 700__a values

           - keywords: the 6531_a values
         """
 
         _ = gettext_set_language(ln)
 
         titles = get_fieldvalues(recid, "245__a")
 
         # The values are escaped only once, in the return statement:
         # escaping the title here as well would turn '&' into '&amp;amp;'.
         if titles:
             title = _("Record") + ' #%d: %s' % (recid, titles[0])
         else:
             title = _("Record") + ' #%d' % recid
 
         keywords = ', '.join(get_fieldvalues(recid, "6531_a"))
         description = ' '.join(get_fieldvalues(recid, "520__a"))
         description += "\n"
         description += '; '.join(get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a"))
 
         return [cgi.escape(x, True) for x in (title, description, keywords)]
 
     def tmpl_navtrail_links(self, as, ln, dads):
         """
         Creates the navigation bar at top of each search page (*Home > Root collection > subcollection > ...*)
 
         Parameters:
 
           - 'as' *bool* - Should we display an advanced search box?
 
           - 'ln' *string* - The language to display
 
           - 'separator' *string* - The separator between two consecutive collections
 
           - 'dads' *list* - A list of parent links, eachone being a dictionary of ('name', 'longname')
         """
         out = []
         for url, name in dads:
             out.append(create_html_link(self.build_search_interface_url(c=url, as=as, ln=ln), {}, cgi.escape(name), {'class': 'navtrail'}))
 
         return ' &gt; '.join(out)
 
     def tmpl_webcoll_body(self, ln, collection, te_portalbox,
                           searchfor, np_portalbox, narrowsearch,
                           focuson, instantbrowse, ne_portalbox):
 
         """ Creates the body of the main search page.

         Parameters:

           - 'ln' *string* - language of the page being generated (not used here)

           - 'collection' - collection id of the page being generated (not used here)

           - 'te_portalbox' *string* - a portalbox HTML code (not used here)

           - 'searchfor' *string* - The HTML code for the search options

           - 'np_portalbox' *string* - The HTML code for the portalbox placed right after the search options

           - 'narrowsearch' *string* - The HTML code for the left column of the table

           - 'focuson' *string* - The HTML code for the "focuson" categories; adds a second column when not empty

           - 'instantbrowse' *string* - The HTML code used for the left column when 'narrowsearch' is empty

           - 'ne_portalbox' *string* - The HTML code for the portalbox placed after the table
         """
 
         # Left column: fall back on instant browse when there is nothing to narrow by
         left_column = narrowsearch or instantbrowse
 
         chunks = ['''
                 <form name="search" action="%(siteurl)s/search" method="get">
                 %(searchfor)s
                 %(np_portalbox)s
                 <table cellspacing="0" cellpadding="0" border="0">
                   <tr>
                     <td valign="top">%(narrowsearch)s</td>
                ''' % {'siteurl' : CFG_SITE_URL,
                       'searchfor' : searchfor,
                       'np_portalbox' : np_portalbox,
                       'narrowsearch' : left_column}]
 
         # Optional right column
         if focuson:
             chunks.append("""<td valign="top">""" + focuson + """</td>""")
 
         chunks.append("""</tr></table>
             %(ne_portalbox)s
                </form>""" % {'ne_portalbox' : ne_portalbox})
 
         return ''.join(chunks)
 
     def tmpl_portalbox(self, title, body):
         """Creates a portalbox: a header showing the title, followed by the body.

         Parameters:

           - 'title' *string* - The title of the box (HTML-escaped here)

           - 'body' *string* - The HTML code for the body of the box (inserted as is)

         """
         values = {'title' : cgi.escape(title), 'body' : body}
 
         return """<div class="portalbox">
                     <div class="portalboxheader">%(title)s</div>
                     <div class="portalboxbody">%(body)s</div>
                  </div>""" % values
 
     def tmpl_searchfor_simple(self, ln, collection_id, collection_name, record_count, middle_option):
         """Produces simple *Search for* box for the current collection.

         Parameters:

           - 'ln' *string* - The language to display

           - 'collection_id' - The current collection; emitted as the hidden
             'cc' argument and used for the Advanced Search link

           - 'collection_name' - (not used here)

           - 'record_count' - The number of records, displayed in the box header

           - 'middle_option' *string* - HTML code for the options (any field, specific fields ...)
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''
         <!--create_searchfor_simple()-->
         '''
 
         argd = drop_default_urlargd({'ln': ln, 'cc': collection_id, 'sc': 1},
                                     self.search_results_default_urlargd)
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
 
         header = _("Search %s records for:") % \
                  self.tmpl_nbrecs_info(record_count, "","")
 
         # 'as' is a reserved word from Python 2.6 on and can no longer be
         # written as a literal keyword argument: pass it via a dictionary.
         asearchurl = self.build_search_interface_url(c=collection_id, ln=ln, **{'as': 1})
 
         # The language is only added to the help link when it is not the
         # default one
         if ln != CFG_SITE_LANG:
             langlink = '?ln=' + ln
         else:
             langlink = ''
 
         # print commentary start:
         out += '''
         <table class="searchbox">
          <thead>
           <tr align="left">
            <th colspan="3" class="searchboxheader">%(header)s</th>
           </tr>
          </thead>
          <tbody>
           <tr valign="baseline">
            <td class="searchboxbody" align="left"><input type="text" name="p" size="40" value="" /></td>
            <td class="searchboxbody" align="left">%(middle_option)s</td>
            <td class="searchboxbody" align="left">
              <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" />
              <input class="formbutton" type="submit" name="action_browse" value="%(msg_browse)s" /></td>
           </tr>
           <tr valign="baseline">
            <td class="searchboxbody" colspan="3" align="right">
              <small>
                <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a> ::
                %(asearch)s
              </small>
            </td>
           </tr>
          </tbody>
         </table>
         <!--/create_searchfor_simple()-->
         ''' % {'ln' : ln,
                'langlink': langlink,
                'siteurl' : CFG_SITE_URL,
                'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')),
                'header' : header,
                'middle_option' : middle_option,
                'msg_search' : _('Search'),
                'msg_browse' : _('Browse'),
                'msg_search_tips' : _('Search Tips')}
 
         return out
 
     def tmpl_searchfor_advanced(self,
                                 ln,                  # current language
                                 collection_id,
                                 collection_name,
                                 record_count,
                                 middle_option_1, middle_option_2, middle_option_3,
                                 searchoptions,
                                 sortoptions,
                                 rankoptions,
                                 displayoptions,
                                 formatoptions
                                 ):
         """
           Produces advanced *Search for* box for the current collection.

           Parameters:

             - 'ln' *string* - The language to display

             - 'collection_id' - The current collection; emitted as the hidden
               'cc' argument and used for the Simple Search link

             - 'collection_name' - (not used here)

             - 'record_count' - The number of records, displayed in the box header

             - 'middle_option_1' *string* - HTML code for the first row of options (any field, specific fields ...)

             - 'middle_option_2' *string* - HTML code for the second row of options (any field, specific fields ...)

             - 'middle_option_3' *string* - HTML code for the third row of options (any field, specific fields ...)

             - 'searchoptions' *string* - HTML code for the search options; the
               corresponding box is left out when empty

             - 'sortoptions' *string* - HTML code for the sort options

             - 'rankoptions' *string* - HTML code for the rank options

             - 'displayoptions' *string* - HTML code for the display options

             - 'formatoptions' *string* - HTML code for the format options

         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '''
         <!--create_searchfor_advanced()-->
         '''
 
         argd = drop_default_urlargd({'ln': ln, 'as': 1, 'cc': collection_id, 'sc': 1},
                                     self.search_results_default_urlargd)
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
 
         header = _("Search %s records for") % \
                  self.tmpl_nbrecs_info(record_count, "","")
         header += ':'
 
         # 'as' is a reserved word from Python 2.6 on and can no longer be
         # written as a literal keyword argument: pass it via a dictionary.
         ssearchurl = self.build_search_interface_url(c=collection_id, ln=ln, **{'as': 0})
 
         # The language is only added to the help link when it is not the
         # default one
         if ln != CFG_SITE_LANG:
             langlink = '?ln=' + ln
         else:
             langlink = ''
 
         # Three search rows: match type, pattern, field options; the
         # first two rows end with an AND/OR/AND NOT operator
         out += '''
         <table class="searchbox">
          <thead>
           <tr>
            <th class="searchboxheader" colspan="3">%(header)s</th>
           </tr>
          </thead>
          <tbody>
           <tr valign="bottom">
             <td class="searchboxbody" style="white-space: nowrap;">
                 %(matchbox_m1)s<input type="text" name="p1" size="40" value="" />
             </td>
             <td class="searchboxbody" style="white-space: nowrap;">%(middle_option_1)s</td>
             <td class="searchboxbody">%(andornot_op1)s</td>
           </tr>
           <tr valign="bottom">
             <td class="searchboxbody" style="white-space: nowrap;">
                 %(matchbox_m2)s<input type="text" name="p2" size="40" value="" />
             </td>
             <td class="searchboxbody">%(middle_option_2)s</td>
             <td class="searchboxbody">%(andornot_op2)s</td>
           </tr>
           <tr valign="bottom">
             <td class="searchboxbody" style="white-space: nowrap;">
                 %(matchbox_m3)s<input type="text" name="p3" size="40" value="" />
             </td>
             <td class="searchboxbody">%(middle_option_3)s</td>
             <td class="searchboxbody" style="white-space: nowrap;">
               <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" />
               <input class="formbutton" type="submit" name="action_browse" value="%(msg_browse)s" /></td>
           </tr>
           <tr valign="bottom">
             <td colspan="3" class="searchboxbody" align="right">
               <small>
                 <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a> ::
                 %(ssearch)s
               </small>
             </td>
           </tr>
          </tbody>
         </table>
         <!-- @todo - more imports -->
         ''' % {'ln' : ln,
                'langlink': langlink,
                'siteurl' : CFG_SITE_URL,
                'ssearch' : create_html_link(ssearchurl, {}, _("Simple Search")),
                'header' : header,
 
                'matchbox_m1' : self.tmpl_matchtype_box('m1', ln=ln),
                'middle_option_1' : middle_option_1,
                'andornot_op1' : self.tmpl_andornot_box('op1', ln=ln),
 
                'matchbox_m2' : self.tmpl_matchtype_box('m2', ln=ln),
                'middle_option_2' : middle_option_2,
                'andornot_op2' : self.tmpl_andornot_box('op2', ln=ln),
 
                'matchbox_m3' : self.tmpl_matchtype_box('m3', ln=ln),
                'middle_option_3' : middle_option_3,
 
                'msg_search' : _("Search"),
                'msg_browse' : _("Browse"),
                'msg_search_tips' : _("Search Tips")}
 
         # Optional box with the search options
         if (searchoptions):
             out += """<table class="searchbox">
                       <thead>
                        <tr>
                          <th class="searchboxheader">
                            %(searchheader)s
                          </th>
                        </tr>
                       </thead>
                       <tbody>
                        <tr valign="bottom">
                         <td class="searchboxbody">%(searchoptions)s</td>
                        </tr>
                       </tbody>
                      </table>""" % {
                        'searchheader' : _("Search options:"),
                        'searchoptions' : searchoptions
                      }
 
         # Date limits box, then sort / display / format box
         out += """<table class="searchbox">
                    <thead>
                     <tr>
                       <th class="searchboxheader">
                         %(added)s
                       </th>
                       <th class="searchboxheader">
                         %(until)s
                       </th>
                     </tr>
                    </thead>
                    <tbody>
                     <tr valign="bottom">
                       <td class="searchboxbody">%(added_or_modified)s %(date_added)s</td>
                       <td class="searchboxbody">%(date_until)s</td>
                     </tr>
                    </tbody>
                   </table>
                   <table class="searchbox">
                    <thead>
                     <tr>
                       <th class="searchboxheader">
                         %(msg_sort)s
                       </th>
                       <th class="searchboxheader">
                         %(msg_display)s
                       </th>
                       <th class="searchboxheader">
                         %(msg_format)s
                       </th>
                     </tr>
                    </thead>
                    <tbody>
                     <tr valign="bottom">
                       <td class="searchboxbody">%(sortoptions)s %(rankoptions)s</td>
                       <td class="searchboxbody">%(displayoptions)s</td>
                       <td class="searchboxbody">%(formatoptions)s</td>
                     </tr>
                    </tbody>
                   </table>
                   <!--/create_searchfor_advanced()-->
               """ % {
 
                     'added' : _("Added/modified since:"),
                     'until' : _("until:"),
                     'added_or_modified': self.tmpl_inputdatetype(ln=ln),
                     'date_added' : self.tmpl_inputdate("d1", ln=ln),
                     'date_until' : self.tmpl_inputdate("d2", ln=ln),
 
                     'msg_sort' : _("Sort by:"),
                     'msg_display' : _("Display results:"),
                     'msg_format' : _("Output format:"),
                     'sortoptions' : sortoptions,
                     'rankoptions' : rankoptions,
                     'displayoptions' : displayoptions,
                     'formatoptions' : formatoptions
                   }
         return out
 
     def tmpl_matchtype_box(self, name='m', value='', ln='en'):
         """Returns HTML code for the 'match type' selection box.

           Parameters:

             - 'name' *string* - The name of the produced select

             - 'value' *string* - The option value to mark as selected,
               one of 'a', 'o', 'e', 'p', 'r' (none is marked otherwise)

             - 'ln' *string* - the language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # (option value, translated label) for every match type
         match_types = (('a', _("All of the words:")),
                        ('o', _("Any of the words:")),
                        ('e', _("Exact phrase:")),
                        ('p', _("Partial phrase:")),
                        ('r', _("Regular expression:")))
 
         # Fill in the sel<x> / opt<x> placeholders of the template
         substitutions = {'name' : name}
         for code, label in match_types:
             substitutions['sel' + code] = self.tmpl_is_selected(code, value)
             substitutions['opt' + code] = label
 
         return """
         <select name="%(name)s">
         <option value="a"%(sela)s>%(opta)s</option>
         <option value="o"%(selo)s>%(opto)s</option>
         <option value="e"%(sele)s>%(opte)s</option>
         <option value="p"%(selp)s>%(optp)s</option>
         <option value="r"%(selr)s>%(optr)s</option>
         </select>
         """ % substitutions
 
     def tmpl_is_selected(self, var, fld):
         """
           Returns ' selected="selected"' when *var* equals *fld*, and an
           empty string otherwise.  Useful for select boxes.

           Parameters:

           - 'var' - First value to compare

           - 'fld' - Second value to compare
         """
         marker = ""
         if var == fld:
             marker = ' selected="selected"'
         return marker
 
     def tmpl_andornot_box(self, name='op', value='', ln='en'):
         """
           Returns HTML code for the AND/OR/NOT selection box.

           Parameters:

             - 'name' *string* - The name of the produced select

             - 'value' *string* - The option value to mark as selected,
               one of 'a', 'o', 'n' (none is marked otherwise)

             - 'ln' *string* - the language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # (option value, translated label) for every operator
         operators = (('a', _("AND")),
                      ('o', _("OR")),
                      ('n', _("AND NOT")))
 
         # Fill in the sel<x> / opt<x> placeholders of the template
         substitutions = {'name' : name}
         for code, label in operators:
             substitutions['sel' + code] = self.tmpl_is_selected(code, value)
             substitutions['opt' + code] = label
 
         return """
         <select name="%(name)s">
         <option value="a"%(sela)s>%(opta)s</option>
         <option value="o"%(selo)s>%(opto)s</option>
         <option value="n"%(seln)s>%(optn)s</option>
         </select>
         """ % substitutions
 
     def tmpl_inputdate(self, name, ln, sy = 0, sm = 0, sd = 0):
         """
           Produces *From Date*, *Until Date* kind of selection box. Suitable for search options.

           Three selects are produced, named after 'name' plus 'd', 'm'
           and 'y': days 01-31, the twelve months, and the current year
           together with the twenty years before it.  Each one starts with
           an "any ..." option of empty value.

           Parameters:

             - 'name' *string* - The base name of the produced selects

             - 'ln' *string* - the language to display

             - 'sy', 'sm', 'sd' *int* - The year, month and day to mark as
               selected; 0 selects the "any ..." option
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         month_names = [_("January"), _("February"), _("March"), _("April"),
                        _("May"), _("June"), _("July"), _("August"),
                        _("September"), _("October"), _("November"), _("December")]
 
         # day
         pieces = ["""
                <select name="%(name)sd">
                  <option value=""%(sel)s>%(any)s</option>
               """ % {'name' : name,
                      'any' : _("any day"),
                      'sel' : self.tmpl_is_selected(sd, 0)}]
         pieces.extend(["""<option value="%02d"%s>%02d</option>""" % (day, self.tmpl_is_selected(sd, day), day)
                        for day in range(1, 32)])
         pieces.append("""</select>""")
 
         # month
         pieces.append("""
                 <select name="%(name)sm">
                   <option value=""%(sel)s>%(any)s</option>
                """ % {'name' : name,
                       'any' : _("any month"),
                       'sel' : self.tmpl_is_selected(sm, 0)})
         pieces.extend(["""<option value="%02d"%s>%s</option>""" % (mm, self.tmpl_is_selected(sm, mm), month)
                        for mm, month in zip(range(1, 13), month_names)])
         pieces.append("""</select>""")
 
         # year
         pieces.append("""
                 <select name="%(name)sy">
                   <option value=""%(sel)s>%(any)s</option>
                """ % {'name' : name,
                       'any' : _("any year"),
                       'sel' : self.tmpl_is_selected(sy, 0)})
         this_year = time.localtime().tm_year
         pieces.extend(["""<option value="%d"%s>%d</option>""" % (year, self.tmpl_is_selected(sy, year), year)
                        for year in range(this_year-20, this_year+1)])
         pieces.append("""</select>""")
 
         return ''.join(pieces)
 
     def tmpl_inputdatetype(self, dt='', ln=CFG_SITE_LANG):
         """
           Produces the 'dt' selection box used to choose between the
           added-since and modified-since date search options.

           Parameters:

             - 'dt' *string* - date type; 'm' marks the "modified" option
               as selected, any other value leaves the first option
               ("added", empty value) in effect

             - 'ln' *string* - the language to display
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         substitutions = {'added': _("Added since:"),
                          'modified': _("Modified since:"),
                          'sel': self.tmpl_is_selected(dt, 'm')}
 
         return """<select name="dt">
                   <option value="">%(added)s </option>
                   <option value="m"%(sel)s>%(modified)s </option>
                  </select>
               """ % substitutions
 
     def tmpl_narrowsearch(self, as, ln, type, father,
                           has_grandchildren, sons, display_grandsons,
                           grandsons):
 
         """
         Creates list of collection descendants of type *type* under title *title*.
         If as==1, then links to Advanced Search interfaces; otherwise Simple Search.
         Suitable for 'Narrow search' and 'Focus on' boxes.
 
         Parameters:
 
           - 'as' *bool* - Should we display an advanced search box?
 
           - 'ln' *string* - The language to display
 
           - 'type' *string* - The type of the produced box (virtual collections or normal collections)
 
           - 'father' *collection* - The current collection
 
           - 'has_grandchildren' *bool* - If the current collection has grand children
 
           - 'sons' *list* - The list of the sub-collections (first level)
 
           - 'display_grandsons' *bool* - If the grand children collections should be displayed (2 level deep display)
 
           - 'grandsons' *list* - The list of sub-collections (second level)
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         title = {'r': _("Narrow by collection:"),
                  'v': _("Focus on:")}[type]
 
 
         if has_grandchildren:
             style_prolog = "<strong>"
             style_epilog = "</strong>"
         else:
             style_prolog = ""
             style_epilog = ""
 
         out = """<table class="%(narrowsearchbox)s">
                    <thead>
                     <tr>
                      <th colspan="2" align="left" class="%(narrowsearchbox)sheader">
                        %(title)s
                      </th>
                     </tr>
                    </thead>
                    <tbody>""" % {'title' : title,
                                  'narrowsearchbox': {'r': 'narrowsearchbox',
                                                      'v': 'focusonsearchbox'}[type]}
         # iterate through sons:
         i = 0
         for son in sons:
             out += """<tr><td class="%(narrowsearchbox)sbody" valign="top">""" % \
                    { 'narrowsearchbox': {'r': 'narrowsearchbox',
                                          'v': 'focusonsearchbox'}[type]}
 
             if type == 'r':
                 if son.restricted_p() and son.restricted_p() != father.restricted_p():
                     out += """<input type="checkbox" name="c" value="%(name)s" />&nbsp;</td>""" % {'name' : cgi.escape(son.name) }
                 else:
                     out += """<input type="checkbox" name="c" value="%(name)s" checked="checked" />&nbsp;</td>""" % {'name' : cgi.escape(son.name) }
             else:
                 out += '</td>'
             out += """<td valign="top">%(link)s%(recs)s """ % {
                 'link': create_html_link(self.build_search_interface_url(c=son.name, ln=ln, as=as),
                                          {}, style_prolog + cgi.escape(son.get_name(ln)) + style_epilog),
                 'recs' : self.tmpl_nbrecs_info(son.nbrecs, ln=ln)}
 
             if son.restricted_p():
                 out += """ <small class="warning">[%(msg)s]</small> """ % { 'msg' : _("restricted") }
             if display_grandsons and len(grandsons[i]):
                 # iterate trough grandsons:
                 out += """<br />"""
                 for grandson in grandsons[i]:
                     out += """ <small>%(link)s%(nbrec)s</small> """ % {
                         'link': create_html_link(self.build_search_interface_url(c=grandson.name, ln=ln, as=as),
                                                  {},
                                                  cgi.escape(grandson.get_name(ln))),
                         'nbrec' : self.tmpl_nbrecs_info(grandson.nbrecs, ln=ln)}
 
             out += """</td></tr>"""
             i += 1
         out += "</tbody></table>"
 
         return out
 
     def tmpl_searchalso(self, ln, engines_list, collection_id):
         """
         Render the 'Search also' box: one checkbox (form field 'ec') per
         external search engine, followed by an icon linking to the engine.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'engines_list' *list* - the external engines; each one must
             provide the 'name' and 'base_url' attributes
 
           - 'collection_id' - passed, together with each engine, to
             external_collection_get_state(); the checkbox is pre-checked
             when that call returns 3
         """
         _ = gettext_set_language(ln)
 
         box_name = _("Search also:")
 
         # opening of the outer and inner tables, with the box title
         html = """<table cellspacing="0" cellpadding="0" border="0">
             <tr><td valign="top"><table class="searchalsosearchbox">
             <thead><tr><th colspan="2" align="left" class="searchalsosearchboxheader">%(box_name)s
             </th></tr></thead><tbody>
         """ % {'box_name': box_name}
 
         for engine in engines_list:
             internal_name = engine.name
             # the displayed name is the translated internal name
             name = _(internal_name)
             base_url = engine.base_url
 
             checked = ''
             if external_collection_get_state(engine, collection_id) == 3:
                 checked = ' checked="checked"'
 
             row = { 'checked': checked,
                     'base_url': base_url,
                     'internal_name': internal_name,
                     'name': cgi.escape(name),
                     'id': "extSearch" + nmtoken_from_string(name),
                     'siteurl': CFG_SITE_URL,}
 
             html += """<tr><td class="searchalsosearchboxbody" valign="top">
                 <input type="checkbox" name="ec" id="%(id)s" value="%(internal_name)s" %(checked)s /></td>
                 <td valign="top" class="searchalsosearchboxbody">
                 <div style="white-space: nowrap"><label for="%(id)s">%(name)s</label>
                 <a href="%(base_url)s">
                 <img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/></a>
                 </div></td></tr>""" % row
 
         html += """</tbody></table></td></tr></table>"""
         return html
 
     def tmpl_nbrecs_info(self, number, prolog=None, epilog=None, ln=CFG_SITE_LANG):
         """
         Format the number of records, surrounded by a prefix and a suffix.
 
         Parameters:
 
         - 'number' - The number of records; **None** is treated as 0. The
         value is formatted by tmpl_nice_number().
 
         - 'prolog' *string* (optional) - An HTML code to prefix the number (if **None**, will be
         '&nbsp;<small class="nbdoccoll">(')
 
         - 'epilog' *string* (optional) - An HTML code to append to the number (if **None**, will be
         ')</small>')
 
         - 'ln' *string* - The language, passed on to tmpl_nice_number()
         """
         # fall back to the default decoration when none is supplied
         if prolog is None:
             prolog = '''&nbsp;<small class="nbdoccoll">('''
         if epilog is None:
             epilog = ''')</small>'''
 
         # a missing count is displayed as zero
         if number is None:
             number = 0
 
         nice_number = self.tmpl_nice_number(number, ln)
         return prolog + nice_number + epilog
 
     def tmpl_box_restricted_content(self, ln):
         """
           Return the translated *restricted content* message of a
           collection.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         message = _("The contents of this collection is restricted.")
         return message
 
     def tmpl_box_no_records(self, ln):
         """
           Return the translated *no content* message of a collection
           that has no document.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         message = _("This collection does not contain any document yet.")
         return message
 
 
     def tmpl_instant_browse(self, as, ln, recids, more_link = None):
         """
           Formats a list of records (given in the recids list) from the database.
 
         Parameters:
 
           - 'as' *int* - Advanced Search interface or not (0 or 1)
 
           - 'ln' *string* - The language to display
 
           - 'recids' *list* - the list of records from the database
 
           - 'more_link' *string* - the "More..." link for the record. If not given, will not be displayed
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         body = '''<table class="latestadditionsbox">'''
         for recid in recids:
             body += '''
             <tr>
               <td class="latestadditionsboxtimebody">%(date)s</td>
               <td class="latestadditionsboxrecordbody">%(body)s</td>
             </tr>''' % {'date': recid['date'],
                         'body': recid['body']
                       }
         body += "</table>"
         if more_link:
             body += '<div align="right"><small>' + \
                     create_html_link(more_link, {}, '[&gt;&gt; %s]' % _("more")) + \
                     '</small></div>'
 
         return '''
         <table class="narrowsearchbox">
           <thead>
             <tr>
               <th class="narrowsearchboxheader">%(header)s</th>
             </tr>
           </thead>
           <tbody>
             <tr>
             <td class="narrowsearchboxbody">%(body)s</td>
             </tr>
           </tbody>
         </table>''' % {'header' : _("Latest additions:"),
                        'body' : body,
                        }
 
 
     def tmpl_searchwithin_select(self, ln, fieldname, selected, values):
         """
           Produces 'search within' selection box for the current collection.
 
         Parameters:
 
           - 'ln' *string* - The language to display (not used by this
             template)
 
           - 'fieldname' *string* - the name of the select box produced
 
           - 'selected' *string* - which of the values is selected
 
           - 'values' *list* - the options of the select: one mapping per
             option with a 'value' and a 'text' entry. May be empty or
             **None**, in which case the select has no option.
         """
 
         out = '<select name="%(fieldname)s">' % {'fieldname': fieldname}
 
         for pair in values or []:
             out += """<option value="%(value)s"%(selected)s>%(text)s</option>""" % {
                      # quote flag set: the value lands inside a double-quoted
                      # attribute, so '"' must be escaped as well
                      'value'    : cgi.escape(pair['value'], 1),
                      'selected' : self.tmpl_is_selected(pair['value'], selected),
                      'text'     : cgi.escape(pair['text'])
                    }
         out += """</select>"""
         return out
 
     def tmpl_select(self, fieldname, values, selected=None, css_class=''):
         """
           Produces a generic select box
 
         Parameters:
 
           - 'css_class' *string* - optional, a css class to display this select with
 
           - 'fieldname' *string* - the name of the select box produced
 
           - 'selected' *string* - which of the values is selected
 
           - 'values' *list* - the options of the select: one mapping per
             option with a 'value' and a 'text' entry. An option is also
             marked as selected when its mapping has a true 'selected' entry.
         """
         # the class attribute is only emitted when a class was requested
         class_field = ''
         if css_class != '':
             class_field = ' class="%s"' % css_class
         out = '<select name="%(fieldname)s"%(class)s>' % {
             'fieldname' : fieldname,
             'class' : class_field
             }
 
         for pair in values:
             if pair.get('selected', False) or pair['value'] == selected:
                 flag = ' selected="selected"'
             else:
                 flag = ''
 
             out += '<option value="%(value)s"%(selected)s>%(text)s</option>' % {
                      # quote flag set: the value lands inside a double-quoted
                      # attribute, so '"' must be escaped as well
                      'value'    : cgi.escape(str(pair['value']), 1),
                      'selected' : flag,
                      'text'     : cgi.escape(pair['text'])
                    }
 
         out += """</select>"""
         return out
 
     def tmpl_record_links(self, recid, ln):
         """
           Build the *Detailed record* and *Similar records* links of a
           record, followed by a *Cited by* link when
           CFG_BIBRANK_SHOW_CITATION_LINKS is set.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recid' *int* - the id of the displayed record (it is formatted
             with %d, so it has to be a number)
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         link_attrs = "moreinfo"
 
         detailed_url = self.build_search_url(recid=recid, ln=ln)
         detailed_link = create_html_link(detailed_url,
                                          {},
                                          _("Detailed record"), {'class': link_attrs})
 
         # "similar records" is a word-similarity ranked search on the record
         similar_url = self.build_search_url(p="recid:%d" % recid, rm='wrd', ln=ln)
         similar_link = create_html_link(similar_url,
                                         {},
                                         _("Similar records"),
                                         {'class': link_attrs})
 
         out = '''<br /><span class="moreinfo">%(detailed)s - %(similar)s</span>''' % {
             'detailed': detailed_link,
             'similar': similar_link}
 
         if CFG_BIBRANK_SHOW_CITATION_LINKS:
             cited_url = self.build_search_url(p='recid:%d' % recid, rm='citation', ln=ln)
             cited_link = create_html_link(cited_url,
                                           {}, _("Cited by"), {'class': link_attrs})
             out += '''<span class="moreinfo"> - %s </span>''' % cited_link
 
         return out
 
     def tmpl_record_body(self, titles, authors, dates, rns, abstracts, urls_u, urls_z, ln):
         """
           Displays the "HTML basic" format of a record
 
         Parameters:
 
           - 'titles' *list* - the titles (as strings)
 
           - 'authors' *list* - the authors (as strings); at most
             CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD of them are displayed,
             followed by "et al" when there are more
 
           - 'dates' *list* - the dates of publication
 
           - 'rns' *list* - the quicknotes for the record
 
           - 'abstracts' *list* - the abstracts for the record; only the text
             up to and including the first '.' is displayed
 
           - 'urls_u' *list* - URLs to the original versions of the record
 
           - 'urls_z' *list* - Not used
 
           - 'ln' *string* - The language used in the author search links
         """
         out = ""
         for title in titles:
             out += "<strong>%(title)s</strong> " % {
                      'title' : cgi.escape(title)
                    }
         if authors:
             out += " / "
             for author in authors[:CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD]:
                 out += '%s; ' % \
                        create_html_link(self.build_search_url(p=author, f='author', ln=ln),
                                         {}, cgi.escape(author))
 
             if len(authors) > CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD:
                 out += "<em>et al</em>"
         for date in dates:
             out += " %s." % cgi.escape(date)
         for rn in rns:
             out += """ <small class="quicknote">[%(rn)s]</small>""" % {'rn' : cgi.escape(rn)}
         for abstract in abstracts:
             # Keep the first sentence only.  find() returns -1 when the
             # abstract holds no '.', which used to cut it down to an empty
             # string; in that case the whole abstract is shown instead.
             period_pos = abstract.find('.')
             if period_pos >= 0:
                 teaser = abstract[:period_pos + 1]
             else:
                 teaser = abstract
             out += "<br /><small>%(abstract)s [...]</small>" % {'abstract' : cgi.escape(teaser) }
         for url in urls_u:
             # URLs are escaped like every other field of the record; the
             # quote flag is needed for the copy placed inside href="..."
             # NOTE(review): assumes urls_u holds raw (not yet HTML-escaped)
             # values, like the other fields - confirm against the caller
             out += """<br /><small class="note"><a class="note" href="%(url)s">%(name)s</a></small>""" % {
                      'url' : cgi.escape(url, 1),
                      'name' : cgi.escape(url)
                    }
         return out
 
     def tmpl_search_in_bibwords(self, p, f, ln, nearest_box):
         """
           Build the paragraph introducing the words nearest to the
           searched one, followed by the box listing them.
 
         Parameters:
 
           - 'p' *string* - Current search words
 
           - 'f' *string* - the fields in which the search was done; when
             empty, the sentence does not mention any field
 
           - 'ln' *string* - The language to display
 
           - 'nearest_box' *string* - the HTML code for the "nearest_terms" box - most probably from a create_nearest_terms_box call
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # the searched word is escaped and emphasised in both sentences
         word = '<em>' + cgi.escape(p) + '</em>'
         if f:
             sentence = _("Words nearest to %(x_word)s inside %(x_field)s in any collection are:") % {
                 'x_word': word,
                 'x_field': '<em>' + cgi.escape(f) + '</em>'}
         else:
             sentence = _("Words nearest to %(x_word)s in any collection are:") % {'x_word': word}
 
         return '<p>' + sentence + '<br />' + nearest_box + '</p>'
 
     def tmpl_nearest_term_box(self, p, ln, f, terminfo, intro):
         """
           Displays the *Nearest search terms* box
 
         Parameters:
 
           - 'p' *string* - Current search words
 
           - 'f' *string* - a collection description (if the search has been
             completed in a collection); not used by this template
 
           - 'ln' *string* - The language to display; not used by this template
 
           - 'terminfo': tuple (term, hits, argd) for each near term
 
           - 'intro' *string* - the intro HTML to prefix the box with
         """
 
         out = '''<table class="nearesttermsbox" cellpadding="0" cellspacing="0" border="0">'''
 
         for term, hits, argd in terminfo:
 
             # a term without hits shows a dash instead of a count
             if hits:
                 hitsinfo = str(hits)
             else:
                 hitsinfo = '-'
 
             escaped_term = cgi.escape(term)
 
             # The searched word has to be recognised on the raw term:
             # comparing only the escaped form never matches a pattern that
             # contains '&', '<' or '>'.  The escaped comparison is kept so
             # that a caller passing an already escaped 'p' still matches.
             if term == p or escaped_term == p: # print search word for orientation:
                 nearesttermsboxbody_class = "nearesttermsboxbodyselected"
                 # the searched word is only linked when it has hits
                 term_html = escaped_term
                 if hits > 0:
                     term_html = create_html_link(self.build_search_url(argd), {},
                                                  escaped_term, {'class': "nearesttermsselected"})
             else:
                 nearesttermsboxbody_class = "nearesttermsboxbody"
                 term_html = create_html_link(self.build_search_url(argd), {},
                                              escaped_term, {'class': "nearestterms"})
 
             out += '''\
             <tr>
               <td class="%(nearesttermsboxbody_class)s" align="right">%(hits)s</td>
               <td class="%(nearesttermsboxbody_class)s" width="15">&nbsp;</td>
               <td class="%(nearesttermsboxbody_class)s" align="left">%(term)s</td>
             </tr>
             ''' % {'hits': hitsinfo,
                    'nearesttermsboxbody_class': nearesttermsboxbody_class,
                    'term': term_html}
 
         out += "</table>"
         return intro + "<blockquote>" + out + "</blockquote>"
 
     def tmpl_browse_pattern(self, f, fn, ln, browsed_phrases_in_colls, colls):
         """
           Render the table of browsed phrases: one row per phrase with
           its number of hits and a link searching for that phrase.  When
           there is more than one phrase, the last one is not listed but
           turned into a "next" link that continues the browsing from it.
 
         Parameters:
 
           - 'f' *string* - field (*not* i18nized)
 
           - 'fn' *string* - field name (i18nized)
 
           - 'ln' *string* - The language to display
 
           - 'browsed_phrases_in_colls' *array* - the phrases to display, as
             (phrase, number of hits) pairs
 
           - 'colls' *array* - the list of collection parameters of the search (c's)
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         def phrase_query(phrase):
             # a fresh set of URL arguments for every generated link
             return {'c': colls,
                     'ln': ln,
                     'p': phrase,
                     'f': f}
 
         out = """<table class="searchresultsbox">
               <thead>
                <tr>
                 <th class="searchresultsboxheader" style="text-align: right;">
                   %(hits)s
                 </th>
                 <th class="searchresultsboxheader" width="15">
                   &nbsp;
                 </th>
                 <th class="searchresultsboxheader" style="text-align: left;">
                   %(fn)s
                 </th>
                </tr>
               </thead>
               <tbody>""" % {'hits' : _("Hits"),
                             'fn' : cgi.escape(fn)}
 
         nb_phrases = len(browsed_phrases_in_colls)
 
         if nb_phrases == 1:
             # one hit only found:
             only_hit = browsed_phrases_in_colls[0]
             phrase, nbhits = only_hit[0], only_hit[1]
             phrase_link = create_html_link(self.build_search_url(phrase_query(phrase)),
                                            {}, cgi.escape(phrase))
 
             out += """<tr>
                        <td class="searchresultsboxbody" style="text-align: right;">
                         %(nbhits)s
                        </td>
                        <td class="searchresultsboxbody" width="15">
                         &nbsp;
                        </td>
                        <td class="searchresultsboxbody" style="text-align: left;">
                         %(link)s
                        </td>
                       </tr>""" % {'nbhits': nbhits,
                                   'link': phrase_link}
 
         elif nb_phrases > 1:
             # first display what was found but the last one:
             for phrase, nbhits in browsed_phrases_in_colls[:-1]:
                 phrase_link = create_html_link(self.build_search_url(phrase_query(phrase)),
                                                {},
                                                cgi.escape(phrase))
 
                 out += """<tr>
                            <td class="searchresultsboxbody" style="text-align: right;">
                             %(nbhits)s
                            </td>
                            <td class="searchresultsboxbody" width="15">
                             &nbsp;
                            </td>
                            <td class="searchresultsboxbody" style="text-align: left;">
                             %(link)s
                            </td>
                           </tr>""" % {'nbhits' : nbhits,
                                       'link': phrase_link}
 
             # now display last hit as "next term":
             phrase, nbhits = browsed_phrases_in_colls[-1]
             next_url = self.build_search_url(phrase_query(phrase), action='browse')
             next_link = create_html_link(next_url,
                                          {}, _("next"))
 
             out += """<tr><td colspan="2" class="normal">
                             &nbsp;
                           </td>
                           <td class="normal">
                             <img src="%(siteurl)s/img/sn.gif" alt="" border="0" />
                             %(link)s
                           </td>
                       </tr>""" % {'link': next_link,
                                   'siteurl' : CFG_SITE_URL}
 
         out += """</tbody>
             </table>"""
         return out
 
     def tmpl_search_box(self, ln, as, cc, cc_intl, ot, sp,
                         action, fieldslist, f1, f2, f3, m1, m2, m3,
                         p1, p2, p3, op1, op2, rm, p, f, coll_selects,
                         d1y, d2y, d1m, d2m, d1d, d2d, dt, sort_fields,
                         sf, so, ranks, sc, rg, formats, of, pl, jrec, ec):
 
         """
           Displays the *Nearest search terms* box
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'as' *bool* - Should we display an advanced search box?
 
           - 'cc_intl' *string* - the i18nized current collection name
 
           - 'cc' *string* - the internal current collection name
 
           - 'ot', 'sp' *string* - hidden values
 
           - 'action' *string* - the action demanded by the user
 
           - 'fieldslist' *list* - the list of all fields available, for use in select within boxes in advanced search
 
           - 'p, f, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, op3, rm' *strings* - the search parameters
 
           - 'coll_selects' *array* - a list of lists, each containing the collections selects to display
 
           - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates
 
           - 'dt' *string* - the dates' types (creation dates, modification dates)
 
           - 'sort_fields' *array* - the select information for the sort fields
 
           - 'sf' *string* - the currently selected sort field
 
           - 'so' *string* - the currently selected sort order ("a" or "d")
 
           - 'ranks' *array* - ranking methods
 
           - 'rm' *string* - selected ranking method
 
           - 'sc' *string* - split by collection or not
 
           - 'rg' *string* - selected results/page
 
           - 'formats' *array* - available output formats
 
           - 'of' *string* - the selected output format
 
           - 'pl' *string* - `limit to' search pattern
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
 
         # These are hidden fields the user does not manipulate
         # directly
         argd = drop_default_urlargd({
             'ln': ln, 'as': as,
             'cc': cc, 'ot': ot, 'sp': sp, 'ec': ec,
             }, self.search_results_default_urlargd)
 
 
         out = '''
         <h1 class="headline">%(ccname)s</h1>
         <form name="search" action="%(siteurl)s/search" method="get">
         ''' % {'ccname' : cgi.escape(cc_intl),
                'siteurl' : CFG_SITE_URL}
 
         # Only add non-default hidden values
         for field, value in argd.items():
             out += self.tmpl_input_hidden(field, value)
 
         leadingtext = _("Search")
 
         if action == 'browse':
             leadingtext = _("Browse")
 
         if as == 1:
             # print Advanced Search form:
 
             # define search box elements:
             out += '''
             <table class="searchbox">
              <thead>
               <tr>
                <th colspan="3" class="searchboxheader">
                 %(leading)s:
                </th>
               </tr>
              </thead>
              <tbody>
               <tr valign="top" style="white-space:nowrap;">
                 <td class="searchboxbody">%(matchbox1)s
                   <input type="text" name="p1" size="%(sizepattern)d" value="%(p1)s" />
                 </td>
                 <td class="searchboxbody">%(searchwithin1)s</td>
                 <td class="searchboxbody">%(andornot1)s</td>
               </tr>
               <tr valign="top">
                 <td class="searchboxbody">%(matchbox2)s
                   <input type="text" name="p2" size="%(sizepattern)d" value="%(p2)s" />
                 </td>
                 <td class="searchboxbody">%(searchwithin2)s</td>
                 <td class="searchboxbody">%(andornot2)s</td>
               </tr>
               <tr valign="top">
                 <td class="searchboxbody">%(matchbox3)s
                   <input type="text" name="p3" size="%(sizepattern)d" value="%(p3)s" />
                 </td>
                 <td class="searchboxbody">%(searchwithin3)s</td>
                 <td class="searchboxbody"  style="white-space:nowrap;">
                   <input class="formbutton" type="submit" name="action_search" value="%(search)s" />
                   <input class="formbutton" type="submit" name="action_browse" value="%(browse)s" />&nbsp;
                 </td>
               </tr>
               <tr valign="bottom">
                 <td colspan="3" align="right" class="searchboxbody">
                   <small>
                     <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a> ::
                     %(simple_search)s
                   </small>
                 </td>
               </tr>
              </tbody>
             </table>
             ''' % {
                 'simple_search': create_html_link(self.build_search_url(p=p1, f=f1, rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg),
                                                   {}, _("Simple Search")),
 
                 'leading' : leadingtext,
                 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH,
                 'matchbox1' : self.tmpl_matchtype_box('m1', m1, ln=ln),
                 'p1' : cgi.escape(p1,1),
                 'searchwithin1' : self.tmpl_searchwithin_select(
                                   ln = ln,
                                   fieldname = 'f1',
                                   selected = f1,
                                   values = self._add_mark_to_field(value = f1, fields = fieldslist, ln = ln)
                                 ),
               'andornot1' : self.tmpl_andornot_box(
                                   name = 'op1',
                                   value = op1,
                                   ln = ln
                                 ),
               'matchbox2' : self.tmpl_matchtype_box('m2', m2, ln=ln),
               'p2' : cgi.escape(p2,1),
               'searchwithin2' : self.tmpl_searchwithin_select(
                                   ln = ln,
                                   fieldname = 'f2',
                                   selected = f2,
                                   values = self._add_mark_to_field(value = f2, fields = fieldslist, ln = ln)
                                 ),
               'andornot2' : self.tmpl_andornot_box(
                                   name = 'op2',
                                   value = op2,
                                   ln = ln
                                 ),
               'matchbox3' : self.tmpl_matchtype_box('m3', m3, ln=ln),
               'p3' : cgi.escape(p3,1),
               'searchwithin3' : self.tmpl_searchwithin_select(
                                   ln = ln,
                                   fieldname = 'f3',
                                   selected = f3,
                                   values = self._add_mark_to_field(value = f3, fields = fieldslist, ln = ln)
                                 ),
               'search' : _("Search"),
               'browse' : _("Browse"),
               'siteurl' : CFG_SITE_URL,
               'ln' : ln,
               'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '',
               'search_tips': _("Search Tips")
             }
         else:
             # print Simple Search form:
             out += '''
             <table class="searchbox">
              <thead>
               <tr>
                <th colspan="3" class="searchboxheader">
                 %(leading)s:
                </th>
               </tr>
              </thead>
              <tbody>
               <tr valign="top">
                 <td class="searchboxbody"><input type="text" name="p" size="%(sizepattern)d" value="%(p)s" /></td>
                 <td class="searchboxbody">%(searchwithin)s</td>
                 <td class="searchboxbody">
                   <input class="formbutton" type="submit" name="action_search" value="%(search)s" />
                   <input class="formbutton" type="submit" name="action_browse" value="%(browse)s" />&nbsp;
                 </td>
               </tr>
               <tr valign="bottom">
                 <td colspan="3" align="right" class="searchboxbody">
                   <small>
                     <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a> ::
                     %(advanced_search)s
                   </small>
                 </td>
               </tr>
              </tbody>
             </table>
             ''' % {
               'advanced_search': create_html_link(self.build_search_url(p1=p,
                                                                         f1=f,
                                                                         rm=rm,
                                                                         as=1,
                                                                         cc=cc,
                                                                         jrec=jrec,
                                                                         ln=ln,
                                                                         rg=rg),
                                                   {}, _("Advanced Search")),
 
               'leading' : leadingtext,
               'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH,
               'p' : cgi.escape(p, 1),
               'searchwithin' : self.tmpl_searchwithin_select(
                                   ln = ln,
                                   fieldname = 'f',
                                   selected = f,
                                   values = self._add_mark_to_field(value=f, fields=fieldslist, ln=ln)
                                 ),
               'search' : _("Search"),
               'browse' : _("Browse"),
               'siteurl' : CFG_SITE_URL,
               'ln' : ln,
               'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '',
               'search_tips': _("Search Tips")
             }
 
         ## secondly, print Collection(s) box:
         selects = ''
         for sel in coll_selects:
             selects += self.tmpl_select(fieldname='c', values=sel)
 
         out += """
             <table class="searchbox">
              <thead>
               <tr>
                <th colspan="3" class="searchboxheader">
                 %(leading)s %(msg_coll)s:
                </th>
               </tr>
              </thead>
              <tbody>
               <tr valign="bottom">
                <td valign="top" class="searchboxbody">
                  %(colls)s
                </td>
               </tr>
              </tbody>
             </table>
              """ % {
                'leading' : leadingtext,
                'msg_coll' : _("collections"),
                'colls' : selects,
              }
 
         ## thirdly, print search limits, if applicable:
         if action != _("Browse") and pl:
             out += """<table class="searchbox">
                        <thead>
                         <tr>
                           <th class="searchboxheader">
                             %(limitto)s
                           </th>
                         </tr>
                        </thead>
                        <tbody>
                         <tr valign="bottom">
                           <td class="searchboxbody">
                            <input type="text" name="pl" size="%(sizepattern)d" value="%(pl)s" />
                           </td>
                         </tr>
                        </tbody>
                       </table>""" % {
                         'limitto' : _("Limit to:"),
                         'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH,
                         'pl' : cgi.escape(pl, 1),
                       }
 
         ## fourthly, print from/until date boxen, if applicable:
         if action == _("Browse") or (d1y==0 and d1m==0 and d1d==0 and d2y==0 and d2m==0 and d2d==0):
             pass # do not need it
         else:
             cell_6_a = self.tmpl_inputdatetype(dt, ln) + self.tmpl_inputdate("d1", ln, d1y, d1m, d1d)
             cell_6_b = self.tmpl_inputdate("d2", ln, d2y, d2m, d2d)
             out += """<table class="searchbox">
                        <thead>
                         <tr>
                           <th class="searchboxheader">
                             %(added)s
                           </th>
                           <th class="searchboxheader">
                             %(until)s
                           </th>
                         </tr>
                        </thead>
                        <tbody>
                         <tr valign="bottom">
                           <td class="searchboxbody">%(added_or_modified)s %(date1)s</td>
                           <td class="searchboxbody">%(date2)s</td>
                         </tr>
                        </tbody>
                       </table>""" % {
                         'added' : _("Added/modified since:"),
                         'until' : _("until:"),
                         'added_or_modified': self.tmpl_inputdatetype(dt, ln),
                         'date1' : self.tmpl_inputdate("d1", ln, d1y, d1m, d1d),
                         'date2' : self.tmpl_inputdate("d2", ln, d2y, d2m, d2d),
                       }
 
         ## fifthly, print Display results box, including sort/rank, formats, etc:
         if action != _("Browse"):
 
             rgs = []
             for i in [10, 25, 50, 100, 250, 500]:
                 rgs.append({ 'value' : i, 'text' : "%d %s" % (i, _("results"))})
 
             # sort by:
             out += """<table class="searchbox">
                  <thead>
                   <tr>
                    <th class="searchboxheader">
                     %(sort_by)s
                    </th>
                    <th class="searchboxheader">
                     %(display_res)s
                    </th>
                    <th class="searchboxheader">
                     %(out_format)s
                    </th>
                   </tr>
                  </thead>
                  <tbody>
                   <tr valign="bottom">
                    <td valign="top" class="searchboxbody">
                      %(select_sf)s %(select_so)s %(select_rm)s
                    </td>
                    <td valign="top" class="searchboxbody">
                      %(select_rg)s %(select_sc)s
                    </td>
                    <td valign="top" class="searchboxbody">%(select_of)s</td>
                   </tr>
                  </tbody>
                 </table>""" % {
                   'sort_by' : _("Sort:"),
                   'display_res' : _("Display results:"),
                   'out_format' : _("Output format:"),
                   'select_sf' : self.tmpl_select(fieldname = 'sf', values = sort_fields, selected = sf, css_class = 'address'),
                   'select_so' : self.tmpl_select(fieldname = 'so', values = [{
                                     'value' : 'a',
                                     'text' : _("asc.")
                                   }, {
                                     'value' : 'd',
                                     'text' : _("desc.")
                                   }], selected = so, css_class = 'address'),
                   'select_rm' : self.tmpl_select(fieldname = 'rm', values = ranks, selected = rm, css_class = 'address'),
                   'select_rg' : self.tmpl_select(fieldname = 'rg', values = rgs, selected = rg, css_class = 'address'),
                   'select_sc' : self.tmpl_select(fieldname = 'sc', values = [{
                                     'value' : 0,
                                     'text' : _("single list")
                                   }, {
                                     'value' : 1,
                                     'text' : _("split by collection")
                                   }], selected = sc, css_class = 'address'),
                   'select_of' : self.tmpl_searchwithin_select(
                                   ln = ln,
                                   fieldname = 'of',
                                   selected = of,
                                   values = self._add_mark_to_field(value = of, fields = formats, chars = 3, ln = ln)
                                 ),
                 }
 
         ## last but not least, print end of search box:
         out += """</form>"""
         return out
 
     def tmpl_input_hidden(self, name, value):
         """Return the HTML code of hidden form field(s) called NAME.

         When VALUE is a list, one hidden field is produced per item
         (recursively) and the fields are joined with newlines.
         Otherwise NAME and VALUE are stringified and HTML-escaped,
         quotes included, before being placed into the attributes.
         """
         if not isinstance(value, list):
             escaped_name = cgi.escape(str(name), 1)
             escaped_value = cgi.escape(str(value), 1)
             return """<input type="hidden" name="%(name)s" value="%(value)s" />""" % {
                      'name' : escaped_name,
                      'value' : escaped_value,
                    }
 
         # one hidden field per list item, each on its own line
         rendered = []
         for item in value:
             rendered.append(self.tmpl_input_hidden(name, item))
         return "\n".join(rendered)
 
     def _add_mark_to_field(self, value, fields, ln, chars = 1):
         """Return FIELDS extended with VALUE when VALUE looks like a MARC tag.
         Useful for advanced search.
 
         Parameters:
 
           - 'value' *string* - the currently selected value
 
           - 'fields' *list* - dictionaries with 'value' and 'text' keys;
             this list is never modified
 
           - 'ln' *string* - The language to display
 
           - 'chars' *int* - number of leading characters of 'value' that
             must all be digits for it to be treated as a MARC tag
 
         A new list is returned in every case, so the caller's list does
         not collect one more "MARC tag" entry on each call.
         """
         # work on a copy: appending to FIELDS itself would leak the extra
         # entry into every later use of the caller's list
         out = list(fields)
         if value and str(value[0:chars]).isdigit():
             # load the right message language (only needed on this path)
             _ = gettext_set_language(ln)
             out.append({'value' : value,
                         'text' : str(value) + " " + _("MARC tag")
                         })
         return out
 
     def tmpl_search_pagestart(self, ln):
         """Return the markup opening the body of the search page.

         It will be displayed after the page header.  The LN argument is
         accepted but not used.
         """
         opening_tags = """<div class="pagebody"><div class="pagebodystripemiddle">"""
         return opening_tags
 
     def tmpl_search_pageend(self, ln):
         """Return the markup closing the body of the search page.

         It will be displayed just before the page footer.  The LN
         argument is accepted but not used.
         """
         closing_tags = """</div></div>"""
         return closing_tags
 
     def tmpl_print_warning(self, msg, type, prologue, epilogue):
         """Return the HTML code of a warning message.

         Nothing is printed or flushed here: the markup is handed back
         to the caller.
 
         Parameters:
 
           - 'msg' *string* - The message string
 
           - 'type' *string* - the warning type; when empty, no "type: "
             prefix is put in front of the message
 
           - 'prologue' *string* - HTML code to display before the warning
 
           - 'epilogue' *string* - HTML code to display after the warning
         """
         pieces = ['\n%s<span class="quicknote">' % (prologue)]
         if type:
             pieces.append('%s: ' % type)
         pieces.append('%s</span>%s' % (msg, epilogue))
         return ''.join(pieces)
 
     def tmpl_print_search_info(self, ln, middle_only,
                                collection, collection_name, collection_id,
                                as, sf, so, rm, rg, nb_found, of, ot, p, f, f1,
                                f2, f3, m1, m2, m3, op1, op2, p1, p2,
                                p3, d1y, d1m, d1d, d2y, d2m, d2d, dt,
                                all_fieldcodes, cpu_time, pl_in_url,
                                jrec, sc, sp):
 
         """Returns the stripe with the information on 'collection' and 'nb_found' results and CPU time.
            Also, returns navigation links (beg/next/prev/end) inside the results set.
            If middle_only is set to 1, it will only return the middle box information (beg/next/prev/end/etc) links.
            This is suitable for displaying navigation links at the bottom of the search results page.
 
            The navigation links, the hidden form fields and the "jump to record" box
            are only produced when nb_found is greater than rg.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'middle_only' *bool* - Only display parts of the interface
 
           - 'collection' *string* - the collection name
 
           - 'collection_name' *string* - the i18nized current collection name
 
           - 'collection_id' - used as the name of the HTML anchor put in front of the stripe
 
           - 'as' *bool* - if we display the advanced search interface
 
           - 'sf' *string* - the currently selected sort format
 
           - 'so' *string* - the currently selected sort order ("a" or "d")
 
           - 'rm' *string* - selected ranking method
 
           - 'rg' *int* - selected results/page
 
           - 'nb_found' *int* - number of results found
 
           - 'of' *string* - the selected output format
 
           - 'ot' *string* - hidden values
 
           - 'p' *string* - Current search words
 
           - 'f' *string* - the fields in which the search was done
 
           - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters
 
           - 'jrec' *int* - number of first record on this page
 
           - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates
 
           - 'dt' *string* the dates' type (creation date, modification date)
 
           - 'all_fieldcodes' *array* - all the available fields
 
           - 'cpu_time' *float* - the time of the query in seconds; only shown
             when greater than -1 and middle_only is not set
 
           - 'pl_in_url' *string* - query string that is parsed with cgi.parse_qs();
             the values found for the codes listed in 'all_fieldcodes' are passed
             on as hidden form fields
 
           - 'sc' - the value received is not used: 0 is always put into the
             navigation links and into the hidden form field
 
           - 'sp' - passed on in the navigation links; passed on as a hidden
             form field only when not empty
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ""
         # left table cells: print collection name
         if not middle_only:
             out += '''
                   <a name="%(collection_id)s"></a>
                   <form action="%(siteurl)s/search" method="get">
                   <table class="searchresultsbox"><tr><td class="searchresultsboxheader" align="left">
                   <strong><big>%(collection_link)s</big></strong></td>
                   ''' % {
                     'collection_id': collection_id,
                     'siteurl' : CFG_SITE_URL,
                     'collection_link': create_html_link(self.build_search_interface_url(c=collection, as=as, ln=ln),
                                                         {}, cgi.escape(collection_name))
                   }
         else:
             out += """
                   <form action="%(siteurl)s/search" method="get"><div align="center">
                   """ % { 'siteurl' : CFG_SITE_URL }
 
         # middle table cell: print beg/next/prev/end arrows:
         if not middle_only:
             out += """<td class="searchresultsboxheader" align="center">
                       %(recs_found)s &nbsp;""" % {
                      'recs_found' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>')
                    }
         else:
             out += "<small>"
             # in the middle-only variant, the collection name and the number of
             # hits are only repeated when there is more than one page of results
             if nb_found > rg:
                 out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') + " &nbsp; "
 
         if nb_found > rg: # navig.arrows are needed, since we have many hits
 
             # arguments shared by all the navigation links below; note that
             # 'sc' is forced to 0 here
             query = {'p': p, 'f': f,
                      'cc': collection,
                      'sf': sf, 'so': so,
                      'sp': sp, 'rm': rm,
                      'of': of, 'ot': ot,
                      'as': as, 'ln': ln,
                      'p1': p1, 'p2': p2, 'p3': p3,
                      'f1': f1, 'f2': f2, 'f3': f3,
                      'm1': m1, 'm2': m2, 'm3': m3,
                      'op1': op1, 'op2': op2,
                      'sc': 0,
                      'd1y': d1y, 'd1m': d1m, 'd1d': d1d,
                      'd2y': d2y, 'd2m': d2m, 'd2d': d2d,
                      'dt': dt,
                 }
 
             # @todo here
             # helper returning the <img> tag of the navigation arrow GIF,
             # with TXT as its alternative text
             def img(gif, txt):
                 return '<img src="%(siteurl)s/img/%(gif)s.gif" alt="%(txt)s" border="0" />' % {
                     'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL}
 
             # "begin" link: skipped when the previous page already starts at record 1
             if jrec-rg > 1:
                 out += create_html_link(self.build_search_url(query, jrec=1, rg=rg),
                                         {}, img('sb', _("begin")),
                                         {'class': 'img'})
 
             # "previous" link: one page back, but never before record 1
             if jrec > 1:
                 out += create_html_link(self.build_search_url(query, jrec=max(jrec-rg, 1), rg=rg),
                                         {}, img('sp', _("previous")),
                                         {'class': 'img'})
 
             # range of records shown on this page, capped at nb_found
             if jrec+rg-1 < nb_found:
                 out += "%d - %d" % (jrec, jrec+rg-1)
             else:
                 out += "%d - %d" % (jrec, nb_found)
 
             # "next" link: only when at least one record follows this page
             if nb_found >= jrec+rg:
                 out += create_html_link(self.build_search_url(query,
                                                               jrec=jrec+rg,
                                                               rg=rg),
                                         {}, img('sn', _("next")),
                                         {'class':'img'})
 
             # "end" link: only when there is something beyond the next page;
             # it points to the last rg records
             if nb_found >= jrec+rg+rg:
                 out += create_html_link(self.build_search_url(query,
                                                             jrec=nb_found-rg+1,
                                                             rg=rg),
                                         {}, img('se', _("end")),
                                         {'class': 'img'})
 
 
             # still in the navigation part
             # vars() returns the local namespace here, so every name listed
             # below has to be an argument or a local variable of this method;
             # cc and sc are bound right here for that reason
             cc = collection
             sc = 0
             for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'as', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']:
                 out += self.tmpl_input_hidden(name = var, value = vars()[var])
             # these three are only passed on when they are not empty
             for var in ['ot', 'sp', 'rm']:
                 if vars()[var]:
                     out += self.tmpl_input_hidden(name = var, value = vars()[var])
             if pl_in_url:
                 fieldargs = cgi.parse_qs(pl_in_url)
                 for fieldcode in all_fieldcodes:
                     # get_fieldcodes():
                     if fieldargs.has_key(fieldcode):
                         for val in fieldargs[fieldcode]:
                             out += self.tmpl_input_hidden(name = fieldcode, value = val)
             out += """&nbsp; %(jump)s <input type="text" name="jrec" size="4" value="%(jrec)d" />""" % {
                      'jump' : _("jump to record:"),
                      'jrec' : jrec,
                    }
 
         if not middle_only:
             out += "</td>"
         else:
             out += "</small>"
 
         # right table cell: cpu time info
         if not middle_only:
             if cpu_time > -1:
                 out += """<td class="searchresultsboxheader" align="right"><small>%(time)s</small>&nbsp;</td>""" % {
                          'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time),
                        }
             out += "</tr></table>"
         else:
             out += "</div>"
         out += "</form>"
         return out
 
     def tmpl_nice_number(self, number, ln=CFG_SITE_LANG, thousands_separator=',', max_ndigits_after_dot=None):
         """
         Return nicely printed number NUMBER in language LN using
         given THOUSANDS_SEPARATOR character.
         If max_ndigits_after_dot is specified and the number is float, the
         number is rounded by taking in consideration up to max_ndigits_after_dot
         digit after the dot.
 
         A leading minus sign is kept in front of the first group of
         digits (-1234 gives '-1,234', -123 gives '-123', -0.5 gives
         '-0.5').  Floats that str() renders without a plain
         "digits.digits" form (inf, nan, exponent notation such as 1e+20)
         are returned as str() gives them.
 
         This version does not pay attention to locale (LN is not used).  See
         tmpl_nice_number_via_locale().
         """
         def group_digits(digits):
             "Insert the separator between groups of three characters, counted from the right."
             groups = []
             while len(digits) > 3:
                 groups.insert(0, digits[-3:])
                 digits = digits[:-3]
             groups.insert(0, digits)
             return thousands_separator.join(groups)
 
         frac_part = ''
         if isinstance(number, float):
             if max_ndigits_after_dot is not None:
                 number = round(number, max_ndigits_after_dot)
             text = str(number)
             if '.' not in text or 'e' in text.lower():
                 # nothing that could be split into an integer and a
                 # fractional part: hand it back untouched
                 return text
             text, frac_part = text.split('.')
             frac_part = '.' + frac_part
         else:
             text = str(number)
 
         # take the sign out before grouping, otherwise it would be counted
         # as a digit and could end up followed by a separator ("-,123")
         sign = ''
         if text.startswith('-'):
             sign, text = '-', text[1:]
         return sign + group_digits(text) + frac_part
 
     def tmpl_nice_number_via_locale(self, number, ln=CFG_SITE_LANG):
         """
         Return nicely printed number NUMBER in language LN using the locale.
         See also version tmpl_nice_number().
 
         None is returned for a NUMBER of None.  When the locale mapped to
         LN (self.tmpl_localemap, falling back to self.tmpl_default_locale)
         cannot be set, or when NUMBER cannot be formatted with '%d', the
         plain str(number) is returned instead.
 
         The numeric locale is process-wide, so the one that was active on
         entry is put back on every path once it has been switched.
         """
         if number is None:
             return None
         # Temporarily switch the numeric locale to the requested one, and format the number
         # In case the system has no locale definition, use the vanilla form
         ol = locale.getlocale(locale.LC_NUMERIC)
         try:
             locale.setlocale(locale.LC_NUMERIC, self.tmpl_localemap.get(ln, self.tmpl_default_locale))
         except locale.Error:
             # the locale was not switched, so there is nothing to restore
             return str(number)
         try:
             try:
                 return locale.format('%d', number, True)
             except TypeError:
                 return str(number)
         finally:
             # also reached on the TypeError path, which used to leave the
             # requested locale in place
             locale.setlocale(locale.LC_NUMERIC, ol)
 
     def tmpl_record_format_htmlbrief_header(self, ln):
         """Returns the opening part of the search results list when
         output is html brief: the start of the "add to basket" form and
         of the results table.  Note that this function is called for
         each collection results when 'split by collection' is enabled.
 
         See also: tmpl_record_format_htmlbrief_footer(..),
                   tmpl_record_format_htmlbrief_body(..)
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         substitutions = {'siteurl' : CFG_SITE_URL}
         return """
               <form action="%(siteurl)s/yourbaskets/add" method="post">
               <table>
               """ % substitutions
 
     def tmpl_record_format_htmlbrief_footer(self, ln):
         """Returns the closing part of the search results list when
         output is html brief: the end of the results table, the
         "ADD TO BASKET" submit button and the end of the form.  Note that
         this function is called for each collection results when 'split
         by collection' is enabled.
 
         See also: tmpl_record_format_htmlbrief_header(..),
                   tmpl_record_format_htmlbrief_body(..)
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         button_label = _("ADD TO BASKET")
         return """</table>
                <br /><input class="formbutton" type="submit" name="action" value="%(basket)s" />
                </form>""" % {'basket' : button_label}
 
     def tmpl_record_format_htmlbrief_body(self, ln, recid,
                                           row_number, relevance,
                                           record, relevances_prologue,
                                           relevances_epilogue):
         """Returns the html brief format of one record. Used in the
         search results list for each record.
 
         See also: tmpl_record_format_htmlbrief_header(..),
                   tmpl_record_format_htmlbrief_footer(..)
 
         Parameters:
 
           - 'ln' *string* - The language to display (not used: this row
             contains no translated text)
 
           - 'row_number' *int* - The position of this record in the list
 
           - 'recid' *int* - The recID
 
           - 'relevance' *string* - The relevance of the record; when empty,
             no rank score is shown
 
           - 'record' *string* - The formatted record, inserted as it is
 
           - 'relevances_prologue' *string* - HTML code to prepend the relevance indicator
 
           - 'relevances_epilogue' *string* - HTML code to append to the relevance indicator (used mostly for formatting)
 
         """
 
         # first cell: checkbox for the basket form and the row number
         out = """
                 <tr><td valign="top" align="right" style="white-space: nowrap;">
                     <input name="recid" type="checkbox" value="%(recid)s" />
                 %(number)s.
                """ % {'recid': recid,
                       'number': row_number}
         if relevance:
             out += """<br /><div class="rankscoreinfo"><a title="rank score">%(prologue)s%(relevance)s%(epilogue)s</a></div>""" % {
                 'prologue' : relevances_prologue,
                 'epilogue' : relevances_epilogue,
                 'relevance' : relevance
                 }
         # second cell: the record itself
         out += """</td><td valign="top">%s</td></tr>""" % record
 
         return out
 
     def tmpl_print_results_overview(self, ln, results_final_nb_total, cpu_time, results_final_nb, colls, ec):
         """Returns the results overview box with links to particular collections below.
         An empty string is returned when there is one collection only
         and no external collection is selected.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'results_final_nb_total' *int* - The total number of hits for the query
 
           - 'cpu_time' *float* - The time the query took, in seconds
             (displayed with two decimals)
 
           - 'results_final_nb' *array* - The number of hits, indexed by the collection codes
 
           - 'colls' *array* - The collections with hits, in the format:
 
           - 'coll[code]' *string* - The code of the collection (canonical name)
 
           - 'coll[name]' *string* - The display name of the collection
 
           - 'coll[id]' - The name of the anchor the collection entry links to
 
           - 'ec' *array* - selected external collections
         """
 
         if len(colls) == 1 and not ec:
             # if one collection only and no external collections, print nothing:
             return ""
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # first find total number of hits:
         founds = _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \
                  {'x_fmt_open': '<strong>',
                   'x_fmt_close': '</strong>',
                   'x_nb_records': '<strong>' + self.tmpl_nice_number(results_final_nb_total, ln) + '</strong>',
                   'x_nb_seconds': '%.2f' % cpu_time}
         out = """<table class="searchresultsbox">
                 <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead>
                 <tbody><tr><td class="searchresultsboxbody"> """ % {
                 'founds' : founds
               }
         # then print hits per collection (collections without hits are left out):
         for coll in colls:
             code = coll['code']
             if code in results_final_nb and results_final_nb[code] > 0:
                 out += '''<strong><a href="#%(coll)s">%(coll_name)s</a></strong>,
                       <a href="#%(coll)s">%(number)s</a><br />''' % {
                         'coll' : coll['id'],
                         'coll_name' : cgi.escape(coll['name']),
                         'number' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(results_final_nb[code], ln) + '</strong>')
                       }
         out += "</td></tr></tbody></table>"
         return out
 
 
     def tmpl_print_searchresultbox(self, header, body):
         """Return HEADER and BODY wrapped in a nicely formatted box for search results.

         Both strings are inserted as they are, without any escaping.
         """
         pieces = [
             '<table class="searchresultsbox"><thead><tr><th class="searchresultsboxheader">',
             header,
             '</th></tr></thead><tbody><tr><td class="searchresultsboxbody">',
             body,
             '</td></tr></tbody></table>',
         ]
         return ''.join(pieces)
 
 
     def tmpl_search_no_boolean_hits(self, ln, nearestterms):
         """No hits found for a boolean query: returns a box proposing
         alternative queries, one table row per proposal.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'nearestterms' *array* - The proposals, each one being a
             (term, hits, argd) tuple:
 
           - 'term' *string* - The search terms, used as the link text
 
           - 'hits' - The resulting number of hits
 
           - 'argd' - The search parameters, given to build_search_url()
 
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         pieces = [_("Boolean query returned no hits. Please combine your search terms differently.")]
         pieces.append('''<blockquote><table class="nearesttermsbox" cellpadding="0" cellspacing="0" border="0">''')
 
         for term, hits, argd in nearestterms:
             term_link = create_html_link(self.build_search_url(argd),
                                          {}, cgi.escape(term),
                                          {'class': "nearestterms"})
             pieces.append('''\
             <tr>
               <td class="nearesttermsboxbody" align="right">%(hits)s</td>
               <td class="nearesttermsboxbody" width="15">&nbsp;</td>
               <td class="nearesttermsboxbody" align="left">
                 %(link)s
               </td>
             </tr>''' % {'hits' : hits, 'link': term_link})
 
         pieces.append("""</table></blockquote>""")
         return ''.join(pieces)
 
     def tmpl_similar_author_names(self, authors, ln):
         """Returns the "See also: similar author names" box, with one
         row per author showing the number of hits and a link to the
         author search for that name.
 
         Parameters:
 
           - 'authors': a list of (name, hits) tuples
           - 'ln' *string* - The language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         box_title = _("See also: similar author names")
         pieces = ['''<a name="googlebox"></a>
                  <table class="googlebox"><tr><th colspan="2" class="googleboxheader">%(similar)s</th></tr>''' % {
                 'similar' : box_title
               }]
 
         for author, hits in authors:
             author_link = create_html_link(
                               self.build_search_url(p=author,
                                                     f='author',
                                                     ln=ln),
                               {}, cgi.escape(author), {'class':"google"})
             pieces.append('''\
             <tr>
               <td class="googleboxbody">%(nb)d</td>
               <td class="googleboxbody">%(link)s</td>
             </tr>''' % {'link': author_link,
                         'nb' : hits})
 
         pieces.append("""</table>""")
 
         return ''.join(pieces)
 
     def tmpl_print_record_detailed(self, recID, ln):
         """Displays a detailed on-the-fly record, built from the field
         values of the record: titles, authors, dates, abstracts,
         keywords, publication info and URLs.
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recID' *int* - The record id
 
         The publication info (909C4 p/v/y/n/c) and URL (8564 u/z) values
         are fetched as separate lists; when one of them is shorter than
         the list driving the loop, the missing part is left out.
         """
         def value_at(values, idx):
             "Return values[idx], or '' when VALUES has no such position."
             if idx < len(values):
                 return values[idx]
             return ''
 
         # NOTE(review): dates, abstracts, publication info and URL labels
         # are inserted unescaped, unlike titles, authors and keywords --
         # confirm whether these values are allowed to carry markup.
 
         # okay, need to construct a simple "Detailed record" format of our own:
         out = "<p>&nbsp;"
         # secondly, title:
         titles = get_fieldvalues(recID, "245__a")
         for title in titles:
             out += "<p><center><big><strong>%s</strong></big></center></p>" % cgi.escape(title)
         # thirdly, authors:
         authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
         if authors:
             out += "<p><center>"
             for author in authors:
                 out += '%s; ' % create_html_link(self.build_search_url(
                                                                 ln=ln,
                                                                 p=author,
                                                                 f='author'),
                                                  {}, cgi.escape(author))
             out += "</center></p>"
         # fourthly, date of creation:
         dates = get_fieldvalues(recID, "260__c")
         for date in dates:
             out += "<p><center><small>%s</small></center></p>" % date
         # fifthly, abstract:
         abstracts = get_fieldvalues(recID, "520__a")
         for abstract in abstracts:
             out += """<p style="margin-left: 15%%; width: 70%%">
                      <small><strong>Abstract:</strong> %s</small></p>""" % abstract
         # fifthly bis, keywords:
         keywords = get_fieldvalues(recID, "6531_a")
         if keywords:
             # this string is not %-formatted, so the percent signs must
             # not be doubled here
             out += """<p style="margin-left: 15%; width: 70%">
                      <small><strong>Keyword(s):</strong>"""
             for keyword in keywords:
                 out += '%s; ' % create_html_link(
                                     self.build_search_url(ln=ln,
                                                           p=keyword,
                                                           f='keyword'),
                                     {}, cgi.escape(keyword))
 
             out += '</small></p>'
         # fifthly bis bis, published in:
         prs_p = get_fieldvalues(recID, "909C4p")
         prs_v = get_fieldvalues(recID, "909C4v")
         prs_y = get_fieldvalues(recID, "909C4y")
         prs_n = get_fieldvalues(recID, "909C4n")
         prs_c = get_fieldvalues(recID, "909C4c")
         for idx, journal in enumerate(prs_p):
             out += """<p style="margin-left: 15%%; width: 70%%">
                      <small><strong>Publ. in:</strong> %s"""  % journal
             # the other lists may be shorter than prs_p
             volume = value_at(prs_v, idx)
             if volume:
                 out += """<strong>%s</strong>""" % volume
             year = value_at(prs_y, idx)
             if year:
                 out += """(%s)""" % year
             number = value_at(prs_n, idx)
             if number:
                 out += """, no.%s""" % number
             pages = value_at(prs_c, idx)
             if pages:
                 out += """, p.%s""" % pages
             out += """.</small></p>"""
         # sixthly, fulltext link:
         urls_z = get_fieldvalues(recID, "8564_z")
         urls_u = get_fieldvalues(recID, "8564_u")
         for idx, url in enumerate(urls_u):
             # fall back on a generic label when the URL has no description
             link_text = value_at(urls_z, idx) or "URL"
             # the URL goes into an attribute: escape quotes as well
             escaped_url = cgi.escape(url, 1)
             out += """<p style="margin-left: 15%%; width: 70%%">
             <small><strong>%s:</strong> <a href="%s">%s</a></small></p>""" % (link_text, escaped_url, escaped_url)
         # print some white space at the end:
         out += "<br /><br />"
         return out
 
     def tmpl_print_record_list_for_similarity_boxen(self, title, recID_score_list, ln=CFG_SITE_LANG):
         """Return a box called TITLE listing records in the "hs" (HTML Similarity) format.
            RECID_SCORE_LIST is a list of (recID1, score1), (recID2, score2), etc.
            At most the first 50 entries are examined, and at most the
            first 5 public records found among them are shown, each one
            with its score.
         """
 
         from invenio.search_engine import print_record, record_public_p
 
         # firstly find 5 first public records to print,
         # looking through first 50 records only:
         recID_score_list_to_be_printed = []
         for recID, score in recID_score_list[:50]:
             if not record_public_p(recID):
                 continue
             recID_score_list_to_be_printed.append([recID, score])
             if len(recID_score_list_to_be_printed) == 5:
                 break
 
         # secondly print them:
         pieces = ['''
         <table><tr>
          <td>
           <table><tr><td class="blocknote">%(title)s</td></tr></table>
          </td>
          </tr>
          <tr>
           <td><table>
         ''' % { 'title': cgi.escape(title) }]
         for recid, score in recID_score_list_to_be_printed:
             formatted_record = print_record(recid, format="hs", ln=ln)
             pieces.append('''
             <tr><td><font class="rankscoreinfo"><a>(%(score)s)&nbsp;</a></font><small>&nbsp;%(info)s</small></td></tr>''' % {
                 'score': score,
                 'info' : formatted_record,
                 })
 
         pieces.append("""</table></td></tr></table> """)
         return ''.join(pieces)
 
     def tmpl_print_record_brief(self, ln, recID):
         """Displays a brief record on-the-fly
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recID' *int* - The record id
         """
         out = ""
 
         # record 'recID' does not exist in format 'format', so print some default format:
         # firstly, title:
         titles = get_fieldvalues(recID, "245__a")
         # secondly, authors:
         authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
         # thirdly, date of creation:
         dates = get_fieldvalues(recID, "260__c")
         # thirdly bis, report numbers:
         rns = get_fieldvalues(recID, "037__a")
         rns = get_fieldvalues(recID, "088__a")
         # fourthly, beginning of abstract:
         abstracts = get_fieldvalues(recID, "520__a")
         # fifthly, fulltext link:
         urls_z = get_fieldvalues(recID, "8564_z")
         urls_u = get_fieldvalues(recID, "8564_u")
 
         return self.tmpl_record_body(
                  titles = titles,
                  authors = authors,
                  dates = dates,
                  rns = rns,
                  abstracts = abstracts,
                  urls_u = urls_u,
                  urls_z = urls_z,
                  ln=ln)
 
     def tmpl_print_record_brief_links(self, ln, recID):
         """Displays links for brief record on-the-fly
 
         Parameters:
 
           - 'ln' *string* - The language to display
 
           - 'recID' *int* - The record id
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ""
         if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
             alephsysnos = get_fieldvalues(recID, "970__a")
             if len(alephsysnos)>0:
                 alephsysno = alephsysnos[0]
                 out += '<br /><span class="moreinfo">%s</span>' % \
                     create_html_link(self.build_search_url(sysno=alephsysno,
                                                            ln=ln),
                                      {}, _("Detailed record"),
                                      {'class': "moreinfo"})
             else:
                 out += '<br /><span class="moreinfo">%s</span>' % \
                     create_html_link(self.build_search_url(recid=recID, ln=ln),
                                      {},
                                      _("Detailed record"),
                                      {'class': "moreinfo"})
         else:
             out += '<br /><span class="moreinfo">%s</span>' % \
                    create_html_link(self.build_search_url(recid=recID, ln=ln),
                                     {}, _("Detailed record"),
                                     {'class': "moreinfo"})
 
             out += '<span class="moreinfo"> - %s</span>' % \
                    create_html_link(self.build_search_url(p="recid:%d" % recID,
                                                      rm="wrd",
                                                      ln=ln),
                                     {}, _("Similar records"),
                                     {'class': "moreinfo"})
 
         if CFG_BIBRANK_SHOW_CITATION_LINKS:
             out += '<span class="moreinfo"> - %s</span>' % \
                    create_html_link(self.build_search_url(p="recid:%d" % recID,
                                                           rm="citation",
                                                           ln=ln),
                                     {}, _("Cited by"),
                                     {'class': "moreinfo"})
 
         return out
 
     def tmpl_xml_rss_prologue(self):
         """Creates XML RSS 2.0 prologue."""
         out = """<rss version="2.0">
       <channel>
         <title>%(sitename)s</title>
         <link>%(siteurl)s</link>
         <description>%(sitename)s latest documents</description>
         <language>%(sitelang)s</language>
         <pubDate>%(timestamp)s</pubDate>
         <category></category>
         <generator>CDS Invenio %(version)s</generator>
         <webMaster>%(sitesupportemail)s</webMaster>
         <ttl>%(timetolive)s</ttl>
         <image>
             <url>%(siteurl)s/img/cds.png</url>
             <title>%(sitename)s</title>
             <link>%(siteurl)s</link>
         </image>
         <textInput>
           <title>Search </title>
           <description>Search this site:</description>
           <name>p</name>
           <link>%(siteurl)s/search</link>
         </textInput>
         """ % {'sitename': CFG_SITE_NAME,
                'siteurl': CFG_SITE_URL,
                'sitelang': CFG_SITE_LANG,
                'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime()),
                'version': CFG_VERSION,
                'sitesupportemail': CFG_SITE_SUPPORT_EMAIL,
                'timetolive': CFG_WEBSEARCH_RSS_TTL
                }
         return out
 
     def tmpl_xml_rss_epilogue(self):
         """Creates XML RSS 2.0 epilogue."""
         out = """\
       </channel>
 </rss>\n"""
         return out
 
     def tmpl_xml_nlm_prologue(self):
         """Creates XML NLM prologue."""
         out = """<articles>\n"""
         return out
 
     def tmpl_xml_nlm_epilogue(self):
         """Creates XML NLM epilogue."""
         out = """\n</articles>"""
         return out
 
     def tmpl_xml_marc_prologue(self):
         """Creates XML MARC prologue."""
         out = """<collection xmlns="http://www.loc.gov/MARC21/slim">\n"""
         return out
 
     def tmpl_xml_marc_epilogue(self):
         """Creates XML MARC epilogue."""
         out = """\n</collection>"""
         return out
 
     def tmpl_xml_default_prologue(self):
         """Creates XML default format prologue. (Sanity calls only.)"""
         out = """<collection>\n"""
         return out
 
     def tmpl_xml_default_epilogue(self):
         """Creates XML default format epilogue. (Sanity calls only.)"""
         out = """\n</collection>"""
         return out
 
 
     def tmpl_collection_not_found_page_title(self, colname, ln=CFG_SITE_LANG):
         """
         Create page title for cases when unexisting collection was asked for.
         """
         _ = gettext_set_language(ln)
         out = _("Collection %s Not Found") % cgi.escape(colname)
         return out
 
     def tmpl_collection_not_found_page_body(self, colname, ln=CFG_SITE_LANG):
         """
         Create page body for cases when unexisting collection was asked for.
         """
         _ = gettext_set_language(ln)
         out = """<h1>%(title)s</h1>
                  <p>%(sorry)s</p>
                  <p>%(you_may_want)s</p>
               """ % { 'title': self.tmpl_collection_not_found_page_title(colname, ln),
                       'sorry': _("Sorry, collection %s does not seem to exist.") % \
                                 ('<strong>' + cgi.escape(colname) + '</strong>'),
                       'you_may_want': _("You may want to start browsing from %s.") % \
                                  ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + \
                                         cgi.escape(CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)) + '</a>')}
         return out
 
     def tmpl_alert_rss_teaser_box_for_query(self, id_query, ln):
         """Propose teaser for setting up this query as alert or RSS feed.
 
         Parameters:
           - 'id_query' *int* - ID of the query we make teaser for
           - 'ln' *string* - The language to display
         """
 
         # load the right message language
         _ = gettext_set_language(ln)
 
         # get query arguments:
         res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,))
         argd = {}
         if res:
             argd = cgi.parse_qs(res[0][0])
 
         rssurl = self.build_rss_url(argd)
         alerturl = CFG_SITE_URL + '/youralerts/input?ln=%s&amp;idq=%s' % (ln, id_query)
 
         out = '''<a name="googlebox"></a>
                  <table class="googlebox"><tr><th class="googleboxheader">%(similar)s</th></tr>
                  <tr><td class="googleboxbody">%(msg_alert)s</td></tr>
                  </table>
                  ''' % {
                 'similar' : _("Interested in being notified about new results for this query?"),
                 'msg_alert': _("""Set up a personal %(x_url1_open)semail alert%(x_url1_close)s
                                   or subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \
                         {'x_url1_open': '<a href="%s"><img src="%s/img/mail-icon-12x8.gif" border="0" alt="" /></a> ' % (alerturl, CFG_SITE_URL) + ' <a class="google" href="%s">' % (alerturl),
                          'x_url1_close': '</a>',
                          'x_url2_open': '<a href="%s"><img src="%s/img/feed-icon-12x12.gif" border="0" alt="" /></a> ' % (rssurl, CFG_SITE_URL) + ' <a class="google" href="%s">' % rssurl,
                          'x_url2_close': '</a>',
                          }}
         return out
 
     def tmpl_detailed_record_metadata(self, recID, ln, format,
                                       content,
                                       creationdate=None,
                                       modificationdate=None):
         """Returns the main detailed page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - 'format' *string* - The format in used to print the record
 
           - 'content' *string* - The main content of the page
 
           - 'creationdate' *string* - The creation date of the printed record
 
           - 'modificationdate' *string* - The last modification date of the printed record
         """
         _ = gettext_set_language(ln)
 
         out = content
 
         return out
 
     def tmpl_detailed_record_statistics(self, recID, ln,
                                         downloadsimilarity,
                                         downloadhistory, viewsimilarity):
         """Returns the statistics page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - downloadsimilarity *string* - downloadsimilarity box
 
           - downloadhistory *string* - downloadhistory box
 
           - viewsimilarity *string* - viewsimilarity box
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
 
         if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and downloadsimilarity is not None:
             similar = self.tmpl_print_record_list_for_similarity_boxen (
                 _("People who downloaded this document also downloaded:"), downloadsimilarity, ln)
 
             out = '<table>'
             out += '''
                     <tr><td>%(graph)s</td></tr>
                     <tr><td>%(similar)s</td></tr>
                     ''' % { 'siteurl': CFG_SITE_URL,   'recid': recID, 'ln': ln,
                              'similar': similar, 'more': _("more"),
                              'graph': downloadsimilarity
                              }
 
             out += '</table>'
             out +=  '<br />'
 
         if CFG_BIBRANK_SHOW_READING_STATS and viewsimilarity is not None:
             out += self.tmpl_print_record_list_for_similarity_boxen (
                 _("People who viewed this page also viewed:"), viewsimilarity, ln)
 
         if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS and downloadhistory is not None:
             out += downloadhistory + '<br />'
 
         return out
 
     def tmpl_detailed_record_citations(self, recID, ln,
                                        citinglist, citationhistory,
                                        cociting,selfcited):
         """Returns the citations page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - citinglist *list* - a list of tuples [(x1,y1),(x2,y2),..] where x is doc id and y is number of citations
 
           - citationhistory *string* - citationhistory box
 
           - cociting *string* - cociting box
 
           - selfcited list - a list of self-citations for recID
 
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = '<table>'
         if CFG_BIBRANK_SHOW_CITATION_STATS and citinglist is not None:
             similar = self.tmpl_print_record_list_for_similarity_boxen(
                 _("Cited by: %s records") % len (citinglist), citinglist, ln)
 
             out += '''
                     <tr><td>
                       %(similar)s&nbsp;%(more)s
                       <br /><br />
                     </td></tr>''' % {
                 'more': create_html_link(
                 self.build_search_url(p='recid:%d' % \
                                       recID,      #XXXX
                                       rm='citation', ln=ln),
                                       {}, _("more")),
                 'similar': similar}
 
         if CFG_BIBRANK_SHOW_CITATION_GRAPHS and selfcited is not None:
             sc_scorelist = [] #a score list for print..
             for s in selfcited:
                 #copy weight from citations
                 weight = 0
                 for c in citinglist:
                     (crec,score) = c
                     if crec == s:
                         weight = score
                 tmp = [s,weight]
                 sc_scorelist.append(tmp)
             scite = self.tmpl_print_record_list_for_similarity_boxen (
                 _(".. of which self-citations: %s records") % len (selfcited), sc_scorelist, ln)
             out += '<tr><td>'+scite+'</td></tr>'
 
         if CFG_BIBRANK_SHOW_CITATION_STATS and cociting is not None:
             similar = self.tmpl_print_record_list_for_similarity_boxen (
                 _("Co-cited with: %s records") % len (cociting), cociting, ln)
 
             out += '''
                     <tr><td>
                       %(similar)s&nbsp;%(more)s
                       <br />
                     </td></tr>''' % { 'more': create_html_link(self.build_search_url(p='cocitedwith:%d' % recID, ln=ln),
                                                                 {}, _("more")),
                                       'similar': similar}
 
         if CFG_BIBRANK_SHOW_CITATION_GRAPHS and citationhistory is not None:
             out += '<tr><td>%s</td></tr>' % citationhistory
 
         out += '</table>'
 
         return out
 
     def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict,
                                 citedbylist, kwtuples, authors, vtuples, ln):
         """Prints stuff about the author given as authorname.
            1. Author name + his/her institutes. Each institute I has a link
               to papers where the auhtor has I as institute.
            2. Publications, number: link to search by author.
            3. Keywords
            4. Author collabs
            5. Publication venues like journals
            The parameters are data structures needed to produce 1-6, as follows:
            req - request
            pubs - list of recids, probably the records that have the author as an author
            authorname - evident
            num_downloads - evident
            aff_pubdict - a dictionary where keys are inst names and values lists of recordids
            citedbylist - list of recs that cite pubs
            kwtuples - keyword tuples like ('HIGGS BOSON',[3,4]) where 3 and 4 are recids
            authors - a list of authors that have collaborated with authorname
         """
         _ = gettext_set_language(ln)
         #make a authoraff string that looks like CERN (1), Caltech (2) etc
         authoraff = ""
         for a in aff_pubdict.keys():
             recids = "+or+".join(map(str,aff_pubdict[a]))
             searchstr = "<a href=\"../search?f=recid&p="+recids+"\">"+str(len(aff_pubdict[a]))+"</a>"
             if (a == ' '):
                 authoraff = authoraff+" "+_("unknown")+" ("+searchstr+")"
             else:
                 authoraff = authoraff+" "+a+" ("+searchstr+")"
 
         #construct a string for searching a=thisauthor
         searchstr = create_html_link(self.build_search_url(p=authorname,
                                      f='author'),
                                      {}, str(len(pubs)), {'class':"google"})
         #print a "general" banner about the author
         line1 = _("Author")+": <i>"+authorname+"</i>"+authoraff
         line2 = _("Publications")+": "+searchstr+" ("+_("downloaded")+" "
         line2 += str(num_downloads)+" "+_("times")+")"
         banner = self.tmpl_print_searchresultbox(line1, line2)
-        req.write(banner)       
+        req.write(banner)
 
         #keywords, collaborations
         keywstr = ""
         collabstr = ""
         if (kwtuples):
             for (freq, kw) in kwtuples:
                 #create a link in author=x, keyword=y
                 searchstr = create_html_link(self.build_search_url(
                                                 p1=authorname,
                                                 f1='author',
                                                 p2=kw, f2='keyword', m1='e', op1='a', m2='e'),
                                                 {}, kw+" ("+str(freq)+")", {'class':"google"})
                 keywstr = keywstr+" "+searchstr
             banner = self.tmpl_print_searchresultbox(_("Frequent keywords"), keywstr)
             req.write(banner)
         if (authors):
             for c in authors:
                 collabstr = collabstr + " <a href=\"/author/"+c+"\">"+c+"</a>"
             banner = self.tmpl_print_searchresultbox(_("Author collaborations"), collabstr)
             req.write(banner)
-                
+
         if (vtuples):
             banner = self.tmpl_print_searchresultbox(_("Publishes in"), str(vtuples))
-            req.write(banner)       
-                        
-    
+            req.write(banner)
+
+
     def tmpl_detailed_record_references(self, recID, ln, content):
         """Returns the discussion page of a record
 
         Parameters:
 
           - 'recID' *int* - The ID of the printed record
 
           - 'ln' *string* - The language to display
 
           - 'content' *string* - The main content of the page
         """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = ''
         if content is not None:
             out += content
 
         return out
 
     def tmpl_citesummary_html(self, ln, totalcites, avgstr, reciddict):
         """A template for citation summary -- output in HTML.
-           Parameters: 
-               - ln *string* = language, 
+           Parameters:
+               - ln *string* = language,
                - totalcites *string* = total number of citations,
                - avgstr *string* = average number of citations per records,
-               - reciddict is a dictionary as follows: 
+               - reciddict is a dictionary as follows:
                    "string description of the citation class" -> [id1,id2,..] """
         # load the right message language
         _ = gettext_set_language(ln)
 
         out = "<table>"+ \
             "<tr><td>"+"<strong>"+_("Citation summary")+"</strong></td><td></td></tr>" + \
             "<tr><td>"+"<strong>"+_("Citations")+"</strong></td><td>"+totalcites+"</td></tr>" + \
             "<tr><td>"+"<strong>"+_("Avg cit per record")+"</strong></td><td>"+avgstr+"</td></tr>"
         #print the stuff in reciddict
         for k in reciddict.keys():
             rowtitle = k
             reclist = reciddict[k]
             out += "<tr><td>"+_(rowtitle)+"</td><td>"+str(len(reclist))+"</td></tr>"
         out += '</table>'
-        return out
\ No newline at end of file
+        return out
diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py
index 93a587fa2..8a61387b4 100644
--- a/modules/websearch/lib/websearch_webinterface.py
+++ b/modules/websearch/lib/websearch_webinterface.py
@@ -1,961 +1,961 @@
 ## $Id$
 ##
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """WebSearch URL handler."""
 
 __revision__ = "$Id$"
 
 import cgi
 import os
 import datetime
 from urllib import quote
 from mod_python import apache
 
 #maximum number of collaborating authors etc shown in GUI
 MAX_COLLAB_LIST = 10
 MAX_KEYWORD_LIST = 10
 MAX_VENUE_LIST = 10
 #tag constants
 AUTHOR_TAG = "100__a"
 COAUTHOR_TAG = "700_a"
 AUTHOR_INST_TAG = "100__u"
 VENUE_TAG = "909C4p"
 KEYWORD_TAG = "6531_a"
 
 try:
     Set = set
 except NameError:
     from sets import Set
 
 from invenio.config import \
      CFG_SITE_URL, \
      CFG_SITE_NAME, \
      CFG_CACHEDIR, \
      CFG_SITE_LANG, \
      CFG_SITE_ADMIN_EMAIL, \
      CFG_SITE_SECURE_URL, \
      CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \
      CFG_WEBSEARCH_RSS_TTL, \
-     CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS 
+     CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS
 from invenio.dbquery import Error
 from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
 from invenio.urlutils import redirect_to_url, make_canonical_urlargd, drop_default_urlargd, create_html_link
 from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \
     collect_user_info, http_check_credentials
 from invenio import search_engine
 from invenio.websubmit_webinterface import WebInterfaceFilesPages
 from invenio.webcomment_webinterface import WebInterfaceCommentsPages
 from invenio.webpage import page, create_error_box
 from invenio.messages import gettext_set_language
 from invenio.search_engine import get_colID, get_coll_i18nname, \
     check_user_can_view_record, collection_restricted_p, restricted_collection_cache, \
     get_fieldvalues
 from invenio.access_control_engine import acc_authorize_action
 from invenio.access_control_config import VIEWRESTRCOLL
 from invenio.access_control_mailcookie import mail_cookie_create_authorize_action
 from invenio.bibformat import format_records
 from invenio.bibformat_engine import get_output_formats
 from invenio.websearch_webcoll import mymkdir, get_collection
 from invenio.intbitset import intbitset
 from invenio.bibupload import find_record_from_sysno
 from invenio.bibrank_citation_searcher import get_author_cited_by, get_cited_by_list
 from invenio.bibrank_downloads_indexer import get_download_weight_total
 from invenio.search_engine_summarizer import summarize
 
 import invenio.template
 websearch_templates = invenio.template.load('websearch')
 
 search_results_default_urlargd = websearch_templates.search_results_default_urlargd
 search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd
 try:
     output_formats = [output_format['attrs']['code'].lower() for output_format in \
                       get_output_formats(with_attributes=True).values()]
 except KeyError:
     output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx']
 output_formats.extend(['hm', 't', 'h'])
 
 def wash_search_urlargd(form):
     """
     Create canonical search arguments from those passed via web form.
     """
 
     argd = wash_urlargd(form, search_results_default_urlargd)
 
     # Sometimes, users pass ot=245,700 instead of
     # ot=245&ot=700. Normalize that.
     ots = []
     for ot in argd['ot']:
         ots += ot.split(',')
     argd['ot'] = ots
 
     # We can either get the mode of function as
     # action=<browse|search>, or by setting action_browse or
     # action_search.
     if argd['action_browse']:
         argd['action'] = 'browse'
     elif argd['action_search']:
         argd['action'] = 'search'
     else:
         if argd['action'] not in ('browse', 'search'):
             argd['action'] = 'search'
 
     del argd['action_browse']
     del argd['action_search']
 
     return argd
 
 
 class WebInterfaceAuthorPages(WebInterfaceDirectory):
     """ Handle /author/Doe%2C+John etc set of pages."""
 
     _exports = ['author']
 
     def __init__(self, authorname=''):
         """Constructor."""
         self.authorname = authorname
 
     def _lookup(self, component, path):
         """This handler parses dynamic URLs (/author/John+Doe)."""
         return WebInterfaceAuthorPages(component), path
 
-    
+
     def __call__(self, req, form):
         """Serve the page in the given language."""
         argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG)})
         ln = argd['ln']
         req.argd = argd #needed since perform_req_search
         #wants to check it in case of no results
         self.authorname = self.authorname.replace("+"," ")
         citelist = get_author_cited_by(self.authorname)
         req.content_type = "text/html"
         req.send_http_header()
         uid = getUid(req)
 
         search_engine.page_start(req, "hb", "", "", ln, uid)
 
-        
+
         #search the publications by this author
         pubs = search_engine.perform_request_search(req=req, p=self.authorname, f="author")
         #get most frequent first authors of these pubs
         authors = search_engine.get_most_popular_values_for_code(pubs, AUTHOR_TAG)
-        #and affiliates 
+        #and affiliates
         collabs = search_engine.get_most_popular_values_for_code(pubs, COAUTHOR_TAG)
         #and publication venues
         venuedict =  search_engine.get_values_for_code_dict(pubs, VENUE_TAG)
         #and keywords
         kwdict = search_engine.get_values_for_code_dict(pubs, KEYWORD_TAG)
-        
+
         #construct a simple list of tuples that contains keywords that appear more than once
         #moreover, limit the length of the list to MAX_KEYWORD_LIST
         kwtuples = []
         for k in kwdict.keys():
             if kwdict[k] > 1:
                 mytuple = (kwdict[k], k)
                 kwtuples.append(mytuple)
         #sort ..
         kwtuples.sort()
         kwtuples.reverse()
         kwtuples = kwtuples[0:MAX_KEYWORD_LIST]
-        
+
         #same for venues
         vtuples = []
-        
+
         for k in venuedict.keys():
             if venuedict[k] > 1:
                 mytuple = (venuedict[k], k)
                 vtuples.append(mytuple)
         #sort ..
         vtuples.sort()
         vtuples.reverse()
         vtuples = vtuples[0:MAX_VENUE_LIST]
 
-        
+
         authors.extend(collabs) #join
         #remove the author in question from authors: they are associates
         if (authors.count(self.authorname) > 0):
             authors.remove(self.authorname)
-        
+
         authors = authors[0:MAX_COLLAB_LIST] #cut extra
-        
-        #a dict. keys: affiliations, values: lists of publications      
+
+        #a dict. keys: affiliations, values: lists of publications
         author_aff_pubs = self.get_institute_pub_dict(pubs)
         authoraffs = author_aff_pubs.keys()
-        
+
         #find out how many times these records have been downloaded
         recsloads = {}
         recsloads = get_download_weight_total(recsloads, pubs)
         #sum up
         totaldownloads = 0
         for k in recsloads.keys():
             totaldownloads = totaldownloads + recsloads[k]
 
         #get cited by..
         citedbylist = get_cited_by_list(pubs)
         #finally all stuff there, call the template
         websearch_templates.tmpl_author_information(req, pubs, self.authorname,
-                                                    totaldownloads, author_aff_pubs, 
+                                                    totaldownloads, author_aff_pubs,
                                                     citedbylist, kwtuples, authors, vtuples, ln)
-        
-        #cited-by summary       
+
+        #cited-by summary
         out = summarize(pubs, 'hbcs', ln)
         req.write(out)
-                
+
         simauthbox = search_engine.create_similarly_named_authors_link_box(self.authorname)
         req.write(simauthbox)
 
     def get_institute_pub_dict(mee, recids):
-        #return a dictionary consisting of institute -> list of publications            
+        #return a dictionary consisting of institute -> list of publications
         affus = [] #list of insts from the record
         author_aff_pubs = {} #the disct to be build
         for recid in recids:
             #iterate all so that we get first author's intitute
             #if this the first author OR
             #"his" institute if he is an affliate author
             mainauthors = get_fieldvalues(recid, AUTHOR_TAG)
             mainauthor = " "
             if mainauthors:
                 mainauthor = mainauthors[0]
             if (mainauthor == mee.authorname):
                 affus = get_fieldvalues(recid, AUTHOR_INST_TAG)
             #if this is empty, add a dummy " " value
             if (affus == []):
                 affus = [" "]
             for a in affus:
                 #add in author_aff_pubs
                 if (author_aff_pubs.has_key(a)):
                     tmp = author_aff_pubs[a]
                     tmp.append(recid)
                     author_aff_pubs[a] = tmp
                 else:
                     author_aff_pubs[a] = [recid]
         return author_aff_pubs
 
     index = __call__
 
 
 class WebInterfaceRecordPages(WebInterfaceDirectory):
     """Handling of a /record/<recid> URL fragment.
 
     The 'usage', 'references' and 'citations' sub-pages are served by
     this object itself; 'files', 'reviews', 'comments' and 'export'
     are delegated to the handler objects created in __init__.
     """
 
     _exports = ['', 'files', 'reviews', 'comments', 'usage',
                 'references', 'export', 'citations']
 
     #_exports.extend(output_formats)
 
     def __init__(self, recid, tab, format=None):
         """Store the request parameters and create the sub-page handlers.
 
         recid  -- ID of the record to display
         tab    -- tab name, passed on to the search engine as the 'tab'
                   argument
         format -- output format code; when not None it overrides the
                   'of' URL argument
         """
         self.recid = recid
         self.tab = tab
         self.format = format
 
         self.files = WebInterfaceFilesPages(self.recid)
         self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1)
         self.comments = WebInterfaceCommentsPages(self.recid)
         # These sub-pages are rendered by this very handler.
         self.usage = self
         self.references = self
         self.citations = self
         self.export = WebInterfaceRecordExport(self.recid, self.format)
 
     def __call__(self, req, form):
         """Serve the record page.
 
         Washes the URL arguments found in FORM, forces 'recid', 'tab'
         and (when a format was given) 'of', checks that the user may
         view the record and hands over to
         search_engine.perform_request_search().
 
         Returns the output of perform_request_search() (an empty list
         is returned as its string form), a "not authorized" page, or
         a redirection to the login page.
         """
         argd = wash_search_urlargd(form)
         argd['recid'] = self.recid
         argd['tab'] = self.tab
 
         if self.format is not None:
             argd['of'] = self.format
         req.argd = argd
         uid = getUid(req)
         if uid == -1:
             return page_not_authorized(req, "../",
                 text="You are not authorized to view this record.",
                                        navmenuid='search')
         elif uid > 0:
             pref = get_user_preferences(uid)
             try:
                 if not form.has_key('rg'):
                     # fetch user rg preference only if not overridden via URL
                     argd['rg'] = int(pref['websearch_group_records'])
             except (KeyError, ValueError):
                 # no preference stored, or not a number: keep washed value
                 pass
 
         user_info = collect_user_info(req)
         (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)
 
         if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']:
             # Refused guest: send to the login page, carrying a cookie for
             # the authorization action and the URL to come back to.
             cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : search_engine.guess_primary_collection_of_a_record(self.recid)})
             target = '/youraccount/login' + \
                     make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                     CFG_SITE_URL + '/record/' + str(self.recid) + make_canonical_urlargd(argd, \
                     search_results_default_urlargd)}, {'ln' : CFG_SITE_LANG})
             return redirect_to_url(req, target)
         elif auth_code:
             return page_not_authorized(req, "../", \
                 text = auth_msg,\
                 navmenuid='search')
 
         # mod_python does not like to return [] in case when of=id:
         out = search_engine.perform_request_search(req, **argd)
         if out == []:
             return str(out)
         else:
             return out
 
     # Return the same page whether we ask for /record/123 or /record/123/
     index = __call__
 
 class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory):
     """Handler for the /record-restricted/<recid> URL fragment."""
 
     _exports = ['', 'files', 'reviews', 'comments', 'usage',
                 'references', 'export', 'citations']
 
     #_exports.extend(output_formats)
 
     def __init__(self, recid, tab, format=None):
         """Keep RECID, TAB and FORMAT and build the sub-page handlers."""
         self.recid = recid
         self.tab = tab
         self.format = format
 
         # Sub-pages answered by this object itself.
         self.usage = self.references = self.citations = self
 
         # Sub-pages answered by dedicated handlers.
         self.files = WebInterfaceFilesPages(self.recid)
         self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1)
         self.comments = WebInterfaceCommentsPages(self.recid)
         self.export = WebInterfaceRecordExport(self.recid, self.format)
 
     def __call__(self, req, form):
         """Display the record, unless its primary collection is
         restricted and the user is not authorized to view it."""
         argd = wash_search_urlargd(form)
         argd['recid'] = self.recid
         if self.format is not None:
             argd['of'] = self.format
 
         req.argd = argd
 
         uid = getUid(req)
         user_info = collect_user_info(req)
         if uid == -1:
             return page_not_authorized(
                 req, "../",
                 text="You are not authorized to view this record.",
                 navmenuid='search')
         if uid > 0:
             user_prefs = get_user_preferences(uid)
             try:
                 # the stored 'rg' preference applies only when the URL
                 # does not give one
                 if not form.has_key('rg'):
                     argd['rg'] = int(user_prefs['websearch_group_records'])
             except (KeyError, ValueError):
                 pass
 
         primary_coll = search_engine.guess_primary_collection_of_a_record(self.recid)
 
         if collection_restricted_p(primary_coll):
             (refused, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=primary_coll)
             if refused:
                 return page_not_authorized(
                     req, "../",
                     text="You are not authorized to view this record.",
                     navmenuid='search')
 
         # The record page may reuse the arguments to derive other queries.
         req.argd = argd
 
         # mod_python does not like to return [] in case when of=id:
         result = search_engine.perform_request_search(req, **argd)
         if result == []:
             return str(result)
         return result
 
     # Same page for /record-restricted/123 and /record-restricted/123/
     index = __call__
 
 class WebInterfaceSearchResultsPages(WebInterfaceDirectory):
     """Handling of the /search URL and its sub-pages.
 
     Exported sub-pages: the search itself ('' -> __call__),
     'authenticate', 'cache' and 'log'.
     """
 
     _exports = ['', 'authenticate', 'cache', 'log']
 
     def __call__(self, req, form):
         """Perform a search.
 
         Washes the URL arguments in FORM, refuses POST requests, checks
         that the user may view every collection or record range
         involved in the query, and then calls
         search_engine.perform_request_search().
 
         Returns the search output (an empty list is returned as its
         string form), a "not authorized" page, or a redirection to the
         login page for refused guests.
         """
         argd = wash_search_urlargd(form)
 
         _ = gettext_set_language(argd['ln'])
 
         # Searching is refused for POST requests.
         if req.method == 'POST':
             raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED
 
         uid = getUid(req)
         user_info = collect_user_info(req)
         if uid == -1:
             return page_not_authorized(req, "../",
                 text = _("You are not authorized to view this area."),
                                        navmenuid='search')
         elif uid > 0:
             pref = get_user_preferences(uid)
             try:
                 if not form.has_key('rg'):
                     # fetch user rg preference only if not overridden via URL
                     argd['rg'] = int(pref['websearch_group_records'])
             except (KeyError, ValueError):
                 pass
 
         # Collections named in the query ('c' and 'cc'); the primary
         # collection of a single requested record may be added below.
         involved_collections = Set()
         involved_collections.update(argd['c'])
         involved_collections.add(argd['cc'])
 
         # 'id'/'idb' and 'sysno'/'sysnb' are alternative ways of giving
         # 'recid'/'recidb'; fold them into the latter.
         if argd['id'] > 0:
             argd['recid'] = argd['id']
         if argd['idb'] > 0:
             argd['recidb'] = argd['idb']
         if argd['sysno']:
             tmp_recid = find_record_from_sysno(argd['sysno'])
             if tmp_recid:
                 argd['recid'] = tmp_recid
         if argd['sysnb']:
             tmp_recid = find_record_from_sysno(argd['sysnb'])
             if tmp_recid:
                 argd['recidb'] = tmp_recid
 
         if argd['recid'] > 0:
             if argd['recidb'] > argd['recid']:
                 # Hack to check if among the restricted collections
                 # at least a record of the range is there and
                 # then if the user is not authorized for that
                 # collection.
                 # Note: xrange() excludes argd['recidb'] itself.
                 recids = intbitset(xrange(argd['recid'], argd['recidb']))
                 restricted_colls = restricted_collection_cache.get_cache()
                 for collname in restricted_colls:
                     (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname)
                     if auth_code:
                         # User is refused for this collection: it only
                         # matters if the range touches its records.
                         coll_recids = get_collection(collname).reclist
                         if coll_recids & recids:
                             # (auth_code is already known to be true here)
                             if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']:
                                 cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname})
                                 target = '/youraccount/login' + \
                                 make_canonical_urlargd({'action' : cookie,                        'ln' : argd['ln'], 'referer' : \
                                 CFG_SITE_URL + '/search' + make_canonical_urlargd(argd, \
                                 search_results_default_urlargd)}, {'ln' : CFG_SITE_LANG})
                                 return redirect_to_url(req, target)
                             else:
                                 return page_not_authorized(req, "../", \
                                     text = auth_msg,\
                                     navmenuid='search')
             else:
                 # Single record: its primary collection is checked with
                 # the other involved collections below.
                 involved_collections.add(search_engine.guess_primary_collection_of_a_record(argd['recid']))
 
         # If any of the collection requires authentication, redirect
         # to the authentication form.
         for coll in involved_collections:
             if collection_restricted_p(coll):
                 (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                 if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']:
                     cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                     target = '/youraccount/login' + \
                     make_canonical_urlargd({'action' : cookie,                        'ln' : argd['ln'], 'referer' : \
                     CFG_SITE_URL + '/search' + make_canonical_urlargd(argd, \
                     search_results_default_urlargd)}, {'ln' : CFG_SITE_LANG})
                     return redirect_to_url(req, target)
                 elif auth_code:
                     return page_not_authorized(req, "../", \
                         text = auth_msg,\
                         navmenuid='search')
 
 
         # Keep all the arguments, they might be reused in the
         # search_engine itself to derivate other queries
         req.argd = argd
 
         # mod_python does not like to return [] in case when of=id:
         out = search_engine.perform_request_search(req, **argd)
         if out == []:
             return str(out)
         else:
             return out
 
     def cache(self, req, form):
         """Search cache page.
 
         Passes the 'action' URL argument (default 'show') to
         search_engine.perform_request_cache().
         """
         argd = wash_urlargd(form, {'action': (str, 'show')})
         return search_engine.perform_request_cache(req, action=argd['action'])
 
     def log(self, req, form):
         """Search log page.
 
         Passes the 'date' URL argument (default '') to
         search_engine.perform_request_log().
         """
         argd = wash_urlargd(form, {'date': (str, '')})
         return search_engine.perform_request_log(req, date=argd['date'])
 
     def authenticate(self, req, form):
         """Restricted search results pages.
 
         Refuses the request with a "not authorized" page as soon as one
         of the collections in 'c' or 'cc' is restricted and the user is
         not authorized for it; otherwise performs the search.
         """
 
         argd = wash_search_urlargd(form)
 
         user_info = collect_user_info(req)
         for coll in argd['c'] + [argd['cc']]:
             if collection_restricted_p(coll):
                 (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                 if auth_code:
                     return page_not_authorized(req, "../",
                         text="You are not authorized to view this collection.",
                         navmenuid='search')
 
         # Keep all the arguments, they might be reused in the
         # search_engine itself to derivate other queries
         req.argd = argd
 
         uid = getUid(req)
         if uid > 0:
             pref = get_user_preferences(uid)
             try:
                 if not form.has_key('rg'):
                     # fetch user rg preference only if not overridden via URL
                     argd['rg'] = int(pref['websearch_group_records'])
             except (KeyError, ValueError):
                 pass
 
 
         # mod_python does not like to return [] in case when of=id:
         out = search_engine.perform_request_search(req, **argd)
         if out == []:
             return str(out)
         else:
             return out
 
 # Accepted arguments, as name -> (type, default value), for the legacy
 # collection URLs of the form /?c=ALEPH
 legacy_collection_default_urlargd = {
     'as': (int, 0),
     'verbose': (int, 0),
     'c': (str, CFG_SITE_NAME),
 }
 
 class WebInterfaceSearchInterfacePages(WebInterfaceDirectory):
 
     """Handling of collection navigation.
 
     Serves the legacy /index.py and /search.py URLs, the /search,
     /openurl and /testsso pages, and (through _lookup) the dynamic
     /collection/<name> and /record/<recid> URLs.
     """
 
     _exports = [('index.py', 'legacy_collection'),
                 ('', 'legacy_collection'),
                 ('search.py', 'legacy_search'),
                 'search', 'openurl', 'testsso']
 
     search = WebInterfaceSearchResultsPages()
 
     def testsso(self, req, form):
         """For testing single sign-on.
 
         Returns an HTML page with a table of all the variables of the
         request environment whose name starts with 'HTTP_ADFS_'.
         """
         req.add_common_vars()
         sso_env = {}
         for var, value in req.subprocess_env.iteritems():
             if var.startswith('HTTP_ADFS_'):
                 sso_env[var] = value
         out = "<HTML><HEAD><TITLE>SSO test</TITLE></HEAD>"
         out += "<BODY><TABLE>"
         for var, value in sso_env.iteritems():
             # Names and values come from the request environment: escape
             # them before embedding them in the page.
             out += "<TR><TD><STRONG>%s</STRONG></TD><TD>%s</TD></TR>" % \
                    (cgi.escape(var), cgi.escape(str(value)))
         out += "</TABLE></BODY></HTML>"
         return out
 
     def _lookup(self, component, path):
         """ This handler is invoked for the dynamic URLs (for
         collections and records).
 
         component -- first element of the URL path ('collection',
                      'record' or 'record-restricted')
         path      -- list of the remaining path elements
 
         Returns a (handler, remaining path) pair; the handler is None
         when the URL is not recognised (page not found).
         """
 
         if component == 'collection':
             c = '/'.join(path)
 
             def answer(req, form):
                 """Accessing collections cached pages."""
                 # Accessing collections: this is for accessing the
                 # cached page on top of each collection.
 
                 argd = wash_urlargd(form, search_interface_default_urlargd)
 
                 # We simply return the cached page of the collection
                 argd['c'] = c
 
                 if not argd['c']:
                     # collection argument not present; display
                     # home collection by default
                     argd['c'] = CFG_SITE_NAME
 
                 return display_collection(req, **argd)
 
             return answer, []
 
 
         elif component == 'record' or component == 'record-restricted':
             try:
                 recid = int(path[0])
             except IndexError:
                 # display record #1 for URL /record without a number
                 recid = 1
             except ValueError:
                 if path[0] == '':
                     # display record #1 for URL /record/ without a number
                     recid = 1
                 else:
                     # display page not found for URLs like /record/foo
                     return None, []
 
             if recid <= 0:
                 # display page not found for URLs like /record/-5 or /record/0
                 return None, []
 
             format = None
             tab = ''
             try:
                 if path[1] in ['', 'files', 'reviews', 'comments',
                                'usage', 'references', 'citations']:
                     tab = path[1]
                 elif path[1] == 'export':
                     tab = ''
                     # /record/<recid>/export/<format>; a missing format
                     # raises IndexError, handled below
                     format = path[2]
 #                    format = None
 #                elif path[1] in output_formats:
 #                    tab = ''
 #                    format = path[1]
                 else:
                     # display page not found for URLs like /record/references
                     # for a collection where 'references' tabs is not visible
                     return None, []
 
             except IndexError:
                 # Keep normal url if tabs is not specified
                 pass
 
             # 'record-restricted' URLs are currently served by the same
             # handler as 'record' URLs:
             #if component == 'record-restricted':
                 #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:]
             #else:
             return WebInterfaceRecordPages(recid, tab, format), path[1:]
 
         return None, []
 
     def openurl(self, req, form):
         """ OpenURL Handler.
 
         Redirects to the URL computed by the OpenURL template from the
         request arguments, or to the site home page when there is none.
         """
         argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args)
         ret_url = websearch_templates.tmpl_openurl2invenio(argd)
         if ret_url:
             return redirect_to_url(req, ret_url)
         else:
             return redirect_to_url(req, CFG_SITE_URL)
 
     def legacy_collection(self, req, form):
         """Collection URL backward compatibility handling.
 
         Handles URLs of the form /?c=<collection>: without a 'c'
         argument the default collection is displayed in place; with
         one, the user is redirected to / or /collection/<collection>.
         A 'realm' argument triggers the Apache credentials check.
         """
         accepted_args = dict(legacy_collection_default_urlargd)
         # NOTE(review): the 'referer' default contains a '%s' that is
         # never filled in, and being non-empty it makes the fallback URL
         # in the redirection below unreachable -- confirm the intent.
         accepted_args.update({'referer' : (str, '%s/youraccount/your'),
              'realm' : (str, '')})
         argd = wash_urlargd(form, accepted_args)
 
         # Apache authentication stuff
         if argd['realm']:
             http_check_credentials(req, argd['realm'])
             # NOTE(review): redirects to a URL taken from the request
             # ('referer') -- check whether it should be restricted to
             # this site.
             return redirect_to_url(req, argd['referer'] or '%s/youraccount/youradminactivities' % CFG_SITE_SECURE_URL)
 
         del argd['referer']
         del argd['realm']
 
         # If we specify no collection, then we don't need to redirect
         # the user, so that accessing <http://yoursite/> returns the
         # default collection.
         if not form.has_key('c'):
             return display_collection(req, **argd)
 
         # make the collection an element of the path, and keep the
         # other query elements as is. If the collection is CFG_SITE_NAME,
         # however, redirect to the main URL.
         c = argd['c']
         del argd['c']
 
         if c == CFG_SITE_NAME:
             target = '/'
         else:
             target = '/collection/' + quote(c)
 
         target += make_canonical_urlargd(argd, legacy_collection_default_urlargd)
         return redirect_to_url(req, target)
 
 
     def legacy_search(self, req, form):
         """Search URL backward compatibility handling.
 
         Redirects /search.py URLs to /record/<recid> when a record ID
         is given, and to /search otherwise, keeping the other arguments.
         """
         argd = wash_search_urlargd(form)
 
         # We either jump into the generic search form, or the specific
         # /record/... display if a recid is requested
         if argd['recid'] != -1:
             target = '/record/%d' % argd['recid']
             del argd['recid']
 
         else:
             target = '/search'
 
         target += make_canonical_urlargd(argd, search_results_default_urlargd)
         return redirect_to_url(req, target)
 
 
 def display_collection(req, c, as, verbose, ln):
     """Display search interface page for collection c by looking
     in the collection cache."""
 
     _ = gettext_set_language(ln)
 
     req.argd = drop_default_urlargd({'as': as, 'verbose': verbose, 'ln': ln},
                                     search_interface_default_urlargd)
 
     # get user ID:
     try:
         uid = getUid(req)
         user_preferences = {}
         if uid == -1:
             return page_not_authorized(req, "../",
                 text="You are not authorized to view this collection",
                                        navmenuid='search')
         elif uid > 0:
             user_preferences = get_user_preferences(uid)
     except Error:
         return page(title=_("Internal Error"),
                     body = create_error_box(req, verbose=verbose, ln=ln),
                     description="%s - Internal Error" % CFG_SITE_NAME,
                     keywords="%s, Internal Error" % CFG_SITE_NAME,
                     language=ln,
                     req=req,
                     navmenuid='search')
     # start display:
     req.content_type = "text/html"
     req.send_http_header()
     # deduce collection id:
     colID = get_colID(c)
     if type(colID) is not int:
         page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>'
         page_body = '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
         return page(title=_("Collection %s Not Found") % cgi.escape(c),
                     body=page_body,
                     description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                     keywords="%s" % CFG_SITE_NAME,
                     uid=uid,
                     language=ln,
                     req=req,
                     navmenuid='search')
     # display collection interface page:
     try:
         filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % (CFG_CACHEDIR, colID, as, ln), "r")
         c_navtrail = filedesc.read()
         filedesc.close()
         filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % (CFG_CACHEDIR, colID, as, ln), "r")
         c_body = filedesc.read()
         filedesc.close()
         filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r")
         c_portalbox_tp = filedesc.read()
         filedesc.close()
         filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r")
         c_portalbox_te = filedesc.read()
         filedesc.close()
         filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r")
         c_portalbox_lt = filedesc.read()
         filedesc.close()
         # show help boxes (usually located in "tr", "top right")
         # if users have not banned them in their preferences:
         c_portalbox_rt = ""
         if user_preferences.get('websearch_helpbox', 1) > 0:
             filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r")
             c_portalbox_rt = filedesc.read()
             filedesc.close()
         filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r")
         c_last_updated = filedesc.read()
         filedesc.close()
 
         title = get_coll_i18nname(c, ln)
         rssurl = CFG_SITE_URL + '/rss'
         if c != CFG_SITE_NAME:
             rssurl += '?cc=' + quote(c)
 
         return page(title=title,
                     body=c_body,
                     navtrail=c_navtrail,
                     description="%s - %s" % (CFG_SITE_NAME, c),
                     keywords="%s, %s" % (CFG_SITE_NAME, c),
                     uid=uid,
                     language=ln,
                     req=req,
                     cdspageboxlefttopadd=c_portalbox_lt,
                     cdspageboxrighttopadd=c_portalbox_rt,
                     titleprologue=c_portalbox_tp,
                     titleepilogue=c_portalbox_te,
                     lastupdated=c_last_updated,
                     navmenuid='search',
                     rssurl=rssurl)
     except:
         if verbose >= 9:
             req.write("<br />c=%s" % c)
             req.write("<br />as=%s" % as)
             req.write("<br />ln=%s" % ln)
             req.write("<br />colID=%s" % colID)
             req.write("<br />uid=%s" % uid)
         return page(title=_("Internal Error"),
                     body = create_error_box(req, ln=ln),
                     description="%s - Internal Error" % CFG_SITE_NAME,
                     keywords="%s, Internal Error" % CFG_SITE_NAME,
                     uid=uid,
                     language=ln,
                     req=req,
                     navmenuid='search')
 
     return "\n"
 
 class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory):
     """RSS 2.0 feed service pages."""
 
     def __call__(self, req, form):
         """RSS 2.0 feed service."""
 
         # Keep only interesting parameters for the search
         argd = wash_urlargd(form, websearch_templates.rss_default_urlargd)
 
         for coll in argd['c'] + [argd['cc']]:
             if collection_restricted_p(coll):
                 #user_info = collect_user_info(req)
                 #(auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                 #if auth_code and user_info['email'] == 'guest':
                     #cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                     #target = '/youraccount/login' + \
                     #make_canonical_urlargd({'action' : cookie,                        'ln' : argd['ln'], 'referer' : \
                     #CFG_SITE_URL + user_info['uri']}, {})
                     #return redirect_to_url(req, target)
                 #elif auth_code:
                     #raise apache.SERVER_RETURN, apache.HTTP_UNAUTHORIZED
                 raise apache.SERVER_RETURN, apache.HTTP_UNAUTHORIZED
 
         # Create a standard filename with these parameters
         args = websearch_templates.build_rss_url(argd).split('/')[-1]
 
         req.content_type = "application/rss+xml"
         req.send_http_header()
         try:
             # Try to read from cache
             path = "%s/rss/%s.xml" % (CFG_CACHEDIR, args)
             # Check if cache needs refresh
             filedesc = open(path, "r")
             last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
             assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
             c_rss = filedesc.read()
             filedesc.close()
             req.write(c_rss)
             return
         except Exception, e:
             # do it live and cache
             rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
                            websearch_templates.tmpl_xml_rss_prologue() + '\n'
             req.write(rss_prologue)
 
             recIDs = search_engine.perform_request_search(req, of="id",
                                                           c=argd['c'], cc=argd['cc'],
                                                           p=argd['p'], f=argd['f'],
                                                           p1=argd['p1'], f1=argd['f1'],
                                                           m1=argd['m1'], op1=argd['op1'],
                                                           p2=argd['p2'], f2=argd['f2'],
                                                           m2=argd['m2'], op2=argd['op2'],
                                                           p3=argd['p3'], f3=argd['f3'],
                                                           m3=argd['m3'])[:-(CFG_WEBSEARCH_INSTANT_BROWSE_RSS+1):-1]
             rss_body = format_records(recIDs,
                                       of='xr',
                                       record_separator="\n",
                                       req=req, epilogue="\n")
             rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
             req.write(rss_epilogue)
 
             # update cache
             dirname = "%s/rss" % (CFG_CACHEDIR)
             mymkdir(dirname)
             fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, args)
             try:
                 # Remove the file just in case it already existed
                 # so that a bit of space is created
                 os.remove(fullfilename)
             except OSError:
                 pass
 
             # Check if there's enough space to cache the request.
             if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
                 try:
                     os.umask(022)
                     f = open(fullfilename, "w")
                 except IOError, v:
                     raise v
 
                 f.write(rss_prologue + rss_body + rss_epilogue)
                 f.close()
 
     index = __call__
 
 
 class WebInterfaceRecordExport(WebInterfaceDirectory):
     """Handler for the /record/<recid>/export/<format> URL fragment."""
 
     _exports = output_formats
 
     def __init__(self, recid, format=None):
         """Keep RECID and FORMAT; every known output format name is
         made to resolve to this very object."""
         self.recid = recid
         self.format = format
 
         for format_name in output_formats:
             self.__dict__[format_name] = self
 
     def __call__(self, req, form):
         """Output the record in the requested format, after checking
         that the user may view it."""
         argd = wash_search_urlargd(form)
         argd['recid'] = self.recid
 
         if self.format is not None:
             argd['of'] = self.format
         req.argd = argd
 
         uid = getUid(req)
         if uid == -1:
             return page_not_authorized(
                 req, "../",
                 text="You are not authorized to view this record.",
                 navmenuid='search')
         if uid > 0:
             user_prefs = get_user_preferences(uid)
             try:
                 # the stored 'rg' preference applies only when the URL
                 # does not give one
                 if not form.has_key('rg'):
                     argd['rg'] = int(user_prefs['websearch_group_records'])
             except (KeyError, ValueError):
                 pass
 
         # A refused guest is sent to the login page; any other refused
         # user gets the "not authorized" page.
         user_info = collect_user_info(req)
         (refused, refusal_msg) = check_user_can_view_record(user_info, self.recid)
         if refused:
             if user_info['email'] == 'guest' and not user_info['apache_user']:
                 primary_coll = search_engine.guess_primary_collection_of_a_record(self.recid)
                 cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : primary_coll})
                 login_args = {'action': cookie,
                               'ln' : argd['ln'],
                               'referer' : CFG_SITE_URL + '/record/' + str(self.recid) + \
                                   make_canonical_urlargd(argd, search_results_default_urlargd)}
                 target = '/youraccount/login' + \
                     make_canonical_urlargd(login_args, {'ln' : CFG_SITE_LANG})
                 return redirect_to_url(req, target)
             return page_not_authorized(req, "../",
                                        text=refusal_msg,
                                        navmenuid='search')
 
         # mod_python does not like to return [] in case when of=id:
         result = search_engine.perform_request_search(req, **argd)
         if result == []:
             return str(result)
         return result
 
     # Same page for /record/123/export/xm and /record/123/export/xm/
     index = __call__
diff --git a/po/i18n_extract_from_wml_source.py b/po/i18n_extract_from_wml_source.py
index 8182b49eb..8430c7665 100644
--- a/po/i18n_extract_from_wml_source.py
+++ b/po/i18n_extract_from_wml_source.py
@@ -1,151 +1,151 @@
 ## $Id$
 ##
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """
 This tool extracts sentences to be translated from HTML / WML source
 files.
 
 The sentences to translate are marked with the following tag:
 
  Blah blah _(To be translated)_ blah.
 
-These tags can span several lines. Extra whitespace is discarded. 
+These tags can span several lines. Extra whitespace is discarded.
 """
 
 __revision__ = "$Id$"
 
 import sys, re, os
 
 # Matches one _(...)_ tag and captures, non-greedily, the text between
 # the markers.  NOTE(review): re.M only changes how ^ and $ match and
 # the pattern uses neither, so the flag looks like a no-op here; '.'
 # does not match a newline without re.DOTALL.
 _tag_re = re.compile(r'_\((.*?)\)_', re.M)
 # Matches a single newline character.
 _nl_re = re.compile('\n')
 # Matches a run of one or more whitespace characters.
 _ws_re = re.compile('\s+')
 
 def print_usage():
     """Print usage info on stdout.
 
     The command name shown in the usage line is taken from
     sys.argv[0].
     """
     # A single parenthesised expression prints the same text whether
     # `print` is a statement or a function.
     print("""Usage: %s <dirname> <potfiles-filename>
 Description: Extract translatable strings from the list of files read
              from potfiles-filename.  The files specified there are
              relative to dirname.  Print results on stdout.
 """ % sys.argv[0])
     return
 
 def quote(text):
     """Normalize and quote a string for inclusion in the po file.
 
     Backslashes are escaped first, so that the backslashes introduced
     afterwards for newline, tab and double quote are not doubled
     again.  A newline becomes backslash + 'n', a tab becomes
     backslash + 't' and a double quote becomes backslash + '"'.
     """
     return text.\
            replace('\\', '\\\\').\
            replace('\n', '\\n').\
            replace('\t', '\\t').\
            replace('"',  '\\"')
 
 
 def extract_from_wml_files(dirname, potfiles_filename):
     """Extract translatable strings from the list of files read from
     potfiles_filename.  The files specified there are relative to
     dirname.  Print results on stdout.
     """
 
     ## extract messages and fill db:
     db = {}
     for f in [ f.strip() for f in open(potfiles_filename) ]:
         if not f or f.startswith('#'):
             continue
 
         f = f.rstrip(' \\')
         data = open(dirname + "/" + f).read()
 
         lines = [0]
         for m in _nl_re.finditer(data):
             lines.append(m.end())
 
         for m in _tag_re.finditer(data.replace('\n', ' ')):
             word = m.group(1)
             pos  = m.start()
 
             line = len([x for x in lines if x < pos])
 
             ref = '%s:%d' % (f, line)
 
             # normalize the word a bit, as it comes from a file where
             # whitespace is not too significant.
             word = _ws_re.sub(' ', word.strip())
 
             db.setdefault(word, []).append(ref)
 
     ## print po header:
     print r'''
     # # This file is part of CDS Invenio.
     # # Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
     # #
     # # CDS Invenio is free software; you can redistribute it and/or
     # # modify it under the terms of the GNU General Public License as
     # # published by the Free Software Foundation; either version 2 of the
     # # License, or (at your option) any later version.
     # #
     # # CDS Invenio is distributed in the hope that it will be useful, but
     # # WITHOUT ANY WARRANTY; without even the implied warranty of
     # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     # # General Public License for more details.
     # #
     # # You should have received a copy of the GNU General Public License
     # # along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
     # # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
     msgid ""
     msgstr ""
     "Project-Id-Version: CDS Invenio 0.7\n"
     "POT-Creation-Date: Tue Nov 22 16:44:03 2005\n"
     "PO-Revision-Date: 2005-11-22 11:20+0100\n"
     "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
     "Language-Team: LANGUAGE <LL@li.org>\n"
     "MIME-Version: 1.0\n"
     "Content-Type: text/plain; charset=UTF-8\n"
     "Content-Transfer-Encoding: 8bit\n"
     "Generated-By: pygettext.py 1.5\n"
 
     '''
 
-    ## print po content from db:    
+    ## print po content from db:
     for original, refs in db.items():
 
         for ref in refs:
             print "#: %s" % ref
 
         print 'msgid "%s"' % quote(original)
         print 'msgstr ""'
         print
 
     return
 
 if __name__ == "__main__":
     # Exactly two arguments are expected: <dirname> <potfiles-filename>.
     if len(sys.argv) != 3:
         print_usage()
     else:
         dirname, potfiles_filename = sys.argv[1], sys.argv[2]
         # Validate both paths before doing any work; each failure
         # reports the problem, shows the usage and exits with status 1.
         if not os.path.isdir(dirname):
             print "ERROR: %s is not a directory." % dirname
             print_usage()
             sys.exit(1)
         if not os.path.isfile(potfiles_filename):
             print "ERROR: %s is not a file." % potfiles_filename
             print_usage()
             sys.exit(1)
         extract_from_wml_files(dirname, potfiles_filename)
 
-    
+
diff --git a/po/i18n_update_wml_target.py b/po/i18n_update_wml_target.py
index 1fef04fe0..a99e233d8 100644
--- a/po/i18n_update_wml_target.py
+++ b/po/i18n_update_wml_target.py
@@ -1,70 +1,70 @@
 ## $Id$
 ##
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """This tool updates WML target file still containing sentences to be
 translated. The sentences to translate are marked with the following
 tag:
 
  Blah blah _(To be translated)_ blah.
 
-These tags can span several lines. Extra whitespace is discarded. 
+These tags can span several lines. Extra whitespace is discarded.
 """
 
 __revision__ = "$Id$"
 
 import sys
 import os
 import re
 
 import gettext
 
 # Command line: <lang> <file> [<file> ...]
 lang = sys.argv[1]
 files = sys.argv[2:]
 
 # encoding used to decode the sentences read from the target files
 charset = 'utf-8'
 
 # a translation file is located in the source directory, along with
 # this script. This makes it easy to find its path.
 podir = os.path.dirname(__file__)
 
 translation_file = os.path.join(podir, lang+'.gmo')
 # A .gmo catalog is binary data, so it is opened in binary mode, and
 # the handle is closed as soon as the catalog has been parsed.
 _catalog_fp = open(translation_file, 'rb')
 try:
     translation = gettext.GNUTranslations(_catalog_fp)
 finally:
     _catalog_fp.close()
-    
+
 
 # This matches the strings to be translated: a _(...)_ tag whose text
 # is captured non-greedily.  re.DOTALL lets '.' match newlines, so a
 # tag may span several lines.
 _tag_re = re.compile(r'_\((.*?)\)_', re.DOTALL)
 # Matches a run of one or more whitespace characters.
 _ws_re = re.compile('\s+')
 
 # we perform the substitution on the whole file at once, as they are
 # not expected to be multi-gigabyte long.
 
 def replace(match):
     """Return the translation of the sentence captured by MATCH.
 
     Used as the substitution callback of _tag_re: the captured text is
     decoded with `charset`, stripped, has every run of whitespace
     collapsed to one space, is looked up in the gettext catalog and
     the result is returned encoded as UTF-8.
     """
     sentence = match.group(1).decode(charset).strip()
     sentence = _ws_re.sub(' ', sentence)
     translated = translation.ugettext(sentence)
     return translated.encode('utf-8')
 
 # Translate every target file in place.
 for filename in files:
     # Read the whole file and close the handle before the same path is
     # reopened for writing.
     infile = open(filename)
     try:
         content = infile.read()
     finally:
         infile.close()
 
     content = _tag_re.sub(replace, content)
 
     # Close explicitly so that the rewritten text is flushed to disk
     # right away instead of whenever the file object is collected.
     outfile = open(filename, 'w')
     try:
         outfile.write(content)
     finally:
         outfile.close()