diff --git a/config/invenio.conf b/config/invenio.conf index c796f7c1e..bd254c904 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,905 +1,911 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of a CDS Invenio installation, as coming from the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/cds-invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs. ## ## However, you can also create a file named 'invenio-local.conf' in ## the same directory where 'invenio.conf' lives and put there only ## the localizations you need to have different from the default ones. ## For example: ## ## $ cat /opt/cds-invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have set in your ## local file. This cascading of configuration parameters will ease ## your future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential CDS Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local CDS Invenio administrator. ## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_PORT = 3306 CFG_DATABASE_NAME = cdsinvenio CFG_DATABASE_USER = cdsinvenio CFG_DATABASE_PASS = my123p$ss ## CFG_SITE_URL - specify the URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not include a ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify the secure URL under which your ## installation's secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not include a ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your CDS Invenio installation.
CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.) CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Атлантис Институт фиктивных Наук CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 CFG_SITE_NAME_INTL_hu = Kitalált Tudományok Atlantiszi Intézete CFG_SITE_NAME_INTL_af = Atlantis Instituut van Fiktiewe Wetenskap CFG_SITE_NAME_INTL_gl = Instituto Atlantis de Ciencia Fictive ## CFG_SITE_LANG -- the default language of the interface: CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. Currently supported languages ## include Afrikaans, Bulgarian, Catalan, Czech, German, Greek, ## English, Spanish, French, Croatian, Hungarian, Galician, Italian, ## Japanese, Norwegian, Polish, Portuguese, Russian, Slovak, Swedish, ## Ukrainian, Chinese (China), Chinese (Taiwan), so that the maximum ## you can currently select is ## "af,bg,ca,cs,de,el,en,es,fr,hr,gl,it,hu,ja,no,pl,pt,ru,sk,sv,uk,zh_CN,zh_TW". CFG_SITE_LANGS = af,bg,ca,cs,de,el,en,es,fr,hr,gl,it,hu,ja,no,pl,pt,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = cds.support@cern.ch ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and login with ## this address when using CDS Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = cds.support@cern.ch ## CFG_SITE_EMERGENCY_PHONE_NUMBERS -- list of mobile phone numbers to ## which an SMS should be sent in case of emergency (e.g. the bibsched queue ## has been stopped because of an error). ## Note that in order to use this function, if CFG_CERN_SITE is set to 0, ## the function send_sms in errorlib should be reimplemented. CFG_SITE_EMERGENCY_PHONE_NUMBERS = ## CFG_CERN_SITE -- do we want to enable CERN-specific code? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_INSPIRE_SITE = 0 ## CFG_DEVEL_SITE -- is this a development site?
If it is, you might ## prefer that it doesn't do certain things. For example, you might ## not want WebSubmit to send certain emails or trigger certain ## processes on a development site. ## Put "1" for "yes" (this is a development site) or "0" for "no" ## (this isn't a development site.) CFG_DEVEL_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important one is ## the 'template skin' you would like to use and the obfuscation mode ## for your email addresses. Please refer to the WebStyle Admin Guide ## for more explanation. The other variables are listed here mostly ## for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE -- how do we "protect" ## email addresses from undesired automated email harvesters? This ## setting will not affect 'support' and 'admin' emails. ## NOTE: there is no ultimate solution to protect against email ## harvesting. All have drawbacks and can more or less be ## circumvented. Choose your preferred mode ([t] means "transparent" ## for the user): ## -1: hide all emails. ## [t] 0 : no protection, email returned as is. ## foo@example.com => foo@example.com ## 1 : basic email munging: replaces @ by [at] and . by [dot] ## foo@example.com => foo [at] example [dot] com ## [t] 2 : transparent name mangling: characters are replaced by ## equivalent HTML entities. ## foo@example.com => &#102;&#111;&#111;&#64;&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109; ## [t] 3 : javascript insertion. Requires JavaScript enabled on the client ## side. ## 4 : replaces @ and . characters by gif equivalents. ## foo@example.com => foo [at] example [dot] com CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2
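## To make the modes concrete, here is a minimal sketch of what modes 1
## and 2 roughly do to an address (illustrative only, not the actual
## WebStyle implementation; the function names are made up):
##
##   def munge(email):     # mode 1
##       return email.replace('@', ' [at] ').replace('.', ' [dot] ')
##
##   def mangle(email):    # mode 2
##       return ''.join('&#%d;' % ord(char) for char in email)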
## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML ## left top box: CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global ## HTML left bottom box: CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global ## HTML right top box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global ## HTML right bottom box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ################################## ## Part 3: WebSearch parameters ## ################################## ## This section contains some configuration parameters for the WebSearch ## module. Please note that WebSearch is mostly configured on ## run-time via its WebSearch Admin web interface. The parameters ## below are the ones that you probably do not want to modify very ## often during runtime. (Note that you may modify them ## afterwards too, though.) ## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries we want to ## cache in memory per one Apache httpd process? This cache is used ## mainly for "next/previous page" functionality, but it also caches ## "popular" user queries if more than one user happens to search for ## the same thing. Note that large numbers may lead to great memory ## consumption. We recommend a value not greater than 100. CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 100 ## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older ## system, you may want to map field codes of your old system (such as ## 'ti') to CDS Invenio/MySQL ("title"). Use Python dictionary syntax ## for the translation table, e.g. {'wau':'author', 'wti':'title'}. ## Usually you don't want to do that, and you would use the empty dict {}. CFG_WEBSEARCH_FIELDS_CONVERT = {}
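## A toy illustration of the intent (not the search engine's actual code;
## 'field' stands for a hypothetical incoming query field code):
##
##   field = CFG_WEBSEARCH_FIELDS_CONVERT.get(field, field)
##
## i.e. with {'wti':'title'} an incoming 'wti' becomes 'title', while
## unknown codes pass through unchanged.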
## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the light search interface, in ## characters. CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60 ## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search ## pattern window in the simple search interface, in characters. CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40 ## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the advanced search interface, in ## characters. CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30 ## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still ## want to sort? For higher numbers we print only a warning and won't ## perform any sorting other than the default 'latest records first', as ## sorting would be very time consuming then. We recommend a value of ## not more than a couple of thousands. CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000 ## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but ## it was not preformatted in the "HTML brief" format, do we want to ## call BibFormatting on the fly? Put "1" for "yes" and "0" for "no". ## Note that "1" will display the record exactly as if it were fully ## preformatted, but it may be slow due to on-the-fly processing; "0" ## will display a default format very fast, but it may not have all ## the fields as in the fully preformatted HTML brief format. Note ## also that this option is active only for old (PHP) formats; the new ## (Python) formats are called on the fly by default anyway, since ## they are much faster. When unsure, please set "0" here. CFG_WEBSEARCH_CALL_BIBFORMAT = 0 ## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs ## visible rather than MySQL's record IDs? You may use this if you ## migrate from a different e-doc system, and you store your old ## system numbers into 970__a. Put "1" for "yes" and "0" for ## "no". Usually you don't want to do that, though. CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0 ## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- Put "1" if you want the ## "Latest Additions" in the web collection pages to show ## internationalized records. Useful only if your brief BibFormat ## templates contain internationalized strings. Otherwise put "0" in ## order not to slow down the creation of latest additions by WebColl. CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0 ## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display ## under 'Latest Additions' in the web collection pages. CFG_WEBSEARCH_INSTANT_BROWSE = 10 ## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to ## display in the RSS feed. CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25 ## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long ## a feed cache is valid. CFG_WEBSEARCH_RSS_TTL = 360 ## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of requests kept ## in cache. If the cache is filled, subsequent requests are not cached. CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000 ## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names ## to print explicitly; for more print "et al". Note that this is ## used in default formatting that is seldom used, as usually ## BibFormat defines all the format. The value below is only used ## when BibFormat fails, for example. CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3 ## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show or ## not collection grandsons in Narrow Search boxes (sons are shown by ## default, grandsons are configurable here). Use 0 for no and 1 for ## yes. CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1 ## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we ## create help links for Ellis, Nick or Ellis, Nicholas and friends ## when Ellis, N was searched for? Useful if you have one author ## stored in the database under several name formats, namely surname ## comma firstname and surname comma initial cataloging policy. Use 0 ## for no and 1 for yes. CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1 ## CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS -- jsMath is a JavaScript ## library that renders (La)TeX mathematical formulas in the client ## browser. This parameter must contain a comma-separated list of ## output formats for which to apply the jsMath rendering, for example ## "hb,hd". If the list is empty, jsMath is disabled. CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS = ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching ## external collections (e.g. SPIRES, CiteSeer, etc), how many seconds ## do we wait for a reply before abandoning? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5 ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many ## results do we fetch? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10 ## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search ## results by collection or not? Use 0 for no, 1 for yes. CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1 ## CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS -- in order to limit denial of ## service attacks the total number of records per group displayed as a ## result of a search query will be limited to this number. Only superuser ## queries will not be affected by this limit. CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200 ####################################### ## Part 4: BibHarvest OAI parameters ## ####################################### ## This part defines parameters for the CDS Invenio OAI gateway. ## Useful if you are running CDS Invenio as an OAI data provider. ## CFG_OAI_ID_FIELD -- OAI identifier MARC field: CFG_OAI_ID_FIELD = 909COo ## CFG_OAI_SET_FIELD -- OAI set MARC field: CFG_OAI_SET_FIELD = 909COp ## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy ## (no/transient/persistent). CFG_OAI_DELETED_POLICY = no ## CFG_OAI_ID_PREFIX -- OAI identifier prefix: CFG_OAI_ID_PREFIX = atlantis.cern.ch ## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier: CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:CERN-TH-4036 ## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb: CFG_OAI_IDENTIFY_DESCRIPTION = oai atlantis.cern.ch : oai:atlantis.cern.ch:CERN-TH-4036 http://atlantis.cern.ch/ Free and unlimited use by anybody with obligation to refer to original record Full content, i.e. preprints may not be harvested by robots Submission restricted. Submitted documents are subject of approval by OAI repository admins. ## CFG_OAI_LOAD -- OAI number of records in a response: CFG_OAI_LOAD = 1000 ## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time: CFG_OAI_EXPIRE = 90000 ## CFG_OAI_SLEEP -- service unavailable between two consecutive ## requests for CFG_OAI_SLEEP seconds: CFG_OAI_SLEEP = 10 ################################## ## Part 5: WebSubmit parameters ## ################################## ## This section contains some configuration parameters for the WebSubmit ## module.
Please note that WebSubmit is mostly configured on ## run-time via its WebSubmit Admin web interface. The parameters ## below are the ones that you probably do not want to modify during ## runtime. ## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext ## documents are stored under "/opt/cds-invenio/var/data/files/gX/Y" ## directories where X is 0,1,... and Y stands for the bibdoc ID. Thus ## documents Y are grouped into directories X and this variable ## indicates the maximum number of documents Y stored in each ## directory X. This limit is imposed solely for filesystem ## performance reasons in order not to have too many subdirectories in ## a given directory. CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000
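## For orientation, the grouping arithmetic amounts to something like the
## following sketch (illustrative only, not the exact WebSubmit path code):
##
##   group_dir = "g%d" % (bibdoc_id // CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT)
##
## e.g. with the default limit of 5000, bibdoc 12345 lands under "g2".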
## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated ## list of document extensions not listed in the Python standard mimetype ## library that should be recognized by Invenio. CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm ################################# ## Part 6: BibIndex parameters ## ################################# ## This section contains some configuration parameters for the BibIndex ## module. Please note that BibIndex is mostly configured on run-time ## via its BibIndex Admin web interface. The parameters below are the ## ones that you probably do not want to modify very often during ## runtime. ## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do ## you want to index locally stored files only, or also external URLs? ## Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 0 ## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove ## stopwords? Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_REMOVE_STOPWORDS = 0 ## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as ## alphanumeric separators of word-blocks inside words. You probably ## don't want to change this. CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ ## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation ## between word-blocks inside words. You probably don't want to ## change this. CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\" ## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup ## before indexing? Use 1 if you have HTML markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0 ## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LATEX markup ## before indexing? Use 1 if you have LATEX markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0 ## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to ## the index. Terms smaller than this will be discarded. ## Useful to keep the database clean; however, you can safely leave ## this value at 0 for up to 1,000,000 documents. CFG_BIBINDEX_MIN_WORD_LENGTH = 0 ## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD -- ## access credentials to access restricted URLs, interesting only if ## you are fulltext-indexing files located on a remote server that is ## only available via username/password. But it's probably better to ## handle this case via IP or some convention; the current scheme is ## mostly there for demo only. CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass ## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- ## Enable sanity checks for integers passed to the intbitset data ## structures. It is good to enable this during debugging ## and to disable this value for speed improvements. CFG_INTBITSET_ENABLE_SANITY_CHECKS = False ####################################### ## Part 7: Access control parameters ## ####################################### ## This section contains some configuration parameters for the access ## control system. Please note that WebAccess is mostly configured on ## run-time via its WebAccess Admin web interface. The parameters ## below are the ones that you probably do not want to modify very ## often during runtime. (If you do want to modify them during ## runtime, for example to deny access temporarily because of backups, ## you can edit access_control_config.py directly, no need to get back ## here and no need to redo the make process.) ## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is. ## Use 0 for normal operation of the site, 1 for read-only site (all ## write operations temporarily closed), 2 for site fully closed, ## 3 for also disabling any database connection. ## Useful for site maintenance. CFG_ACCESS_CONTROL_LEVEL_SITE = 0 ## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy. Use ## 0 to allow guest users, 1 not to allow them (all users must login). CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0 ## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and ## activation policy. When 0, users can register and accounts are ## automatically activated. When 1, users can register but the admin must ## activate the accounts. When 2, users cannot register nor update ## their email address, only the admin can register accounts. When 3, ## users cannot register nor update email address nor password, only the ## admin can register accounts. When 4, the same as 3 applies, and ## additionally the user cannot change his login method. CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account ## registration to certain email addresses? If wanted, give the domain ## name below, e.g. "cern.ch". If not wanted, leave it empty. CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a ## notification email to the administrator when a new account is ## created? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a ## notification email to the user when a new account is created, in order ## to verify the validity of the provided email address? Use ## 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a ## notification email to the user when a new account is activated? ## Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a ## notification email to the user when an account is deleted or an ## account demand rejected? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0 ## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials ## are stored. Must be an absolute pathname. If the value does not ## start with a slash, it is considered to be the filename of a file ## located under the prefix/var/tmp directory. This is useful for ## demo site testing purposes.
For the production site, if you plan ## to restrict access to some collections based on the Apache user ## authentication mechanism, you should put here an absolute path to ## your Apache password file. CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords ## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are ## defined. See the documentation of the preceding config variable. CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups ################################### ## Part 8: WebSession parameters ## ################################### ## This section contains some configuration parameters for tweaking ## session handling. ## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a session ## and the corresponding cookie is considered expired. CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2 ## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which a session ## and the corresponding cookie is considered expired, when the user has ## requested to permanently stay logged in. CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365 ## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when a user has requested ## a password reset, for how many days is the URL valid? CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account ## activation email was sent, for how many days is the URL valid? CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- if a ## user does not confirm his email address and complete the ## registration, after how many days will it expire? CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10 ## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the session ## system allocates the same uid=0 to all guest users regardless of where they ## come from. 0 allocates a unique uid to each guest. CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0 ## CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS -- to prevent session cookie ## stealing, Invenio checks that the IP address of a connection is the ## same as that of the connection which created the initial session. ## This variable lets you decide how many bits should be skipped during ## this check. Set this to 0 in order to enable full IP address ## checking. Set this to 32 in order to disable IP address checking. ## Intermediate values (say 8) let you have some degree of security so ## that you can trust your local network only while helping to solve ## issues related to outside clients that configured their browser to ## use a web proxy for HTTP connection but not for HTTPS, thus ## potentially having two different IP addresses. CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS = 0
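## The bit-skipping check amounts to comparing the two addresses under a
## netmask, roughly as in this sketch (illustrative only, not the actual
## WebSession code; assumes IPv4 addresses packed into 32-bit integers):
##
##   def same_client(ip1, ip2, skip_bits):
##       mask = (0xFFFFFFFF << skip_bits) & 0xFFFFFFFF
##       return (ip1 & mask) == (ip2 & mask)
##
## With skip_bits=8 the last octet is ignored; with skip_bits=32 the mask
## is zero, i.e. IP checking is effectively disabled.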
################################ ## Part 9: BibRank parameters ## ################################ ## This section contains some configuration parameters for the ranking ## system. ## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading ## similarity stats? ('People who viewed this page also viewed') CFG_BIBRANK_SHOW_READING_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download ## similarity stats? ('People who downloaded this document also ## downloaded') CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show the download ## history graph? CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we ## want to show a graph representing the distribution of client IPs ## downloading a given document? CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0 ## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited ## by' links? (useful only when you have citations in the metadata) CFG_BIBRANK_SHOW_CITATION_LINKS = 1 ## CFG_BIBRANK_SHOW_CITATION_STATS -- do we want to show citation ## stats? ('Cited by M records', 'Co-cited with N records') CFG_BIBRANK_SHOW_CITATION_STATS = 1 ## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show the citation ## history graph? CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1 #################################### ## Part 10: WebComment parameters ## #################################### ## This section contains some configuration parameters for the ## commenting and reviewing facilities. ## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users to write ## public comments on records? CFG_WEBCOMMENT_ALLOW_COMMENTS = 1 ## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users to write ## public reviews of records? CFG_WEBCOMMENT_ALLOW_REVIEWS = 1 ## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short ## reviews, that is just the attribution of stars without submitting ## detailed review text? CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0 ## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users ## report a comment as abusive, how many reports have to be received ## before the site admin is alerted? CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5 ## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site ## admin after every comment? CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple comment submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple review submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG ## JavaScript-based editor when the user edits comments? CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False ################################## ## Part 11: BibSched parameters ## ################################## ## This section contains some configuration parameters for the ## bibliographic task scheduler. ## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh the ## bibsched monitor? (in seconds) CFG_BIBSCHED_REFRESHTIME = 5 ## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task ## logs? CFG_BIBSCHED_LOG_PAGER = /bin/more ## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to perform ## garbage collection of the BibSched queue (i.e. removing/moving tasks to ## the archive). CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30 ## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTasks that can be safely ## removed from the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc ## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be safely ## archived out of the BibSched queue once they are DONE.
CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oaiarchive ## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of BibTasks ## that can run concurrently. ## NOTE: concurrent tasks are still considered an experimental ## feature. Please keep this value set to 1 in production environments. CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1 ## CFG_BIBSCHED_PROCESS_USER -- bibsched and bibtask processes must ## usually run under the same identity as the Apache web server ## process in order to share proper file read/write privileges. If ## you want to force some other bibsched/bibtask user, e.g. because ## you are using a local `invenio' user that belongs to your ## `www-data' Apache user group and so shares writing rights with your ## Apache web server process in this way, then please set its username ## identity here. Otherwise we shall check whether your ## bibsched/bibtask processes are run under the same identity as your ## Apache web server process (in which case you can leave the default ## empty value here). CFG_BIBSCHED_PROCESS_USER = ################################### ## Part 12: WebBasket parameters ## ################################### ## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit for ## the maximum number of displayed baskets CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20 ## CFG_WEBBASKET_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG ## JavaScript-based editor when the user edits comments in WebBasket? CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False ################################## ## Part 13: WebAlert parameters ## ################################## ## This section contains some configuration parameters for the ## automatic email notification alert system. ## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBALERT_ALERT_ENGINE_EMAIL = cds.alert@cdsdev.cern.ch ## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records ## at most do we send in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20 ## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of ## chars per line in an outgoing alert email. CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72 ## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert ## emails fails, how many times do we retry? CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3 ## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending ## alert emails fails, what is the sleeptime between tries? (in ## seconds) CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300 #################################### ## Part 14: WebMessage parameters ## #################################### ## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large web messages do we ## allow? CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000 ## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages do we allow ## in a regular user's inbox? CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30 ## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- how many days before ## we delete orphaned messages? CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60 ################################## ## Part 15: MiscUtil parameters ## ################################## ## CFG_MISCUTIL_SQL_MAX_CACHED_QUERIES -- maximum number of cached SQL ## queries possible. After reaching this number the cache is pruned ## by deleting half of the older queries. CFG_MISCUTIL_SQL_MAX_CACHED_QUERIES = 10000 ## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool ## in the DB engine of CDS Invenio.
It is okay to enable this flag ## even if you have not installed SQLAlchemy. Note that Invenio will ## lose some performance if this option is enabled. CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False ## CFG_MISCUTIL_SMTP_HOST -- which server to use as the outgoing mail server to ## send outgoing emails generated by the system, for example concerning ## submissions or email notification alerts. CFG_MISCUTIL_SMTP_HOST = localhost ## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server ## defined in the previous step. CFG_MISCUTIL_SMTP_PORT = 25 ################################# ## Part 16: BibEdit parameters ## ################################# ## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is ## locked to prevent other users from editing it at the same time. After ## how many seconds will the locked record become free again for other ## people to edit? CFG_BIBEDIT_TIMEOUT = 3600 ## CFG_BIBEDIT_LOCKLEVEL -- when a user tries to edit a record being edited by ## another user, the lock level determines when it is permitted to do so. ## Level 0 - permits editing if there are no recent edit sessions in the tmp directory ## (unsafe, use only if you know what you are doing) ## Level 1 - permits editing if there are no queued bibedit tasks for this record ## (safe with respect to bibedit, but not for other bibupload maintenance jobs) ## Level 2 - permits editing if there are no queued bibupload tasks of any sort ## (safe, but may lock more than necessary if many cataloguers are around) ## Level 3 - permits editing if no queued bibupload task concerns the given record ## (safe, most precise locking, but slow, ## checks for 001/EXTERNAL_SYSNO_TAG/EXTERNAL_OAIID_TAG) ## The recommended level is 3 (default) or 2 (if you use maintenance jobs often). CFG_BIBEDIT_LOCKLEVEL = 3
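## In pseudo-Python, the four levels roughly correspond to the following
## checks (a sketch of the intent only; the helper names are made up and
## are not BibEdit's actual code):
##
##   if CFG_BIBEDIT_LOCKLEVEL == 0: locked = recent_tmp_edit_sessions(recid)
##   elif CFG_BIBEDIT_LOCKLEVEL == 1: locked = queued_bibedit_tasks(recid)
##   elif CFG_BIBEDIT_LOCKLEVEL == 2: locked = queued_bibupload_tasks_of_any_sort()
##   elif CFG_BIBEDIT_LOCKLEVEL == 3: locked = queued_bibupload_tasks_concerning(recid)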
################################### ## Part 17: BibUpload parameters ## ################################### ## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references? CFG_BIBUPLOAD_REFERENCE_TAG = 999 ## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external ## system numbers? Useful for matching when our records come from an ## external digital library system. CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store the OAI ID tags ## of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI ID stored in this tag (kind of like an ## external system number too). CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG -- where do we store the OAI SRC ## tags of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI SRC stored in this tag (kind of like an ## external system number too). Note that the field should be the same as ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = 035__9 ## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that ## are strong enough to resist the replace mode. Useful for tags that ## might be created from an external non-metadata-like source, ## e.g. the information about the number of copies left. CFG_BIBUPLOAD_STRONG_TAGS = 964 ## CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -- a comma-separated list ## of tags that contain provenance information that should be checked ## in the bibupload correct mode via matching provenance codes. (Only ## field instances of the same provenance information would be acted ## upon.) Please specify the whole tag info up to subfield codes. CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = 6531_9 ## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of system ## paths from which it is allowed to take fulltexts that will be uploaded via ## FFT (CFG_TMPDIR is included by default). CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home +## CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE -- do we want to serialize +## the internal representation of records (Pythonic record structure) into +## the database? This can improve the internal processing speed of some +## operations at the price of somewhat bigger disk space usage. +CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1 + ########################## ## THAT's ALL, FOLKS! ## ########################## diff --git a/modules/bibedit/lib/bibedit_engine.py b/modules/bibedit/lib/bibedit_engine.py index b6fa0c459..068908d56 100644 --- a/modules/bibedit/lib/bibedit_engine.py +++ b/modules/bibedit/lib/bibedit_engine.py @@ -1,512 +1,512 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable-msg=C0103 """bibedit engine.""" __revision__ = "$Id$" import cPickle import difflib import os import re import time import zlib from invenio.bibedit_dblayer import get_marcxml_of_record_revision, \ get_record_revisions, marc_to_split_tag from invenio.bibedit_utils import get_file_path, get_tmp_file_owner, \ get_tmp_record, record_in_use_p, record_locked_p from invenio.bibrecord import record_xml_output, create_record, \ field_add_subfield, record_add_field from invenio.bibtask import task_low_level_submission from invenio.config import CFG_BIBEDIT_TIMEOUT from invenio.dateutils import convert_datetext_to_dategui -from invenio.search_engine import print_record, record_exists +from invenio.search_engine import print_record, record_exists, get_record import invenio.template # Precompile regexp: re_revid_split = re.compile('^(\d+)\.(\d{14})$') re_revdate_split = re.compile('^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)') bibedit_templates = invenio.template.load('bibedit') def perform_request_index(ln, recid, cancel, delete, confirm_delete, uid, format_tag, edit_tag, delete_tag, num_field, add, dict_value=None): """Returns the body of main page.
""" errors = [] warnings = [] body = '' if cancel != 0: os.system("rm -f %s.tmp" % get_file_path(cancel)) if delete != 0: if confirm_delete != 0: body = bibedit_templates.confirm(ln, 'delete', delete, format_tag) else: - (record, junk) = get_record(delete, uid) + (record, junk) = get_temp_and_orig_records(delete, uid) add_field(delete, uid, record, "980", "", "", "c", "DELETED") save_temp_record(record, uid, "%s.tmp" % get_file_path(delete)) return perform_request_submit(ln, delete, deleting=True) else: if recid != 0 : if record_exists(recid) > 0: body = '' - (record, original_record) = get_record(recid, uid) + (record, original_record) = get_temp_and_orig_records(recid, uid) if record and not record_locked_p(recid): if edit_tag is not None and dict_value is not None: record = edit_record(recid, uid, record, edit_tag, dict_value, num_field) if delete_tag is not None and num_field is not None: record = delete_field(recid, uid, record, delete_tag, num_field) if add == 4: tag = dict_value.get("add_tag" , '') ind1 = dict_value.get("add_ind1" , '') ind2 = dict_value.get("add_ind2" , '') subcode = dict_value.get("add_subcode", '') value = dict_value.get("add_value" , '') another = dict_value.get("addanother" , '') if tag and subcode and value: (record, new_field_number) = add_field(recid, uid, record, tag, ind1, ind2, subcode, value) if another: #if the user pressed 'another' instead of 'done', take to editing return perform_request_edit(ln, recid, uid, tag, new_field_number, 0, 'marc', None, 0, dict_value) # Compare original record with version in tmp file, to # determine if it has been edited. if record != original_record: tmp = True body = bibedit_templates.editor_warning_temp_file(ln) else: tmp = False revisions = len(get_record_revision_ids(recid)) - 1 body += bibedit_templates.editor_table_header(ln, "record", recid, tmp, format_tag, add=add, revisions=revisions) keys = record.keys() keys.sort() for tag in keys: fields = record.get(str(tag), "empty") if fields != "empty": for field in fields: if field[0]: # Only display if has subfield(s) body += bibedit_templates.editor_table_value(ln, recid, tag, field, format_tag, "record", add) if add == 3: body += bibedit_templates.editor_table_value(ln, recid, '', [], format_tag, "record", add, 1) body += bibedit_templates.editor_table_footer(ln, "record", add, 1) elif not record: body = bibedit_templates.record_choice_box(ln, 3) else: body = bibedit_templates.record_choice_box(ln, 4) os.system("rm %s.tmp" % get_file_path(recid)) else: if record_exists(recid) == -1: body = bibedit_templates.record_choice_box(ln, 2) else: body = bibedit_templates.record_choice_box(ln, 1) else: body = bibedit_templates.record_choice_box(ln, 0) return (body, errors, warnings) def perform_request_edit(ln, recid, uid, tag, num_field, num_subfield, format_tag, act_subfield, add, dict_value): """Returns the body of edit page.""" errors = [] warnings = [] body = '' - (record, junk) = get_record(recid, uid) + (record, junk) = get_temp_and_orig_records(recid, uid) if act_subfield is not None: if act_subfield == 'delete': record = delete_subfield(recid, uid, record, tag, num_field, num_subfield) if act_subfield == 'move_up': record = move_subfield('move_up', recid, uid, record, tag, num_field, num_subfield) if act_subfield == 'move_down': record = move_subfield('move_down', recid, uid, record, tag, num_field, num_subfield) if add == 2: subcode = dict_value.get("add_subcode", "empty") value = dict_value.get("add_value" , "empty") if subcode == '': subcode = "empty" if value == 
'': value = "empty" if value != "empty" and subcode != "empty": record = add_subfield(recid, uid, tag, record, num_field, subcode, value) body += bibedit_templates.editor_table_header(ln, "edit", recid, False, tag=tag, num_field=num_field, add=add) tag = tag[:3] fields = record.get(str(tag), 'empty') if fields != "empty": for field in fields: if field[4] == int(num_field) : body += bibedit_templates.editor_table_value(ln, recid, tag, field, format_tag, "edit", add) break body += bibedit_templates.editor_table_footer(ln, "edit", add) return (body, errors, warnings) def save_temp_record(record, uid, file_path): """Save record dict in tmp file.""" file_temp = open(file_path, "w") cPickle.dump([uid, record], file_temp) file_temp.close() -def get_record(recid, uid): +def get_temp_and_orig_records(recid, uid): """ - Returns original and tmp record dict. If returned tmp record dict is + Returns tmp and original record dict. If returned tmp record dict is empty, this indicates that another user is editing the record. """ - original_record = create_record(print_record(recid, 'xm'))[0] + original_record = get_record(recid) tmp_record = '' file_path = get_file_path(recid) if os.path.isfile("%s.tmp" % file_path): (tmp_record_uid, tmp_record) = get_tmp_record(recid) if tmp_record_uid != uid: time_tmp_file = os.path.getmtime("%s.tmp" % file_path) time_out_file = int(time.time()) - CFG_BIBEDIT_TIMEOUT if time_tmp_file < time_out_file : os.system("rm %s.tmp" % file_path) tmp_record = original_record save_temp_record(tmp_record, uid, "%s.tmp" % file_path) else: tmp_record = {} else: tmp_record = original_record save_temp_record(tmp_record, uid, "%s.tmp" % file_path) return tmp_record, original_record ######### EDIT ######### def edit_record(recid, uid, record, edit_tag, dict_value, num_field): """Edits value of a record.""" for num_subfield in range( len(dict_value.keys())/3 ): # Iterate over subfield indices of field new_subcode = dict_value.get("subcode%s" % num_subfield, None) old_subcode = dict_value.get("old_subcode%s" % num_subfield, None) new_value = dict_value.get("value%s" % num_subfield, None) old_value = dict_value.get("old_value%s" % num_subfield, None) if new_value is not None and old_value is not None \ and new_subcode is not None and old_subcode is not None: # Make sure we actually get these values if new_value != '' and new_subcode != '': # Forbid empty values if new_value != old_value or \ new_subcode != old_subcode: # only change when necessary edit_tag = edit_tag[:5] record = edit_subfield(record, edit_tag, new_subcode, new_value, num_field, num_subfield) save_temp_record(record, uid, "%s.tmp" % get_file_path(recid)) return record def edit_subfield(record, tag, new_subcode, new_value, num_field, num_subfield): """Edits the value of a subfield.""" new_value = bibedit_templates.clean_value(str(new_value), "html") (tag, ind1, ind2, junk) = marc_to_split_tag(tag) fields = record.get(str(tag), None) if fields is not None: i = -1 for field in fields: i += 1 if field[4] == int(num_field): subfields = field[0] j = -1 for subfield in subfields: j += 1 if j == num_subfield: # Rely on counted index to identify subfield to edit... record[tag][i][0][j] = (new_subcode, new_value) break break return record
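# For orientation: throughout this module a record is the bibrecord dict
# structure mapping a 3-digit tag string to a list of fields, where each
# field is a tuple
#     (subfields, ind1, ind2, controlfield_value, field_number)
# and subfields is a list of (code, value) pairs -- hence the field[0],
# field[4] and record[tag][i][0][j] indexing used above.  (Summary inferred
# from this module's usage; see bibrecord for the authoritative layout.)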
######### ADD ######## def add_field(recid, uid, record, tag, ind1, ind2, subcode, value_subfield): """Adds a new field to the record.""" tag = tag[:3] new_field_number = record_add_field(record, tag, ind1, ind2) record = add_subfield(recid, uid, tag, record, new_field_number, subcode, value_subfield) save_temp_record(record, uid, "%s.tmp" % get_file_path(recid)) return record, new_field_number def add_subfield(recid, uid, tag, record, num_field, subcode, value): """Adds a new subfield to a field.""" tag = tag[:3] fields = record.get(str(tag)) i = -1 for field in fields: i += 1 if field[4] == int(num_field) : subfields = field[0] field_add_subfield(record[tag][i], subcode, value) break save_temp_record(record, uid, "%s.tmp" % get_file_path(recid)) return record ######### DELETE ######## def delete_field(recid, uid, record, tag, num_field): """Deletes field in record.""" (tag, junk, junk, junk) = marc_to_split_tag(tag) tmp = [] for field in record[tag]: if field[4] != int(num_field) : tmp.append(field) if tmp != []: record[tag] = tmp else: del record[tag] save_temp_record(record, uid, "%s.tmp" % get_file_path(recid)) return record def delete_subfield(recid, uid, record, tag, num_field, num_subfield): """Deletes subfield of a field.""" (tag, junk, junk, subcode) = marc_to_split_tag(tag) tmp = [] i = -1 deleted = False for field in record[tag]: i += 1 if field[4] == int(num_field): j = 0 for subfield in field[0]: if j != num_subfield: #if subfield[0] != subcode or deleted == True: tmp.append((subfield[0], subfield[1])) #else: # deleted = True j += 1 break record[tag][i] = (tmp, record[tag][i][1], record[tag][i][2], record[tag][i][3], record[tag][i][4]) save_temp_record(record, uid, "%s.tmp" % get_file_path(recid)) return record def move_subfield(direction, recid, uid, record, tag, num_field, num_subfield): """Moves a subfield up or down in the field.""" (tag, junk, junk, subcode) = marc_to_split_tag(tag) i = -1 for field in record[tag]: i += 1 if field[4] == int(num_field): j = -1 mysubfields = field[0] for subfield in mysubfields: j += 1 if direction == 'move_up' and num_subfield == j and j > 0: #swap this and the previous.. prevsubfield = field[0][j-1] field[0][j-1] = subfield field[0][j] = prevsubfield if direction == 'move_down' and num_subfield == j and j < len(mysubfields) - 1: #swap this and the next.. (bounds-checked so the last subfield cannot be moved past the end) nextsubfield = field[0][j+1] field[0][j+1] = subfield field[0][j] = nextsubfield save_temp_record(record, uid, "%s.tmp" % get_file_path(recid)) return record def perform_request_submit(ln, recid, xml_record='', deleting=False): """Submits record to the database.
""" if xml_record: save_xml_record(recid, xml_record) else: save_xml_record(recid) errors = [] warnings = [] if deleting: body = bibedit_templates.record_choice_box(ln, 6) else: body = bibedit_templates.record_choice_box(ln, 5) return (body, errors, warnings) def save_xml_record(recid, xml_record=''): """Saves XML record file to database.""" file_path = get_file_path(recid) os.system("rm -f %s.xml" % file_path) file_temp = open("%s.xml" % file_path, 'w') if xml_record: file_temp.write(xml_record) else: file_temp.write(record_xml_output(get_tmp_record(recid)[1])) os.system("rm %s.tmp" % file_path) file_temp.close() task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r', '%s.xml' % file_path) def perform_request_history(ln, recid, revid, revid_cmp, action, uid, format_tag): """Performs historic operations on a record.""" errors = [] warnings = [] body = '' if action == 'revert' and revid: body = bibedit_templates.confirm( ln, 'revert', recid, format_tag=format_tag, revid=revid, revdate=split_revid(revid, 'dategui')[1]) return (body, errors, warnings) if action == 'confirm_revert' and revid: # Is the record locked for editing? if record_locked_p(recid): body = bibedit_templates.record_choice_box(ln, 4) return (body, errors, warnings) # Is the record being edited? if record_in_use_p(recid): if get_tmp_file_owner(recid) != uid: body = bibedit_templates.record_choice_box(ln, 3) return (body, errors, warnings) else: os.system("rm -f %s" % ('%s.tmp' % get_file_path(recid))) # Submit the revision. return perform_request_submit(ln, recid, get_marcxml_of_revision_id(revid)) revids = get_record_revision_ids(recid) if not revid: revid = revids[0] body = bibedit_templates.history_container('header') revdates = [split_revid(some_revid, 'dategui')[1] for some_revid in revids] revdate = split_revid(revid, 'dategui')[1] if action == 'compare' and revid_cmp: revdate_cmp = split_revid(revid_cmp, 'dategui')[1] xml1 = get_marcxml_of_revision_id(revid) xml2 = get_marcxml_of_revision_id(revid_cmp) comparison = bibedit_templates.clean_value( get_xml_comparison(revid, revid_cmp, xml1, xml2), 'text').replace('\n', '
\n ') body += bibedit_templates.history_comparebox(ln, revdate, revdate_cmp, comparison) forms = bibedit_templates.history_forms(ln, recid, revids, revdates, 'compare', revid, format_tag, revid_cmp) else: current = revid == revids[0] revision = create_record(get_marcxml_of_revision_id( revid))[0] body += bibedit_templates.history_viewbox(ln, 'header', current, recid, revid, revdate) body += bibedit_templates.history_revision(ln, recid, format_tag, revision) body += bibedit_templates.history_viewbox(ln, 'footer', current, recid, revid, revdate) forms = bibedit_templates.history_forms(ln, recid, revids, revdates, 'view', revid, format_tag) body += forms body += bibedit_templates.history_container('footer') return (body, errors, warnings) def get_marcxml_of_revision_id(revid): """ Return MARCXML string corresponding to revision REVID (=RECID.REVDATE) of a record. Return empty string if revision does not exist. REVID is assumed to be washed already. """ res = "" (recid, job_date) = split_revid(revid, 'datetext') tmp_res = get_marcxml_of_record_revision(recid, job_date) if tmp_res: for row in tmp_res: res += zlib.decompress(row[0]) + "\n" return res def get_record_revision_ids(recid): """ Return list of all known record revision ids (=RECID.REVDATE) for record RECID in chronologically decreasing order (latest first). """ res = [] tmp_res = get_record_revisions(recid) for row in tmp_res: res.append("%s.%s" % (row[0], row[1])) return res def get_xml_comparison(header1, header2, xml1, xml2): """ Return diffs of two MARCXML records. """ return "".join(difflib.unified_diff(xml1.splitlines(1), xml2.splitlines(1), header1, header2)) def split_revid(revid, dateformat=''): """ Split revid and return tuple with (recid, revdate). Optional dateformat can be datetext or dategui. """ (recid, revdate) = re_revid_split.search(revid).groups() if dateformat: datetext = '%s-%s-%s %s:%s:%s' % re_revdate_split.search( revdate).groups() if dateformat == 'datetext': revdate = datetext elif dateformat == 'dategui': revdate = convert_datetext_to_dategui(datetext, secs=True) return (recid, revdate) def revision_format_valid_p(revid): """Predicate to test validity of revision ID format (=RECID.REVDATE).""" if re_revid_split.match(revid): return True return False diff --git a/modules/bibformat/lib/bibformat_bfx_engine.py b/modules/bibformat/lib/bibformat_bfx_engine.py index a2eb4a35f..bb0a55672 100644 --- a/modules/bibformat/lib/bibformat_bfx_engine.py +++ b/modules/bibformat/lib/bibformat_bfx_engine.py @@ -1,1258 +1,1258 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BFX formatting engine. For API: see format_with_bfx() docstring below.
""" __revision__ = "$Id$" import re import copy as p_copy from xml.dom import minidom, Node from xml.sax import saxutils from invenio.bibformat_engine import BibFormatObject, get_format_element, eval_format_element from invenio.bibformat_bfx_engine_config import CFG_BIBFORMAT_BFX_LABEL_DEFINITIONS, CFG_BIBFORMAT_BFX_TEMPLATES_PATH from invenio.bibformat_bfx_engine_config import CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION, CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE from invenio.bibformat_bfx_engine_config import CFG_BIBFORMAT_BFX_ERROR_MESSAGES, CFG_BIBFORMAT_BFX_WARNING_MESSAGES address_pattern = r'(?P[a-z_]*):?/?(?P[0-9_?\w]*)/?(?P[\w_?]?)#?(?P.*)' def format_with_bfx(recIDs, out_file, template_name, preprocess=None): ''' Format a set of records according to a BFX template. This is the main entry point to the BFX engine. @param recIDs a list of record IDs to format @param out_file an object to write in; this can be every object which has a 'write' method: file, req, StringIO @param template_name the file name of the BFX template without the path and the .bfx extension @param preprocess an optional function; every record is passed through this function for initial preprocessing before formatting ''' trans = MARCTranslator(CFG_BIBFORMAT_BFX_LABEL_DEFINITIONS) trans.set_record_ids(recIDs, preprocess) parser = BFXParser(trans) template_tree = parser.load_template(template_name) parser.walk(template_tree, out_file) return None class BFXParser: ''' A general-purpose parser for generating xml/xhtml/text output based on a template system. Must be initialised with a translator. A translator is like a blackbox that returns values, calls functions, etc... Works with every translator supporting the following simple interface: - is_defined(name) - get_value(name) - iterator(name) - call_function(func_name, list_of_parameters) Customized for MARC to XML conversion through the use of a MARCTranslator. Templates are strict XML files. They are built by combining any tags with the special BFX tags living in the http://cdsware.cern.ch/invenio/ namespace. Easily extensible by tags of your own. Defined tags: - template: defines a template - template_ref: a reference to a template - loop structure - if, then, elif, else structure - text: output text - field: query translator for field 'name' - element: call external functions ''' def __init__(self, translator): ''' Create an instance of the BFXParser class. Initialize with a translator. The BFXparser makes queries to the translator for the values of certain names. For the communication it uses the following translator methods: - is_defined(name) - iterator(name) - get_value(name, [display_specifier]) @param translator the translator used by the class instance ''' self.translator = translator self.known_operators = ['style', 'format', 'template', 'template_ref', 'text', 'field', 'element', 'loop', 'if', 'then', 'else', 'elif'] self.flags = {} # store flags here; self.templates = {} # store templates and formats here self.start_template_name = None #the name of the template from which the 'execution' starts; #this is usually a format or the only template found in a doc def load_template(self, template_name, template_source=None): ''' Load a BFX template file. A template file can have one of two forms: - it is a file with a single template. Root tag is 'template'. In an API call the single template element is 'executed'. - it is a 'style' file which contains exactly one format and zero or more templates. Root tag is 'style' with children 'format' and 'template'(s). 
In this case only the format code is 'executed'. Naturally, in it, it would have references to other templates in the document. Template can be given by name (in that case search path is in standard directory for bfx template) or directly using the template source. If given, template_source overrides template_name @param template_name the name of the BFX template, the same as the name of the filename without the extension @return a DOM tree of the template ''' if template_source is None: template_file_name = CFG_BIBFORMAT_BFX_TEMPLATES_PATH + '/' + template_name + '.' + CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION #load document doc = minidom.parse(template_file_name) else: doc = minidom.parseString(template_source) #set exec flag to false and walk document to find templates and formats self.flags['exec'] = False self.walk(doc) #check found templates if self.start_template_name: start_template = self.templates[self.start_template_name]['node'] else: #print CFG_BIBFORMAT_BFX_WARNING_MESSAGES['WRN_BFX_NO_FORMAT_FOUND'] if len(self.templates) == 1: # no format found, check if there is a default template self.start_template_name = self.templates.keys()[0] start_template = self.templates[self.start_template_name]['node'] else: #no formats found, templates either zero or more than one if len(self.templates) > 1: print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TOO_MANY_TEMPLATES'] #else: # print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_NO_TEMPLATES_FOUND'] return None self.flags['exec'] = True return start_template def parse_attribute(self, expression): ''' A function to check if an expression is of the special form [!name:display]. A short form for saying , used in element attributes. @param expression a string, usually taken from an attribute value @return if the string is special, parse it and return the corresponding value; else return the initial expression ''' output = expression pattern = '\[!(?P[\w_.:]*)\]' expr = re.compile(pattern) match = expr.match(expression) if match: tmp = match.group('tmp') tmp = tmp.split(':') var = tmp[0] display = '' if len(tmp) == 2: display = tmp[1] output = self.translator.get_value(var, display) output = xml_escape(output) return output def walk(self, parent, out_file=None): ''' Walk a template DOM tree. The main function in the parser. It is recursively called until all the nodes are processed. This function is used in two different ways: - for initial loading of the template (and validation) - for 'execution' of a format/template The different behaviour is achieved through the use of flags, which can be set to True or False. 
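        For illustration, the two passes as driven by load_template() and
        format_with_bfx() (a sketch of calls already made by this module,
        not additional API):

            parser.flags['exec'] = False
            parser.walk(doc)                       # loading/validation pass
            parser.flags['exec'] = True
            parser.walk(start_template, out_file)  # execution pass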
@param parent a node to process; in an API call this is the root node @param out_file an object to write to; must have a 'write' method @return None ''' for node in parent.childNodes: if node.nodeType == Node.TEXT_NODE: value = get_node_value(node) value = value.strip() if out_file: out_file.write(value) if node.nodeType == Node.ELEMENT_NODE: #get values name, attributes, element_namespace = get_node_name(node), get_node_attributes(node), get_node_namespace(node) # write values if element_namespace != CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE: #parse all the attributes for key in attributes.keys(): attributes[key] = self.parse_attribute(attributes[key]) if node_has_subelements(node): if out_file: out_file.write(create_xml_element(name=name, attrs=attributes, element_type=xmlopen)) self.walk(node, out_file) #walk subnodes if out_file: out_file.write(create_xml_element(name=name, element_type=xmlclose)) else: if out_file: out_file.write(create_xml_element(name=name, attrs=attributes, element_type=xmlempty)) #name is a special name, must fall in one of the next cases: elif node.localName == 'style': self.ctl_style(node, out_file) elif node.localName == 'format': self.ctl_format(node, out_file) elif node.localName == 'template': self.ctl_template(node, out_file) elif node.localName == 'template_ref': self.ctl_template_ref(node, out_file) elif node.localName == 'element': self.ctl_element(node, out_file) elif node.localName == 'field': self.ctl_field(node, out_file) elif node.localName == 'text': self.ctl_text(node, out_file) elif node.localName == 'loop': self.ctl_loop(node, out_file) elif node.localName == 'if': self.ctl_if(node, out_file) elif node.localName == 'then': self.ctl_then(node, out_file) elif node.localName == 'else': self.ctl_else(node, out_file) elif node.localName == 'elif': self.ctl_elif(node, out_file) else: if node.localName in self.known_operators: print 'Note for programmer: you haven\'t implemented operator %s.' % (name) else: print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_INVALID_OPERATOR_NAME'] % (name) return None def ctl_style(self, node, out_file): ''' Process a style root node. ''' #exec mode if self.flags['exec']: return None #test mode self.walk(node, out_file) return None def ctl_format(self, node, out_file): ''' Process a format node. Get name, description and content attributes. This function is called only in test mode. ''' #exec mode if self.flags['exec']: return None #test mode attrs = get_node_attributes(node) #get template name and give control to ctl_template if attrs.has_key('name'): name = attrs['name'] if self.templates.has_key(name): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_DUPLICATE_NAME'] % (name) return None self.start_template_name = name self.ctl_template(node, out_file) else: print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TEMPLATE_NO_NAME'] return None return None def ctl_template(self, node, out_file): ''' Process a template node. Get name, description and content attributes. Register name and store for later calls from template_ref. This function is called only in test mode. 
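        Example of a template element as it could appear in a BFX file
        (illustrative; the 'bx' prefix is assumed here to be bound to the
        BFX namespace http://cdsware.cern.ch/invenio/):

            <bx:template name="books" description="A list of books"
                         content="text/html">
                ...
            </bx:template>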
''' #exec mode if self.flags['exec']: return None #test mode attrs = get_node_attributes(node) #get template name if attrs.has_key('name'): name = attrs['name'] if self.templates.has_key(name): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_DUPLICATE_NAME'] % (name) return None self.templates[name] = {} self.templates[name]['node'] = node else: print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TEMPLATE_NO_NAME'] return None #get template description if attrs.has_key('description'): description = attrs['description'] else: description = '' print CFG_BIBFORMAT_BFX_WARNING_MESSAGES['WRN_BFX_TEMPLATE_NO_DESCRIPTION'] self.templates[name]['description'] = description #get content-type of resulting output if attrs.has_key('content'): content_type = attrs['content'] else: content_type = 'text/xml' print CFG_BIBFORMAT_BFX_WARNING_MESSAGES['WRN_BFX_TEMPLATE_NO_CONTENT'] self.templates[name]['content_type'] = content_type #walk node self.walk(node, out_file) return None def ctl_template_ref(self, node, out_file): ''' Reference to an external template. This function is called only in execution mode. Bad references appear as run-time errors. ''' #test mode if not self.flags['exec']: return None #exec mode attrs = get_node_attributes(node) if not attrs.has_key('name'): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TEMPLATE_REF_NO_NAME'] return None name = attrs['name'] #first check for a template in the same file, that is in the already cached templates if self.templates.has_key(name): node_to_walk = self.templates[name]['node'] self.walk(node_to_walk, out_file) else: #load a file and execute it pass #template_file_name = CFG_BIBFORMAT_BFX_TEMPLATES_PATH + name + '/' + CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION #try: # node = minidom.parse(template_file_name) #except: # print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TEMPLATE_NOT_FOUND'] % (template_file_name) return None def ctl_element(self, node, out_file): ''' Call an external element (written in Python). ''' #test mode if not self.flags['exec']: return None #exec mode parameters = get_node_attributes(node) if not parameters.has_key('name'): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_ELEMENT_NO_NAME'] return None function_name = parameters['name'] del parameters['name'] #now run external bfe_name.py, with param attrs if function_name: value = self.translator.call_function(function_name, parameters) value = xml_escape(value) out_file.write(value) return None def ctl_field(self, node, out_file): ''' Get the value of a field by its name. 
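        Example (illustrative; assumes a label 'title' is defined in
        CFG_BIBFORMAT_BFX_LABEL_DEFINITIONS and that 'bx' is bound to the
        BFX namespace):

            <bx:field name="title" display="value"/>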
''' #test mode if not self.flags['exec']: return None #exec mode attrs = get_node_attributes(node) if not attrs.has_key('name'): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_FIELD_NO_NAME'] return None display = '' if attrs.has_key('display'): display = attrs['display'] var = attrs['name'] if not self.translator.is_defined(var): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_NO_SUCH_FIELD'] % (var) return None value = self.translator.get_value(var, display) value = xml_escape(value) out_file.write(value) return None def ctl_text(self, node, out_file): ''' Output a text ''' #test mode if not self.flags['exec']: return None #exec mode attrs = get_node_attributes(node) if not attrs.has_key('value'): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TEXT_NO_VALUE'] return None value = attrs['value'] value = value.replace(r'\n', '\n') #value = xml_escape(value) if type(value) == type(u''): value = value.encode('utf-8') out_file.write(value) return None def ctl_loop(self, node, out_file): ''' Loop through a set of values. ''' #test mode if not self.flags['exec']: self.walk(node, out_file) return None #exec mode attrs = get_node_attributes(node) if not attrs.has_key('object'): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_LOOP_NO_OBJECT'] return None name = attrs['object'] if not self.translator.is_defined(name): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_NO_SUCH_FIELD'] % (name) return None for new_object in self.translator.iterator(name): self.walk(node, out_file) return None def ctl_if(self, node, out_file): ''' An if/then/elif/.../elif/else construct. 'If' can have several forms: : True if var is non-empty, eval as string : True if var=value, eval as string : True if var : True if var>value, try to eval as num, else eval as string : True if var<=value, try to eval as num, else eval as string : True if var>=value, try to eval as num, else eval as string : True if var in [val1, val2], eval as string : True if var not in [val1, val2], eval as string : True if var!=value, eval as string : Match against a regular expression Example: Pauli Pauli other ''' #test mode if not self.flags['exec']: self.walk(node, out_file) return None #exec mode attrs = get_node_attributes(node) if not attrs.has_key('name'): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_IF_NO_NAME'] return None #determine result var = attrs['name'] if not self.translator.is_defined(var): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_NO_SUCH_FIELD'] % (var) return None value = self.translator.get_value(var) value = value.strip() #equal if attrs.has_key('eq'): pattern = attrs['eq'] if is_number(pattern) and is_number(value): result = (float(value)==float(pattern)) else: result = (value==pattern) #not equal elif attrs.has_key('neq'): pattern = attrs['neq'] if is_number(pattern) and is_number(value): result = (float(value)!=float(pattern)) else: result = (value!=pattern) #lower than elif attrs.has_key('lt'): pattern = attrs['lt'] if is_number(pattern) and is_number(value): result = (float(value)float(pattern)) else: result = (value>pattern) #lower or equal than elif attrs.has_key('le'): pattern = attrs['le'] if is_number(pattern) and is_number(value): result = (float(value)<=float(pattern)) else: result = (value<=pattern) #greater or equal than elif attrs.has_key('ge'): pattern = attrs['ge'] if is_number(pattern) and is_number(value): result = (float(value)>=float(pattern)) else: result = (value>=pattern) #in elif attrs.has_key('in'): pattern = attrs['in'] values = pattern.split() result = (value in values) #not in elif 
attrs.has_key('nin'): pattern = attrs['nin'] values = pattern.split() result = (value not in values) #match against a regular expression elif attrs.has_key('like'): pattern = attrs['like'] try: expr = re.compile(pattern) result = expr.match(value) except: print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_INVALID_RE'] % (pattern) #simple form: True if non-empty, otherwise False else: result = value #end of evaluation #================= #validate subnodes then_node = get_node_subelement(node, 'then', CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE) else_node = get_node_subelement(node, 'else', CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE) elif_node = get_node_subelement(node, 'elif', CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE) #having else and elif siblings at the same time is a syntax error if (else_node is not None) and (elif_node is not None): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_IF_WRONG_SYNTAX'] return None #now walk appropriate nodes, according to the result if result: #True if then_node: self.walk(then_node, out_file) #todo: add short form, without 'then', just elements within if statement to walk on 'true' and no 'elif' or 'else' elements else: #False if elif_node: self.ctl_if(elif_node, out_file) elif else_node: self.walk(else_node, out_file) return None def ctl_then(self, node, out_file): ''' Calling 'then' directly from the walk function means a syntax error. ''' #test mode if not self.flags['exec']: self.walk(node, out_file) return None #exec mode print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_IF_WRONG_SYNTAX'] return None def ctl_else(self, node, out_file): ''' Calling 'else' directly from the walk function means a syntax error. ''' #test mode if not self.flags['exec']: self.walk(node, out_file) return None #exec mode print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_IF_WRONG_SYNTAX'] return None def ctl_elif(self, node, out_file): ''' Calling 'elif' directly from the walk function means a syntax error. ''' #test mode if not self.flags['exec']: self.walk(node, out_file) return None #exec mode print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_IF_WRONG_SYNTAX'] return None class MARCTranslator: ''' memory[name] [name]['addresses'] - the set of rules for each of the defined names [name]['parent'] - the name of the parent; '' if none; [name]['children'] - a list with the name of the children of every variable [name]['object'] - stored state of object for performance efficiency ''' def __init__(self, labels=None): ''' Create an instance of the translator and init with the list of the defined labels and their rules. 
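        A sketch of a labels dictionary (the addresses are illustrative
        only; real definitions live in
        CFG_BIBFORMAT_BFX_LABEL_DEFINITIONS):

            labels = {'author': ['100__/a'],
                      'title':  ['245__/a']}
            trans = MARCTranslator(labels)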
''' if labels is None: labels = {} self.recIDs = [] self.recID = 0 self.recID_index = 0 self.record = None self.memory = {} pattern = address_pattern expr = re.compile(pattern) for name in labels.keys(): self.memory[name] = {} self.memory[name]['object'] = None self.memory[name]['parent'] = '' self.memory[name]['children'] = [] self.memory[name]['addresses'] = p_copy.deepcopy(labels[name]) for name in self.memory: for i in range(len(self.memory[name]['addresses'])): address = self.memory[name]['addresses'][i] match = expr.match(address) if not match: print 'Invalid address: ', name, address else: parent_name = match.group('parent') if parent_name: if not self.memory.has_key(parent_name): print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_NO_SUCH_FIELD'] % (parent_name) else: self.memory[name]['parent'] = parent_name #now make parent aware of children if not name in self.memory[parent_name]['children']: self.memory[parent_name]['children'].append(name) level = self.determine_level(parent_name) self.memory[name]['addresses'][i] = self.memory[name]['addresses'][i].replace(parent_name, '/'*level) #special case 'record' self.memory['record'] = {} self.memory['record']['object'] = None self.memory['record']['parent'] = '' self.memory['record']['children'] = [] def set_record_ids(self, recIDs, preprocess=None): ''' Initialize the translator with the set of record IDs. @param recIDs a list of the record IDs @param preprocess an optional function which acts on every record structure after creating it This can be used to enrich the record with fields not present in the record initially, verify the record data or whatever plausible. Another solution is to use external function elements. ''' self.record = None self.recIDs = recIDs self.preprocess = preprocess if self.recIDs: self.recID_index = 0 self.recID = self.recIDs[self.recID_index] - self.record = get_record(self.recID) + self.record = get_bfx_record(self.recID) if self.preprocess: self.preprocess(self.record) return None def determine_level(self, name): ''' Determine the type of the variable, whether this is an instance or a subfield. This is done by observing the first provided address for the name. todo: define variable types in config file, remove this function, results in a clearer concept ''' level = 0 #default value if self.memory.has_key(name): expr = re.compile(address_pattern) if self.memory[name]['addresses']: match = expr.match(self.memory[name]['addresses'][0]) if match: tag = match.group('tag') code = match.group('code') reg = match.group('reg') if reg: level = 2 #subfield elif code: level = 2 #subfield elif tag: level = 1 #instance return level #======================================== #API functions for quering the translator #======================================== def is_defined(self, name): ''' Check whether a variable is defined. @param name the name of the variable ''' return self.memory.has_key(name) def get_num_elements(self, name): ''' An API function to get the number of elements for a variable. Do not use this function to build loops, Use iterator instead. ''' if name == 'record': return len(self.recIDs) num = 0 for part in self.iterator(name): num = num + 1 return num def get_value(self, name, display_type='value'): ''' The API function for quering the translator for values of a certain variable. Called in a loop will result in a different value each time. Objects are cached in memory, so subsequent calls for the same variable take less time. 
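        Example (illustrative; assumes a label 'author' resolving to MARC
        100__/a): get_value('author') returns the author name, while
        get_value('author', 'tag') returns the corresponding tag, '100'.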
@param name the name of the variable you want the value of @param display_type an optional value for the type of the desired output, one of: value, tag, ind1, ind2, code, fulltag; These can be easily added in the proper place of the code (display_value) ''' if name == 'record': return '' record = self.get_object(name) return self.display_record(record, display_type) def iterator(self, name): ''' An iterator over the values of a certain name. The iterator changes state of internal variables and objects. When calling get_value in a loop, this will result each time in a different value. ''' if name == 'record': for self.recID in self.recIDs: - self.record = get_record(self.recID) + self.record = get_bfx_record(self.recID) if self.preprocess: self.preprocess(self.record) yield str(self.recID) else: full_object = self.build_object(name) level = self.determine_level(name) for new_object in record_parts(full_object, level): self.memory[name]['object'] = new_object #parent has changed state; also set childs state to None; for children_name in self.memory[name]['children']: self.memory[children_name]['object'] = None yield new_object #the result for a call of the same name after an iterator should be the same as if there was no iterator called before self.memory[name]['object'] = None def call_function(self, function_name, parameters=None): ''' Call an external element which is a Python file, using BibFormat @param function_name the name of the function to call @param parameters a dictionary of the parameters to pass as key=value pairs @return a string value, which is the result of the function call ''' if parameters is None: parameters = {} bfo = BibFormatObject(self.recID) format_element = get_format_element(function_name) (value, errors) = eval_format_element(format_element, bfo, parameters) #to do: check errors from function call return value #======================================== #end of API functions #======================================== def get_object(self, name): ''' Responsible for creating the desired object, corresponding to provided name. If object is not cached in memory, it is build again. Directly called by API function get_value. The result is then formatted by display_record according to display_type. ''' if self.memory[name]['object'] is not None: return self.memory[name]['object'] new_object = self.build_object(name) #if you have reached here you are not in an iterator; return first non-empty level = self.determine_level(name) for tmp_object in record_parts(new_object, level): #get the first non-empty if tmp_object: new_object = tmp_object break self.memory[name]['object'] = new_object return new_object def build_object(self, name): ''' Build the object from the list of addresses A slave function for get_object. ''' new_object = {} parent_name = self.memory[name]['parent']; has_parent = parent_name for address in self.memory[name]['addresses']: if not has_parent: tmp_object = copy(self.record, address) new_object = merge(new_object, tmp_object) else: #has parent parent_object = self.get_object(parent_name) #already returns the parents instance tmp_object = copy(parent_object, address) new_object = merge(new_object, tmp_object) return new_object def display_record(self, record, display_type='value'): ''' Decide what the final output value is according to the display_type. 
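        Example (illustrative): for record = {'100__': [{'a': 'Ellis, J.'}]},
        display_record(record, 'value') gives 'Ellis, J.',
        display_record(record, 'tag') gives '100' and
        display_record(record, 'fulltag') gives '100__'.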
@param record the record structure to display; this is most probably just a single subfield @param display_type a string specifying the desired output; can be one of: value, tag, ind1, ind2, code, fulltag @return a string to output ''' output = '' tag, ind1, ind2, code, value = '', '', '', '', '' if record: tags = record.keys() tags.sort() if tags: fulltag = tags[0] tag, ind1, ind2 = fulltag[0:3], fulltag[3:4], fulltag[4:5] field_instances = record[fulltag] if field_instances: field_instance = field_instances[0] codes = field_instance.keys() codes.sort() if codes: code = codes[0] value = field_instance[code] if not display_type: display_type = 'value' if display_type == 'value': output = value elif display_type == 'tag': output = tag elif display_type == 'ind1': ind1 = ind1.replace('_', ' ') output = ind1 elif display_type=='ind2': ind2 = ind2.replace('_', ' ') output = ind2 elif display_type == 'code': output = code elif display_type == 'fulltag': output = tag + ind1 + ind2 else: print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_INVALID_DISPLAY_TYPE'] % (display_type) return output ''' Functions for use with the structure representing a MARC record defined here. This record structure differs from the one defined in bibrecord. The reason is that we want a symmetry between controlfields and datafields. In this format controlfields are represented internally as a subfield value with code ' ' of a datafield. This allows for easier handling of the fields. However, there is a restriction associated with this structure and it is that subfields cannot be repeated in the same instance. If this is the case, the result will be incorrect. The record structure has the form: fields={field_tag:field_instances} field_instances=[field_instance] field_instance={field_code:field_value} ''' def convert_record(old_record): ''' Convert a record from the format defined in bibrecord to the format defined here @param old_record the record as returned from bibrecord.create_record() @return a record of the new form ''' fields = {} old_tags = old_record.keys() old_tags.sort() for old_tag in old_tags: if int(old_tag) < 11: #controlfields new_tag = old_tag fields[new_tag] = [{' ':old_record[old_tag][0][3]}] else: #datafields old_field_instances = old_record[old_tag] num_fields = len(old_field_instances) for i in range(num_fields): old_field_instance = old_field_instances[i] ind1 = old_field_instance[1] if not ind1 or ind1 == ' ': ind1 = '_' ind2 = old_field_instance[2] if not ind2 or ind2 == ' ': ind2 = '_' new_tag = old_tag + ind1 + ind2 new_field_instance = {} for old_subfield in old_field_instance[0]: new_code = old_subfield[0] new_value = old_subfield[1] if new_field_instance.has_key(new_code): print 'Error: Repeating subfield codes in the same instance!' new_field_instance[new_code] = new_value if not fields.has_key(new_tag): fields[new_tag] = [] fields[new_tag].append(new_field_instance) return fields -def get_record(recID): +def get_bfx_record(recID): ''' Get a record with a specific recID. @param recID the ID of the record @return a record in the structure defined here ''' bfo = BibFormatObject(recID) return convert_record(bfo.get_record()) -def print_record(record): +def print_bfx_record(record): ''' Print a record. ''' tags = record.keys() tags.sort() for tag in tags: field_instances = record[tag] for field_instance in field_instances: print tag, field_instance def record_fields_value(record, tag, subfield): ''' Return a list of all the fields with a certain tag and subfield code. Works on subfield level. 
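    Example (illustrative):
        record_fields_value({'100__': [{'a': 'Ellis, J.'}]}, '100__', 'a')
        returns ['Ellis, J.'].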
@param record a record @param tag a 3 or 5 letter tag; required @param subfield a subfield code; required ''' output = [] if record.has_key(tag): for field_instance in record[tag]: if field_instance.has_key(subfield): output.append(field_instance[subfield]) return output def record_add_field_instance(record, tag, field_instance): ''' Add a field_instance to the beginning of the instances of a corresponding tag. @param record a record @param tag a 3 or 5 letter tag; required @param field_instance the field instance to add @return None ''' if not record.has_key(tag): record[tag] = [] record[tag] = [field_instance] + record[tag] return None def record_num_parts(record, level): ''' Count the number of instances or the number of subfields in the whole record. @param record @param level either 1 or 2 level=1 - view record on instance level level=2 - view record on subfield level @return the number of parts ''' num = 0 for part in record_parts(record, level): num = num + 1 def record_parts(record, level): ''' An iterator over the instances or subfields of a record. @param record @param level either 1 or 2 level=1 - iterate over instances level=2 - iterate over subfields @yield a record structure representing the part (instance or subfield) ''' if level == 1: names = record.keys() names.sort() for name in names: old_field_instances = record[name] for old_field_instance in old_field_instances: new_record = {} new_field_instances = [] new_field_instance = {} for old_field_code in old_field_instance.keys(): new_field_code = old_field_code new_field_value = old_field_instance[old_field_code] new_field_instance[new_field_code] = new_field_value new_field_instances.append(new_field_instance) new_record[name] = [] new_record[name].extend(new_field_instances) yield new_record if level == 2: names = record.keys() names.sort() for name in names: old_field_instances = record[name] for old_field_instance in old_field_instances: old_field_codes = old_field_instance.keys() old_field_codes.sort() for old_field_code in old_field_codes: new_record = {} new_field_instances = [] new_field_instance = {} new_field_code = old_field_code new_field_value = old_field_instance[old_field_code] new_field_instance[new_field_code] = new_field_value new_field_instances.append(new_field_instance) new_record[name] = [] new_record[name].extend(new_field_instances) yield new_record def copy(old_record, address=''): ''' Copy a record by filtering all parts of the old record specified by address (A better name for the function is filter.) @param record the initial record @param address an address; for examples see bibformat_bfx_engine_config. If no address is specified, return the initial record. 
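    Example (illustrative): with
        record = {'100__': [{'a': 'Ellis, J.'}], '245__': [{'a': 'A title'}]}
    copy(record, '100__/a') returns {'100__': [{'a': 'Ellis, J.'}]},
    i.e. only the 'a' subfields of the '100__' instances are kept.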
@return the filtered record ''' if not old_record: return {} tag_pattern, code_pattern, reg_pattern = '', '', '' expr = re.compile(address_pattern) match = expr.match(address) if match: tag_pattern = match.group('tag') code_pattern = match.group('code') reg_pattern = match.group('reg') if tag_pattern: tag_pattern = tag_pattern.replace('?','[0-9_\w]') else: tag_pattern = r'.*' if code_pattern: code_pattern = code_pattern.replace('?','[\w ]') else: code_pattern = r'.*' tag_expr = re.compile(tag_pattern) code_expr = re.compile(code_pattern) new_record = {} for tag in old_record.keys(): tag_match = tag_expr.match(tag) if tag_match: if tag_match.end() == len(tag): old_field_instances = old_record[tag] new_field_instances = [] for old_field_instance in old_field_instances: new_field_instance = {} for old_field_code in old_field_instance.keys(): new_field_code = old_field_code code_match = code_expr.match(new_field_code) if code_match: new_field_value = old_field_instance[old_field_code] new_field_instance[new_field_code] = new_field_value if new_field_instance: new_field_instances.append(new_field_instance) if new_field_instances: new_record[tag] = new_field_instances #in new_record pass all subfields through regexp if reg_pattern: for tag in new_record: field_instances = new_record[tag] for field_instance in field_instances: field_codes = field_instance.keys() for field_code in field_codes: field_instance[field_code] = pass_through_regexp(field_instance[field_code], reg_pattern) return new_record def merge(record1, record2): ''' Merge two records. Controlfields with the same tag in record2 as in record1 are ignored. @param record1, record2 @return the merged record ''' new_record = {} if record1: new_record = copy(record1) if not record2: return new_record for tag in record2.keys(): #append only datafield tags; #if controlfields conflict, leave first; old_field_instances = record2[tag] new_field_instances = [] for old_field_instance in old_field_instances: new_field_instance = {} for old_field_code in old_field_instance.keys(): new_field_code = old_field_code new_field_value = old_field_instance[old_field_code] new_field_instance[new_field_code] = new_field_value if new_field_instance: new_field_instances.append(new_field_instance) if new_field_instances: #controlfield if len(tag) == 3: if not new_record.has_key(tag): new_record[tag] = [] new_record[tag].extend(new_field_instances) #datafield if len(tag) == 5: if not new_record.has_key(tag): new_record[tag] = [] new_record[tag].extend(new_field_instances) return new_record #====================== #Help functions #===================== xmlopen = 1 xmlclose = 2 xmlfull = 3 xmlempty = 4 def create_xml_element(name, value='', attrs=None, element_type=xmlfull, level=0): ''' Create a XML element as string. 
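    Example (illustrative, with the default element_type=xmlfull):
        create_xml_element('subfield', value='Ellis, J.', attrs={'code': 'a'})
        returns '<subfield code="a">Ellis, J.</subfield>'.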
@param name the name of the element @param value the element value; default is '' @param attrs a dictionary with the element attributes @param element_type a constant which defines the type of the output xmlopen = 1 xmlclose = 2 xmlfull = 3 value xmlempty = 4 @return a formatted XML string ''' output = '' if attrs is None: attrs = {} if element_type == xmlempty: output += '<'+name for attrname in attrs.keys(): attrvalue = attrs[attrname] if type(attrvalue) == type(u''): attrvalue = attrvalue.encode('utf-8') output += ' %s="%s"' % (attrname, attrvalue) output += ' />' if element_type == xmlfull: output += '<'+name for attrname in attrs.keys(): attrvalue = attrs[attrname] if type(attrvalue) == type(u''): attrvalue = attrvalue.encode('utf-8') output += ' %s="%s"' % (attrname, attrvalue) output += '>' output += value output += '' if element_type == xmlopen: output += '<'+name for attrname in attrs.keys(): output += ' '+attrname+'="'+attrs[attrname]+'"' output += '>' if element_type == xmlclose: output += '' output = ' '*level + output if type(output) == type(u''): output = output.encode('utf-8') return output def xml_escape(value): ''' Escape a string value for use as a xml element or attribute value. @param value the string value to escape @return escaped value ''' return saxutils.escape(value) def xml_unescape(value): ''' Unescape a string value for use as a xml element. @param value the string value to unescape @return unescaped value ''' return saxutils.unescape(value) def node_has_subelements(node): ''' Check if a node has any childnodes. Check for element or text nodes. @return True if childnodes exist, False otherwise. ''' result = False for node in node.childNodes: if node.nodeType == Node.ELEMENT_NODE or node.nodeType == Node.TEXT_NODE: result = True return result def get_node_subelement(parent_node, name, namespace = None): ''' Get the first childnode with specific name and (optional) namespace @param parent_node the node to check @param name the name to search @param namespace An optional namespace URI. This is usually a URL: http://cdsware.cern.ch/invenio/ @return the found node; None otherwise ''' output = None for node in parent_node.childNodes: if node.nodeType == Node.ELEMENT_NODE and node.localName == name and node.namespaceURI == namespace: output = node return output return output def get_node_value(node): ''' Get the node value of a node. For use with text nodes. @param node a text node @return a string of the nodevalue encoded in utf-8 ''' return node.nodeValue.encode('utf-8') def get_node_namespace(node): ''' Get node namespace. For use with element nodes. @param node an element node @return the namespace of the node ''' return node.namespaceURI def get_node_name(node): ''' Get the node value of a node. For use with element nodes. @param node an element node @return a string of the node name ''' return node.nodeName def get_node_attributes(node): ''' Get attributes of an element node. For use with element nodes @param node an element node @return a dictionary of the attributes as key:value pairs ''' attributes = {} attrs = node.attributes for attrname in attrs.keys(): attrnode = attrs.get(attrname) attrvalue = attrnode.nodeValue attributes[attrname] = attrvalue return attributes def pass_through_regexp(value, regexp): ''' Pass a value through a regular expression. @param value a string @param regexp a regexp with a group 'value' in it. No group named 'value' will result in an error. 
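    Example (illustrative):
        pass_through_regexp('Ellis, J.', r'(?P<value>[^,]*).*')
        returns 'Ellis'.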
@return if the string matches the regexp, return named group 'value', otherwise return '' ''' output = '' expr = re.compile(regexp) match = expr.match(value) if match: output = match.group('value') return output def is_number(value): ''' Check if a value is a number. @param value the value to check @return True or False ''' result = True try: float(value) except ValueError: result = False return result diff --git a/modules/bibformat/lib/bibformat_engine.py b/modules/bibformat/lib/bibformat_engine.py index 3c50c392c..1f400ad1d 100644 --- a/modules/bibformat/lib/bibformat_engine.py +++ b/modules/bibformat/lib/bibformat_engine.py @@ -1,2066 +1,2065 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Formats a single XML Marc record using specified format. There is no API for the engine. Instead use bibformat.py. SEE: bibformat.py, bibformat_utils.py """ __revision__ = "$Id$" import re import sys import os import inspect import traceback import zlib import cgi from invenio.config import \ CFG_PATH_PHP, \ CFG_BINDIR, \ CFG_SITE_LANG from invenio.errorlib import \ register_errors, \ get_msgs_for_code_list from invenio.bibrecord import \ create_record, \ record_get_field_instances, \ record_get_field_value, \ record_get_field_values from invenio.bibformat_xslt_engine import format from invenio.dbquery import run_sql from invenio.messages import \ language_list_long, \ wash_language, \ gettext_set_language from invenio import bibformat_dblayer from invenio.bibformat_config import \ CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, \ CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, \ CFG_BIBFORMAT_TEMPLATES_PATH, \ CFG_BIBFORMAT_ELEMENTS_PATH, \ CFG_BIBFORMAT_OUTPUTS_PATH, \ CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH from invenio.bibformat_utils import \ record_get_xml, \ parse_tag from invenio.htmlutils import \ HTMLWasher, \ cfg_html_buffer_allowed_tag_whitelist, \ cfg_html_buffer_allowed_attribute_whitelist from invenio.webuser import collect_user_info from HTMLParser import HTMLParseError if CFG_PATH_PHP: #Remove when call_old_bibformat is removed from xml.dom import minidom import tempfile # Cache for data we have already read and parsed format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} kb_mappings_cache = {} html_field = '' # String indicating that field should be # treated as HTML (and therefore no escaping of # HTML tags should occur. # Appears in some field values. washer = HTMLWasher() # Used to remove dangerous tags from HTML # sources # Regular expression for finding ... tag in format templates pattern_lang = re.compile(r''' #closing start tag (?P.*?) 
#anything but the next group (greedy) () #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Builds regular expression for finding each known language in tags ln_pattern_text = r"<(" for lang in language_list_long(enabled_langs_only=False): ln_pattern_text += lang[0] +r"|" ln_pattern_text = ln_pattern_text.rstrip(r"|") ln_pattern_text += r")>(.*?)" ln_pattern = re.compile(ln_pattern_text, re.IGNORECASE | re.DOTALL) # Regular expression for finding text to be translated translation_pattern = re.compile(r'_\((?P.*?)\)_', \ re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tag in format templates pattern_format_template_name = re.compile(r''' #closing start tag (?P.*?) #name value. any char that is not end tag ()(\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tag in format templates pattern_format_template_desc = re.compile(r''' #closing start tag (?P.*?) #description value. any char that is not end tag (\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tags in format templates pattern_tag = re.compile(r''' [^/\s]+) #any char but a space or slash \s* #any number of spaces (?P(\s* #params here (?P([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s) =\s* #equality: = followed by any number of spaces (?P[\'"]) #one of the separators (?P.*?) #param value: any chars that is not a separator like previous one (?P=sep) #same separator as starting one )*) #many params \s* #any number of spaces (/)?> #end of the tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding params inside tags in format templates pattern_function_params = re.compile(''' (?P([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s) =\s* # Equality: = followed by any number of spaces (?P[\'"]) # One of the separators (?P.*?) # Param value: any chars that is not a separator like previous one (?P=sep) # Same separator as starting one ''', re.VERBOSE | re.DOTALL ) # Regular expression for finding format elements "params" attributes # (defined by @param) pattern_format_element_params = re.compile(''' @param\s* # Begins with @param keyword followed by space(s) (?P[^\s=]*)\s* # A single keyword, and then space(s) #(=\s*(?P[\'"]) # Equality, space(s) and then one of the separators #(?P.*?) # Default value: any chars that is not a separator like previous one #(?P=sep) # Same separator as starting one #)?\s* # Default value for param is optional. Followed by space(s) (?P.*) # Any text that is not end of line (thanks to MULTILINE parameter) ''', re.VERBOSE | re.MULTILINE) # Regular expression for finding format elements "see also" attribute # (defined by @see) pattern_format_element_seealso = re.compile('''@see\s*(?P.*)''', re.VERBOSE | re.MULTILINE) #Regular expression for finding 2 expressions in quotes, separated by #comma (as in template("1st","2nd") ) #Used when parsing output formats ## pattern_parse_tuple_in_quotes = re.compile(''' ## (?P[\'"]) ## (?P.*) ## (?P=sep1) ## \s*,\s* ## (?P[\'"]) ## (?P.*) ## (?P=sep2) ## ''', re.VERBOSE | re.MULTILINE) def call_old_bibformat(recID, format="HD", on_the_fly=False, verbose=0): """ FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE Calls BibFormat for the record RECID in the desired output format FORMAT. 
@param on_the_fly if False, try to return an already preformatted
                      version of the record in the database

    Note: this function always tries to return HTML, so when bibformat
    returns XML with embedded HTML format inside the tag FMT $g, as is
    suitable for prestoring output formats, we perform un-XML-izing here
    in order to return HTML body only.
    """
    out = ""
    res = []
    if not on_the_fly:
        # look for formatted record existence:
        query = "SELECT value, last_updated FROM bibfmt WHERE "\
                "id_bibrec='%s' AND format='%s'" % (recID, format)
        res = run_sql(query, None, 1)
    if res:
        # record 'recID' is formatted in 'format', so print it
        if verbose == 9:
            last_updated = res[0][1]
            out += """\n<br/><span class="quicknote">
                   Found preformatted output for record %i (cache updated on %s).
                   </span>""" % (recID, last_updated)
        decompress = zlib.decompress
        return "%s" % decompress(res[0][0])
    else:
        # record 'recID' is not formatted in 'format',
        # so try to call BibFormat on the fly or use default format:
        if verbose == 9:
            out += """\n<br/><span class="quicknote">
            Formatting record %i on-the-fly with old BibFormat.
            </span>
""" % recID # Retrieve MARCXML # Build it on-the-fly only if 'call_old_bibformat' was called # with format=xm and on_the_fly=True xm_record = record_get_xml(recID, 'xm', on_the_fly=(on_the_fly and format == 'xm')) ## import platform ## # Some problem have been found using either popen or os.system command. ## # Here is a temporary workaround until the issue is solved. ## if platform.python_compiler().find('Red Hat') > -1: ## # use os.system (result_code, result_path) = tempfile.mkstemp() command = "( %s/bibformat otype=%s ) > %s" % (CFG_BINDIR, format, result_path) (xm_code, xm_path) = tempfile.mkstemp() xm_file = open(xm_path, "w") xm_file.write(xm_record) xm_file.close() command = command + " <" + xm_path os.system(command) result_file = open(result_path,"r") bibformat_output = result_file.read() result_file.close() os.close(result_code) os.remove(result_path) os.close(xm_code) os.remove(xm_path) ## else: ## # use popen ## pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % CFG_BINDIR, ## "otype=%s" % format], ## 'rw') ## pipe_input.write(xm_record) ## pipe_input.flush() ## pipe_input.close() ## bibformat_output = pipe_output.read() ## pipe_output.close() ## pipe_error.close() if bibformat_output.startswith(""): dom = minidom.parseString(bibformat_output) for e in dom.getElementsByTagName('subfield'): if e.getAttribute('code') == 'g': for t in e.childNodes: out += t.data.encode('utf-8') else: out += bibformat_output return out def format_record(recID, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None, xml_record=None, user_info=None): """ Formats a record given output format. Main entry function of bibformat engine. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. You can either specify an record ID to format, or give its xml representation. if 'xml_record' is not None, then use it instead of recID. 'user_info' allows to grant access to some functionalities on a page depending on the user's priviledges. 'user_info' is the same object as the one returned by 'webuser.collect_user_info(req)' @param recID the ID of record to format @param of an output format code (or short identifier for the output format) @param ln the language to use to format the record @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @param search_pattern list of strings representing the user request in web interface @param xml_record an xml string representing the record to format @param user_info the information of the user who will view the formatted page @return formatted record """ if search_pattern is None: search_pattern = [] out = "" errors_ = [] # Temporary workflow (during migration of formats): # Call new BibFormat # But if format not found for new BibFormat, then call old BibFormat #Create a BibFormat Object to pass that contain record and context bfo = BibFormatObject(recID, ln, search_pattern, xml_record, user_info, of) if of.lower() != 'xm' and len(bfo.get_record()) <= 1: # Record only has recid: do not format, excepted # for xm format return "" #Find out which format template to use based on record and output format. template = decide_format_template(bfo, of) if verbose == 9 and template is not None: out += """\n
<br/><span class="quicknote">
                Using %s template for record %i.
                </span>""" % (template, recID)

    ############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
    path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, template)
    if template is None or not os.access(path, os.R_OK):
        # template not found in new BibFormat. Call old one
        if verbose == 9:
            if template is None:
                out += """\n<br/><span class="quicknote">
                No template found for output format %s and record %i.
                (Check invenio.err log file for more details)
                </span>""" % (of, recID)
            else:
                out += """\n<br/><span class="quicknote">
                Template %s could not be read.
                </span>""" % (template)
        if CFG_PATH_PHP:
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
                Using old BibFormat for record %s.
                </span>""" % recID
            return out + call_old_bibformat(recID, format=of,
                                            on_the_fly=True, verbose=verbose)
        ############################# END ##################################
        error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_TEMPLATE_FOUND", of)],
                                       stream='error', ln=CFG_SITE_LANG)
        errors_.append(error)
        if verbose == 0:
            register_errors(error, 'error')
        elif verbose > 5:
            return out + error[0][1]
        return out

    # Format with template
    (out_, errors) = format_with_format_template(template, bfo, verbose)
    errors_.extend(errors)
    out += out_

    return out

def decide_format_template(bfo, of):
    """
    Returns the format template name that should be used for formatting
    given output format and BibFormatObject.

    Look at of rules, and take the first matching one.
    If no rule matches, returns None.

    To match we ignore lettercase and spaces before and after value of
    rule and value of record.

    @param bfo a BibFormatObject
    @param of the code of the output format to use
    """
    output_format = get_output_format(of)

    for rule in output_format['rules']:
        value = bfo.field(rule['field']).strip() #Remove spaces
        pattern = rule['value'].strip() #Remove spaces
        match_obj = re.match(pattern, value, re.IGNORECASE)
        if match_obj is not None and \
               match_obj.start() == 0 and match_obj.end() == len(value):
            return rule['template']

    template = output_format['default']
    if template != '':
        return template
    else:
        return None

def format_with_format_template(format_template_filename, bfo,
                                verbose=0, format_template_code=None):
    """
    Format a record given a format template. Also returns errors.

    Returns a formatted version of the record represented by bfo,
    in the language specified in bfo, and with the specified format template.

    If format_template_code is provided, the template will not be loaded from
    format_template_filename (but format_template_filename will still be used
    to determine if bft or xsl transformation applies). This allows to preview
    format code without having to save a file on disk.

    @param format_template_filename the filename of a format template
    @param bfo the object containing parameters for the current formatting
    @param format_template_code if not empty, use code as template instead of
                                reading format_template_filename (used for previews)
    @param verbose the level of verbosity from 0 to 9
                   (0: silent, 5: errors, 7: errors and warnings,
                   9: errors and warnings, stop if error (debug mode))
    @return tuple (formatted text, errors)
    """
    _ = gettext_set_language(bfo.lang)

    def translate(match):
        """
        Translate matching values
        """
        word = match.group("word")
        translated_word = _(word)
        return translated_word

    errors_ = []
    if format_template_code is not None:
        format_content = str(format_template_code)
    else:
        format_content = get_format_template(format_template_filename)['code']

    if format_template_filename is None or \
           format_template_filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
        # .bft
        filtered_format = filter_languages(format_content, bfo.lang)
        localized_format = translation_pattern.sub(translate, filtered_format)
        (evaluated_format, errors) = eval_format_template_elements(localized_format,
                                                                   bfo,
                                                                   verbose)
        errors_ = errors
    else:
        # .xsl
        # Fetch MARCXML.
# On-the-fly xm if we are now formatting in xm
        xml_record = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
                     record_get_xml(bfo.recID, 'xm', on_the_fly=False)

        # Transform MARCXML using stylesheet
        evaluated_format = format(xml_record, template_source=format_content)

    return (evaluated_format, errors_)

def eval_format_template_elements(format_template, bfo, verbose=0):
    """
    Evaluates the format elements of the given template and replaces each
    element with its value. Also returns errors.

    Prepare the format template content so that we can directly replace the
    marc code by their value. This implies:
    1) Look for special tags
    2) replace special tags by their evaluation

    @param format_template the format template code
    @param bfo the object containing parameters for the current formatting
    @param verbose the level of verbosity from 0 to 9
                   (0: silent, 5: errors, 7: errors and warnings,
                   9: errors and warnings, stop if error (debug mode))
    @return tuple (result, errors)
    """
    errors_ = []

    # First define insert_element_code(match), used in re.sub() function
    def insert_element_code(match):
        """
        Analyses 'match', interpret the corresponding code, and return the
        result of the evaluation.

        Called by substitution in 'eval_format_template_elements(...)'

        @param match a match object corresponding to the special tag that
                     must be interpreted
        """
        function_name = match.group("function_name")
        try:
            format_element = get_format_element(function_name, verbose)
        except Exception, e:
            if verbose >= 5:
                return '<b><span style="color: rgb(255, 0, 0);">' + \
                       cgi.escape(str(e)).replace('\n', '<br/>') + \
                       '</span></b>'
        if format_element is None:
            error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME",
                                             function_name)],
                                           stream='error', ln=CFG_SITE_LANG)
            errors_.append(error)
            if verbose >= 5:
                return '<b><span style="color: rgb(255, 0, 0);">' + \
                       error[0][1] + '</span></b>'
        else:
            params = {}
            # Look for function parameters given in format template code
            all_params = match.group('params')
            if all_params is not None:
                function_params_iterator = pattern_function_params.finditer(all_params)
                for param_match in function_params_iterator:
                    name = param_match.group('param')
                    value = param_match.group('value')
                    params[name] = value

            # Evaluate element with params and return (Do not return errors)
            (result, errors) = eval_format_element(format_element,
                                                   bfo,
                                                   params,
                                                   verbose)
            errors_.append(errors)
            return result

    # Substitute special tags in the format by our own text.
    # Special tags have the form <BFE_ELEMENT_NAME att1="value1" att2="value2"/>
    format = pattern_tag.sub(insert_element_code, format_template)

    return (format, errors_)

def eval_format_element(format_element, bfo, parameters={}, verbose=0):
    """
    Returns the result of the evaluation of the given format element name,
    with given BibFormatObject and parameters. Also returns the errors of
    the evaluation.

    @param format_element a format element structure as returned by
                          get_format_element
    @param bfo a BibFormatObject used for formatting
    @param parameters a dict of parameters to be used for formatting.
                      Key is parameter and value is value of parameter
    @param verbose the level of verbosity from 0 to 9
                   (0: silent, 5: errors, 7: errors and warnings,
                   9: errors and warnings, stop if error (debug mode))
    @return tuple (result, errors)
    """
    errors = []

    # Load special values given as parameters
    prefix = parameters.get('prefix', "")
    suffix = parameters.get('suffix', "")
    default_value = parameters.get('default', "")
    escape = parameters.get('escape', "")
    output_text = ''

    # 3 possible cases:
    # a) format element file is found: we execute it
    # b) format element file is not found, but exist in tag table (e.g. bfe_isbn)
    # c) format element is totally unknown. Do nothing or report error
    if format_element is not None and format_element['type'] == "python":
        # a) We found an element with the tag name, of type "python"
        # Prepare a dict 'params' to pass as parameter to 'format'
        # function of element
        params = {}

        # Look for parameters defined in format element
        # Fill them with specified default values and values
        # given as parameters
        for param in format_element['attrs']['params']:
            name = param['name']
            default = param['default']
            params[name] = parameters.get(name, default)

        # Add BibFormatObject
        params['bfo'] = bfo

        # Execute function with given parameters and return result.
function = format_element['code'] try: output_text = apply(function, (), params) except Exception, e: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT", name, str(params)) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: tb = sys.exc_info()[2] error_string = get_msgs_for_code_list(error, stream='error', ln=CFG_SITE_LANG) stack = traceback.format_exception(Exception, e, tb, limit=None) output_text = ''+ \ str(error_string[0][1]) + "".join(stack) +' ' # None can be returned when evaluating function if output_text is None: output_text = "" else: output_text = str(output_text) # Escaping: # (1) By default, everything is escaped in mode 1 # (2) If evaluated element has 'escape_values()' function, use # its returned value as escape mode, and override (1) # (3) If template has a defined parameter (in allowed values), # use it, and override (1) and (2) # (1) escape_mode = 1 # (2) escape_function = format_element['escape_function'] if escape_function is not None: try: escape_mode = apply(escape_function, (), {'bfo': bfo}) except Exception, e: error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT_ESCAPE", name) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: tb = sys.exc_info()[2] error_string = get_msgs_for_code_list(error, stream='error', ln=CFG_SITE_LANG) output_text += ''+ \ str(error_string[0][1]) +' ' # (3) if escape in ['0', '1', '2', '3', '4', '5', '6']: escape_mode = int(escape) #If escape is equal to 1, then escape all # HTML reserved chars. if escape_mode > 0: output_text = escape_field(output_text, mode=escape_mode) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty if output_text.strip() != "": output_text = prefix + output_text + suffix # Add the default value if output_text is empty if output_text == "": output_text = default_value return (output_text, errors) elif format_element is not None and format_element['type'] == "field": # b) We have not found an element in files that has the tag # name. Then look for it in the table "tag" # # # # Load special values given as parameters separator = parameters.get('separator ', "") nbMax = parameters.get('nbMax', "") escape = parameters.get('escape', "1") # By default, escape here # Get the fields tags that have to be printed tags = format_element['attrs']['tags'] output_text = [] # Get values corresponding to tags for tag in tags: p_tag = parse_tag(tag) values = record_get_field_values(bfo.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if len(values)>0 and isinstance(values[0], dict): #flatten dict to its values only values_list = map(lambda x: x.values(), values) #output_text.extend(values) for values in values_list: output_text.extend(values) else: output_text.extend(values) if nbMax != "": try: nbMax = int(nbMax) output_text = output_text[:nbMax] except: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_NBMAX_NOT_INT", name) errors.append(error) if verbose < 5: register_errors(error, 'error') elif verbose >= 5: error_string = get_msgs_for_code_list(error, stream='error', ln=CFG_SITE_LANG) output_text = output_text.append(error_string[0][1]) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty. # If evaluation is empty string, return default value if it exists. # Else return empty string if ("".join(output_text)).strip() != "": # If escape is equal to 1, then escape all # HTML reserved chars. 
if escape == '1': output_text = cgi.escape(separator.join(output_text)) else: output_text = separator.join(output_text) output_text = prefix + output_text + suffix else: #Return default value output_text = default_value return (output_text, errors) else: # c) Element is unknown error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", format_element)], stream='error', ln=CFG_SITE_LANG) errors.append(error) if verbose < 5: register_errors(error, 'error') return ("", errors) elif verbose >= 5: if verbose >= 9: sys.exit(error[0][1]) return ('' + \ error[0][1]+'', errors) def filter_languages(format_template, ln='en'): """ Filters the language tags that do not correspond to the specified language. @param format_template the format template code @param ln the language that is NOT filtered out from the template @return the format template with unnecessary languages filtered out """ # First define search_lang_tag(match) and clean_language_tag(match), used # in re.sub() function def search_lang_tag(match): """ Searches for the ... tag and remove inner localized tags such as , , that are not current_lang. If current_lang cannot be found inside ... , try to use 'CFG_SITE_LANG' @param match a match object corresponding to the special tag that must be interpreted """ current_lang = ln def clean_language_tag(match): """ Return tag text content if tag language of match is output language. Called by substitution in 'filter_languages(...)' @param match a match object corresponding to the special tag that must be interpreted """ if match.group(1) == current_lang: return match.group(2) else: return "" # End of clean_language_tag lang_tag_content = match.group("langs") # Try to find tag with current lang. If it does not exists, # then current_lang becomes CFG_SITE_LANG until the end of this # replace pattern_current_lang = re.compile(r"<("+current_lang+ \ r")\s*>(.*?)()", re.IGNORECASE | re.DOTALL) if re.search(pattern_current_lang, lang_tag_content) is None: current_lang = CFG_SITE_LANG cleaned_lang_tag = ln_pattern.sub(clean_language_tag, lang_tag_content) return cleaned_lang_tag # End of search_lang_tag filtered_format_template = pattern_lang.sub(search_lang_tag, format_template) return filtered_format_template def get_format_template(filename, with_attributes=False): """ Returns the structured content of the given formate template. if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in dictionary (it might, if it has already been loaded previously) {'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} } @param filename the filename of an format template @param with_attributes if True, fetch the attributes (names and description) for format' @return strucured content of format template """ # Get from cache whenever possible global format_templates_cache if not filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) and \ not filename.endswith(".xsl"): return None if format_templates_cache.has_key(filename): # If we must return with attributes and template exist in # cache with attributes then return cache. 
def get_format_template(filename, with_attributes=False):
    """
    Returns the structured content of the given format template.

    If 'with_attributes' is True, returns the name and description.
    Else 'attrs' is not returned as key in dictionary (it might, if it
    has already been loaded previously)

    {'code': "Some template code",
     'attrs': {'name': "a name", 'description': "a description"}
    }

    @param filename the filename of a format template
    @param with_attributes if True, fetch the attributes (name and
           description) for the format template
    @return structured content of format template
    """
    # Get from cache whenever possible
    global format_templates_cache

    if not filename.endswith("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) and \
       not filename.endswith(".xsl"):
        return None

    if format_templates_cache.has_key(filename):
        # If we must return with attributes and template exists in
        # cache with attributes, then return cache.
        # Else reload with attributes.
        if with_attributes and \
               format_templates_cache[filename].has_key('attrs'):
            return format_templates_cache[filename]

    format_template = {'code': ""}
    try:
        path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename)
        format_file = open(path)
        format_content = format_file.read()
        format_file.close()

        # Load format template code
        # Remove name and description
        if filename.endswith("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
            code_and_description = \
                pattern_format_template_name.sub("", format_content)
            code = pattern_format_template_desc.sub("", code_and_description)
        else:
            code = format_content
        format_template['code'] = code

    except Exception, e:
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE",
                                          filename, str(e))],
                                        stream='error', ln=CFG_SITE_LANG)
        register_errors(errors, 'error')

    # Save attributes if necessary
    if with_attributes:
        format_template['attrs'] = get_format_template_attrs(filename)

    # Cache and return
    format_templates_cache[filename] = format_template
    return format_template

def get_format_templates(with_attributes=False):
    """
    Returns the list of all format templates, as a dictionary with
    filenames as keys.

    If 'with_attributes' is True, returns the name and description.
    Else 'attrs' is not returned as key in each dictionary (it might, if
    it has already been loaded previously)

    [{'code': "Some template code",
      'attrs': {'name': "a name", 'description': "a description"}
     },
     ...
    ]

    @param with_attributes if True, fetch the attributes (names and
           descriptions) for the formats
    """
    format_templates = {}
    files = os.listdir(CFG_BIBFORMAT_TEMPLATES_PATH)

    for filename in files:
        if filename.endswith("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) or \
               filename.endswith(".xsl"):
            format_templates[filename] = get_format_template(filename,
                                                             with_attributes)
    return format_templates

def get_format_template_attrs(filename):
    """
    Returns the attributes of the format template with given filename.

    The attributes are {'name', 'description'}

    Caution: the function does not check that the path exists or that
    the format template is valid.

    @param filename the path to a format template
    """
    attrs = {}
    attrs['name'] = ""
    attrs['description'] = ""
    try:
        template_file = open("%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH,
                                         os.sep, filename))
        code = template_file.read()
        template_file.close()

        match = None
        if filename.endswith(".xsl"):
            # .xsl
            attrs['name'] = filename[:-4]
        else:
            # .bft
            match = pattern_format_template_name.search(code)
            if match is not None:
                attrs['name'] = match.group('name')
            else:
                attrs['name'] = filename

            match = pattern_format_template_desc.search(code)
            if match is not None:
                attrs['description'] = match.group('desc').rstrip('.')
    except Exception, e:
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE",
                                          filename, str(e))],
                                        stream='error', ln=CFG_SITE_LANG)
        register_errors(errors, 'error')
        attrs['name'] = filename

    return attrs
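# Illustrative usage sketch (not part of the module; the template filename
# is hypothetical):
#
#   >>> tpl = get_format_template('Default_HTML_brief.bft',
#   ...                           with_attributes=True)
#   >>> tpl['attrs']['name']       # e.g. 'Default HTML brief'
#   >>> tpl['code']                # the raw template code, cached afterwards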
def get_format_element(element_name, verbose=0, with_built_in_params=False):
    """
    Returns the format element structured content.

    Returns None if the element cannot be loaded (file not found, not
    readable or invalid).

    The returned structure is
    {'attrs': {some attributes in dict. See
               get_format_element_attrs_from_*},
     'code': the_function_code,
     'type': "field" or "python" depending if element is defined in
             file or table,
     'escape_function': the function to call to know if element
                        output must be escaped}

    @param element_name the name of the format element to load
    @param verbose the level of verbosity from 0 to 9 (0: silent,
           5: errors, 7: errors and warnings, 9: errors and warnings,
           stop if error (debug mode))
    @param with_built_in_params if True, load the parameters built in
           all elements
    @return a dictionary with format element attributes
    """
    # Get from cache whenever possible
    global format_elements_cache

    errors = []
    # Resolve filename and prepare 'name' as key for the cache
    filename = resolve_format_element_filename(element_name)
    if filename is not None:
        name = filename.upper()
    else:
        name = element_name.upper()

    if format_elements_cache.has_key(name):
        element = format_elements_cache[name]
        if not with_built_in_params or \
               (with_built_in_params and \
                element['attrs'].has_key('builtin_params')):
            return element

    if filename is None:
        # Element may be in tag table
        if bibformat_dblayer.tag_exists_for_name(element_name):
            format_element = {'attrs': get_format_element_attrs_from_table( \
                                           element_name,
                                           with_built_in_params),
                              'code': None,
                              'escape_function': None,
                              'type': "field"}
            # Cache and return
            format_elements_cache[name] = format_element
            return format_element
        else:
            errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND",
                                              element_name)],
                                            stream='error', ln=CFG_SITE_LANG)
            if verbose == 0:
                register_errors(errors, 'error')
            elif verbose >= 5:
                sys.stderr.write(errors[0][1])
            return None
    else:
        format_element = {}

        module_name = filename
        if module_name.endswith(".py"):
            module_name = module_name[:-3]

        # Load element
        try:
            module = __import__(CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH + \
                                "." + module_name)
            # Load last module in import path
            # For eg. load bfe_name in
            # invenio.bibformat_elements.bfe_name
            # Used to keep flexibility regarding where elements
            # directory is (for eg. test cases)
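            # Illustrative note (not part of the module): in Python,
            # __import__('invenio.bibformat_elements.bfe_name') returns the
            # top-level package ('invenio'), not the leaf module, which is
            # why the getattr() walk below descends the remaining path
            # components:
            #
            #   >>> module = __import__('os.path')
            #   >>> module.__name__
            #   'os'
            #   >>> getattr(module, 'path').__name__   # e.g. 'posixpath'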
            components = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH.split(".")
            for comp in components[1:]:
                module = getattr(module, comp)
        except Exception, e:
            # We catch all exceptions here, as we just want to print
            # traceback in all cases
            tb = sys.exc_info()[2]
            stack = traceback.format_exception(Exception, e, tb, limit=None)
            errors = get_msgs_for_code_list([("ERR_BIBFORMAT_IN_FORMAT_ELEMENT",
                                              element_name,
                                              "\n" + "\n".join(stack[-2:-1]))],
                                            stream='error', ln=CFG_SITE_LANG)
            if verbose == 0:
                register_errors(errors, 'error')
            elif verbose >= 5:
                sys.stderr.write(errors[0][1])

        if errors:
            if verbose >= 7:
                raise Exception, errors[0][1]
            return None

        # Load function 'format()' inside element
        try:
            function_format = module.__dict__[module_name].format
            format_element['code'] = function_format
        except AttributeError, e:
            errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_FORMAT_FUNCTION",
                                              element_name)],
                                            stream='error', ln=CFG_SITE_LANG)
            if verbose == 0:
                register_errors(errors, 'error')
            elif verbose >= 5:
                sys.stderr.write(errors[0][1])

        if errors:
            if verbose >= 7:
                raise Exception, errors[0][1]
            return None

        # Load function 'escape_values()' inside element
        function_escape = getattr(module.__dict__[module_name],
                                  'escape_values', None)
        format_element['escape_function'] = function_escape

        # Prepare, cache and return
        format_element['attrs'] = get_format_element_attrs_from_function( \
            function_format, element_name, with_built_in_params)
        format_element['type'] = "python"
        format_elements_cache[name] = format_element
        return format_element

def get_format_elements(with_built_in_params=False):
    """
    Returns the list of format elements attributes as dictionary structure.

    Elements declared in files have priority over elements declared in
    the 'tag' table. The returned object has this format:

    {element_name1: {'attrs': {'description':..., 'seealso':...
                               'params':[{'name':..., 'default':...,
                                          'description':...}, ...]
                               'builtin_params':[{'name':..., 'default':...,
                                                  'description':...}, ...]
                              },
                     'code': code_of_the_element
                    },
     element_name2: {...},
     ...}

    Returns only elements that could be loaded (no error in code).

    @return a dict of format elements with name as key, and a dict as
            attributes
    @param with_built_in_params if True, load the parameters built in
           all elements
    """
    format_elements = {}

    mappings = bibformat_dblayer.get_all_name_tag_mappings()

    for name in mappings:
        format_elements[name.upper().replace(" ", "_").strip()] = \
            get_format_element(name,
                               with_built_in_params=with_built_in_params)

    files = os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH)

    for filename in files:
        filename_test = filename.upper().replace(" ", "_")
        if filename_test.endswith(".PY") and filename.upper() != "__INIT__.PY":
            if filename_test.startswith("BFE_"):
                filename_test = filename_test[4:]
            element_name = filename_test[:-3]
            element = get_format_element(element_name,
                                         with_built_in_params=with_built_in_params)
            if element is not None:
                format_elements[element_name] = element

    return format_elements
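# Illustrative usage sketch (not part of the module; the element name is
# hypothetical):
#
#   >>> elem = get_format_element('authors', with_built_in_params=True)
#   >>> elem['type']          # 'python' for a bfe_*.py element
#   >>> elem['code'](bfo)     # calls the element's format() function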
def get_format_element_attrs_from_function(function, element_name,
                                           with_built_in_params=False):
    """
    Returns the attributes of the function given as parameter.

    It looks for standard parameters of the function, default values
    and comments in the docstring.

    The attributes are
    {'name' : "name of element", # basically the name of the 'name' parameter
     'description': "a string description of the element",
     'seealso' : ["element_1.py", "element_2.py", ...], # a list of related elements
     'params': [{'name': "param_name", # a list of parameters for this element (except 'bfo')
                 'default': "default value",
                 'description': "a description"}, ...],
     'builtin_params': [{'name': "param_name", # the parameters built in all elements of this kind
                         'default': "default value",
                         'description': "a description"}, ...],
    }

    @param function the formatting function of a format element
    @param element_name the name of the element
    @param with_built_in_params if True, load the parameters built in
           all elements
    """
    attrs = {}
    attrs['description'] = ""
    attrs['name'] = element_name.replace(" ", "_").upper()
    attrs['seealso'] = []

    docstring = function.__doc__
    if isinstance(docstring, str):
        # Look for function description in docstring
        #match = pattern_format_element_desc.search(docstring)
        description = docstring.split("@param")[0]
        description = description.split("@see")[0]
        attrs['description'] = description.strip().rstrip('.')

        # Look for @see in docstring
        match = pattern_format_element_seealso.search(docstring)
        if match is not None:
            elements = match.group('see').rstrip('.').split(",")
            for element in elements:
                attrs['seealso'].append(element.strip())

    params = {}
    # Look for parameters in function definition
    (args, varargs, varkw, defaults) = inspect.getargspec(function)

    # Prepare args and defaults_list such that we can have a mapping
    # from args to defaults
    args.reverse()
    if defaults is not None:
        defaults_list = list(defaults)
        defaults_list.reverse()
    else:
        defaults_list = []

    for arg, default in map(None, args, defaults_list):
        if arg == "bfo":
            # Don't keep this as parameter. It is hidden to users, and
            # exists in all elements of this kind
            continue
        param = {}
        param['name'] = arg
        if default is None:
            # In case no check is made inside element, we prefer to
            # print "" (nothing) rather than None in output
            param['default'] = ""
        else:
            param['default'] = default
        param['description'] = "(no description provided)"

        params[arg] = param

    if isinstance(docstring, str):
        # Look for @param descriptions in docstring.
        # Add description to existing parameters in params dict
        params_iterator = pattern_format_element_params.finditer(docstring)
        for match in params_iterator:
            name = match.group('name')
            if params.has_key(name):
                params[name]['description'] = match.group('desc').rstrip('.')

    attrs['params'] = params.values()
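    # Illustrative sketch (not part of the module): for a hypothetical
    # element defined as
    #
    #   def format(bfo, limit="10"):
    #       """Prints authors. @param limit the maximum number of authors"""
    #
    # the introspection above yields one entry in attrs['params']:
    #   {'name': 'limit', 'default': '10',
    #    'description': 'the maximum number of authors'}
    # while the hidden 'bfo' argument is skipped.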
    # Load built-in parameters if necessary
    if with_built_in_params:
        builtin_params = []
        # Add 'prefix' parameter
        param_prefix = {}
        param_prefix['name'] = "prefix"
        param_prefix['default'] = ""
        param_prefix['description'] = """A prefix printed only if the record has a value for this element"""
        builtin_params.append(param_prefix)

        # Add 'suffix' parameter
        param_suffix = {}
        param_suffix['name'] = "suffix"
        param_suffix['default'] = ""
        param_suffix['description'] = """A suffix printed only if the record has a value for this element"""
        builtin_params.append(param_suffix)

        # Add 'default' parameter
        param_default = {}
        param_default['name'] = "default"
        param_default['default'] = ""
        param_default['description'] = """A default value printed if the record has no value for this element"""
        builtin_params.append(param_default)

        # Add 'escape' parameter
        param_escape = {}
        param_escape['name'] = "escape"
        param_escape['default'] = ""
        param_escape['description'] = """0 keeps value as it is. Refer to the main documentation for escaping modes 1 to 6"""
        builtin_params.append(param_escape)

        attrs['builtin_params'] = builtin_params

    return attrs

def get_format_element_attrs_from_table(element_name,
                                        with_built_in_params=False):
    """
    Returns the attributes of the format element with given name in the
    'tag' table. Returns None if element_name does not exist in the tag
    table.

    The attributes are
    {'name' : "name of element", # basically the name of the 'element_name' parameter
     'description': "a string description of the element",
     'seealso' : [], # a list of related elements. Always empty in this case
     'params': [], # a list of parameters for this element. Always empty in this case
     'builtin_params': [{'name': "param_name", # the parameters built in all elements of this kind
                         'default': "default value",
                         'description': "a description"}, ...],
     'tags': ["950.1", "203.a"] # the list of tags printed by this element
    }

    @param element_name the name of an element in the database
    @param with_built_in_params if True, load the parameters built in
           all elements
    """
    attrs = {}
    tags = bibformat_dblayer.get_tags_from_name(element_name)
    field_label = "field"
    if len(tags) > 1:
        field_label = "fields"

    attrs['description'] = "Prints %s %s of the record" % (field_label,
                                                           ", ".join(tags))
    attrs['name'] = element_name.replace(" ", "_").upper()
    attrs['seealso'] = []
    attrs['params'] = []
    attrs['tags'] = tags

    # Load built-in parameters if necessary
    if with_built_in_params:
        builtin_params = []

        # Add 'prefix' parameter
        param_prefix = {}
        param_prefix['name'] = "prefix"
        param_prefix['default'] = ""
        param_prefix['description'] = """A prefix printed only if the record has a value for this element"""
        builtin_params.append(param_prefix)

        # Add 'suffix' parameter
        param_suffix = {}
        param_suffix['name'] = "suffix"
        param_suffix['default'] = ""
        param_suffix['description'] = """A suffix printed only if the record has a value for this element"""
        builtin_params.append(param_suffix)

        # Add 'separator' parameter
        param_separator = {}
        param_separator['name'] = "separator"
        param_separator['default'] = " "
        param_separator['description'] = """A separator between elements of the field"""
        builtin_params.append(param_separator)

        # Add 'nbMax' parameter
        param_nbMax = {}
        param_nbMax['name'] = "nbMax"
        param_nbMax['default'] = ""
        param_nbMax['description'] = """The maximum number of values to print for this element. No limit if not specified"""
        builtin_params.append(param_nbMax)

        # Add 'default' parameter
        param_default = {}
        param_default['name'] = "default"
        param_default['default'] = ""
        param_default['description'] = """A default value printed if the record has no value for this element"""
        builtin_params.append(param_default)

        # Add 'escape' parameter
        param_escape = {}
        param_escape['name'] = "escape"
        param_escape['default'] = ""
        param_escape['description'] = """If set to 1, replaces special characters '&', '<' and '>' of this element by SGML entities"""
        builtin_params.append(param_escape)

        attrs['builtin_params'] = builtin_params

    return attrs
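# Illustrative sketch (not part of the module; tag and element name are
# hypothetical): a tag-table element used in a template as
#
#   <BFE_MY_FIELD prefix="Keywords: " separator="; " nbMax="3" escape="1" />
#
# accepts exactly the built-in parameters assembled above.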
def get_output_format(code, with_attributes=False, verbose=0):
    """
    Returns the structured content of the given output format.

    If 'with_attributes' is True, also returns the names and description
    of the output format, else 'attrs' is not returned in dict (it
    might, if it has already been loaded previously).

    If the output format corresponding to 'code' is not found, returns
    an empty structure.

    See get_output_format_attrs() to learn more about the attributes

    {'rules': [{'field': "980__a",
                'value': "PREPRINT",
                'template': "filename_a.bft",
               },
               {...}
              ],
     'attrs': {'names': {'generic': "a name",
                         'sn': {'en': "a name", 'fr': "un nom"},
                         'ln': {'en': "a long name"}},
               'description': "a description",
               'code': "fnm1",
               'content_type': "application/ms-excel",
               'visibility': 1
              },
     'default': "filename_b.bft"
    }

    @param code the code of an output_format
    @param with_attributes if True, fetch the attributes (names and
           description) for the format
    @param verbose the level of verbosity from 0 to 9 (0: silent,
           5: errors, 7: errors and warnings, 9: errors and warnings,
           stop if error (debug mode))
    @return structured content of output format
    """
    output_format = {'rules': [], 'default': ""}
    filename = resolve_output_format_filename(code, verbose)

    if filename is None:
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN",
                                          code)],
                                        stream='error', ln=CFG_SITE_LANG)
        register_errors(errors, 'error')
        if with_attributes:
            # Create empty attrs if asked for attributes
            output_format['attrs'] = get_output_format_attrs(code, verbose)
        return output_format

    # Get from cache whenever possible
    global format_outputs_cache
    if format_outputs_cache.has_key(filename):
        # If we must return with attributes but cache has no
        # attributes, then load attributes
        if with_attributes and not \
               format_outputs_cache[filename].has_key('attrs'):
            format_outputs_cache[filename]['attrs'] = \
                get_output_format_attrs(code, verbose)
        return format_outputs_cache[filename]

    try:
        if with_attributes:
            output_format['attrs'] = get_output_format_attrs(code, verbose)

        path = "%s%s%s" % (CFG_BIBFORMAT_OUTPUTS_PATH, os.sep, filename)
        format_file = open(path)

        current_tag = ''
        for line in format_file:
            line = line.strip()
            if line == "":
                # Ignore blank lines
                continue
            if line.endswith(":"):
                # Retrieve tag
                # Remove ':', spaces and eol at the end of line
                clean_line = line.rstrip(": \n\r")
                # The tag starts at second position
                current_tag = "".join(clean_line.split()[1:]).strip()
            elif line.find('---') != -1:
                words = line.split('---')
                template = words[-1].strip()
                condition = ''.join(words[:-1])
                value = ""
                output_format['rules'].append({'field': current_tag,
                                               'value': condition,
                                               'template': template,
                                              })
            elif line.find(':') != -1:
                # Default case
                default = line.split(':')[1].strip()
                output_format['default'] = default

    except Exception, e:
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE",
                                          filename, str(e))],
                                        stream='error', ln=CFG_SITE_LANG)
        register_errors(errors, 'error')

    # Cache and return
    format_outputs_cache[filename] = output_format
    return output_format
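# Illustrative sketch (not part of the module; the file content is
# hypothetical): given a .bfo file such as
#
#   tag 980__a:
#   PREPRINT --- PREPRINT.bft
#   default: HB.bft
#
# the parser above records one rule (field '980__a', template
# 'PREPRINT.bft', with the condition text taken from the left-hand side
# of '---') and sets 'default' to 'HB.bft'.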
def get_output_format_attrs(code, verbose=0):
    """
    Returns the attributes of an output format.

    The attributes contain 'code', which is the short identifier of the
    output format (to be given as parameter in format_record function to
    specify the output format), 'description', a description of the
    output format, 'visibility', the visibility of the format in the
    output format list on public pages, and 'names', the localized names
    of the output format. If 'content_type' is specified then the
    search_engine will send a file with this content type and with the
    result of formatting as content to the user.

    The 'names' dict always contains 'generic', 'ln' (for long name) and
    'sn' (for short names) keys. 'generic' is the default name for the
    output format. 'ln' and 'sn' contain long and short localized names
    of the output format. Only the languages for which a localization
    exists are used.

    {'names': {'generic': "a name",
               'sn': {'en': "a name", 'fr': "un nom"},
               'ln': {'en': "a long name"}},
     'description': "a description",
     'code': "fnm1",
     'content_type': "application/ms-excel",
     'visibility': 1
    }

    @param code the short identifier of the format
    @param verbose the level of verbosity from 0 to 9 (0: silent,
           5: errors, 7: errors and warnings, 9: errors and warnings,
           stop if error (debug mode))
    @return structured content of output format attributes
    """
    if code.endswith("." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION):
        code = code[:-(len(CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION) + 1)]
    attrs = {'names': {'generic': "", 'ln': {}, 'sn': {}},
             'description': '',
             'code': code.upper(),
             'content_type': "",
             'visibility': 1}

    filename = resolve_output_format_filename(code, verbose)
    if filename is None:
        return attrs

    attrs['names'] = bibformat_dblayer.get_output_format_names(code)
    attrs['description'] = bibformat_dblayer.get_output_format_description(code)
    attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code)
    attrs['visibility'] = bibformat_dblayer.get_output_format_visibility(code)

    return attrs

def get_output_formats(with_attributes=False):
    """
    Returns the list of all output formats, as a dictionary with their
    filename as key.

    If 'with_attributes' is True, also returns the names and description
    of the output formats, else 'attrs' is not returned in dicts (it
    might, if it has already been loaded previously).

    See get_output_format_attrs() to learn more about the attributes

    {'filename_1.bfo': {'rules': [{'field': "980__a",
                                   'value': "PREPRINT",
                                   'template': "filename_a.bft",
                                  },
                                  {...}
                                 ],
                        'attrs': {'names': {'generic': "a name",
                                            'sn': {'en': "a name",
                                                   'fr': "un nom"},
                                            'ln': {'en': "a long name"}},
                                  'description': "a description",
                                  'code': "fnm1"
                                 },
                        'default': "filename_b.bft"
                       },
     'filename_2.bfo': {...},
     ...
    }

    @return the list of output formats
    """
    output_formats = {}
    files = os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH)

    for filename in files:
        if filename.endswith("." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION):
            code = "".join(filename.split(".")[:-1])
            output_formats[filename] = get_output_format(code,
                                                         with_attributes)
    return output_formats

def get_kb_mapping(kb, string, default=""):
    """
    Returns the value of 'string' in the knowledge base 'kb'.

    If kb does not exist or string does not exist in kb, returns the
    'default' string value.

    @param kb a knowledge base name
    @param string a key in a knowledge base
    @param default a default value if 'string' is not in 'kb'
    @return the value corresponding to the given string in given kb
    """
    global kb_mappings_cache

    if kb_mappings_cache.has_key(kb):
        kb_cache = kb_mappings_cache[kb]
        if kb_cache.has_key(string):
            value = kb_mappings_cache[kb][string]
            if value is None:
                return default
            else:
                return value
    else:
        # Precreate for caching this kb
        kb_mappings_cache[kb] = {}

    value = bibformat_dblayer.get_kb_mapping_value(kb, string)

    kb_mappings_cache[kb][str(string)] = value
    if value is None:
        return default
    else:
        return value
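# Illustrative usage sketch (not part of the module; the knowledge base
# name and key are hypothetical):
#
#   >>> get_kb_mapping('DBCOLLID2COLL', 'ARTICLE', default='Unknown')
#
# The first call hits the database; subsequent lookups for the same kb
# are served from kb_mappings_cache.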
def resolve_format_element_filename(string):
    """
    Returns the filename of the element corresponding to 'string'.

    This is necessary since format templates code call elements by
    ignoring case, for eg. <BFE_AUTHOR> is the same as <BFE_author>.
    It is also recommended that format element filenames are prefixed
    with bfe_ ; we need to look for these too, so the given element name
    does not have to start with "BFE_".

    @param string a name for a format element
    @return the corresponding filename, with right case
    """
    if not string.endswith(".py"):
        name = string.replace(" ", "_").upper() + ".PY"
    else:
        name = string.replace(" ", "_").upper()

    files = os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH)
    for filename in files:
        test_filename = filename.replace(" ", "_").upper()

        if test_filename == name or \
               test_filename == "BFE_" + name or \
               "BFE_" + test_filename == name:
            return filename

    # No element with that name found
    # Do not log error, as it might be a normal execution case:
    # element can be in database
    return None

def resolve_output_format_filename(code, verbose=0):
    """
    Returns the filename of the output format corresponding to code.

    This is necessary since output format names are not case sensitive
    but most file systems are.

    @param code the code for an output format
    @param verbose the level of verbosity from 0 to 9 (0: silent,
           5: errors, 7: errors and warnings, 9: errors and warnings,
           stop if error (debug mode))
    @return the corresponding filename, with right case, or None if
            not found
    """
    # Remove non alphanumeric chars (except . and _)
    code = re.sub(r"[^.0-9a-zA-Z_]", "", code)
    if not code.endswith("." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION):
        code = re.sub(r"\W", "", code)
        code += "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION

    files = os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH)
    for filename in files:
        if filename.upper() == code.upper():
            return filename

    # No output format with that name found
    errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME",
                                      code)],
                                    stream='error', ln=CFG_SITE_LANG)
    if verbose == 0:
        register_errors(errors, 'error')
    elif verbose >= 5:
        sys.stderr.write(errors[0][1])
        if verbose >= 9:
            sys.exit(errors[0][1])
    return None

def get_fresh_format_template_filename(name):
    """
    Returns a new filename and name for a template with given name.

    Used when writing a new template to a file, so that the name has no
    space and is unique in the template directory.

    Returns (unique_filename, modified_name)

    @param name a name for a format template
    @return the corresponding filename, and modified name if necessary
    """
    #name = re.sub(r"\W", "", name) # Remove non alphanumeric chars
    name = name.replace(" ", "_")
    filename = name
    # Remove non alphanumeric chars (except .)
    filename = re.sub(r"[^.0-9a-zA-Z]", "", filename)
    path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename \
           + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION
    index = 1
    while os.path.exists(path):
        index += 1
        filename = name + str(index)
        path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename \
               + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION

    if index > 1:
        returned_name = (name + str(index)).replace("_", " ")
    else:
        returned_name = name.replace("_", " ")

    return (filename + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION,
            returned_name) #filename.replace("_", " "))
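# Illustrative usage sketch (not part of the module; the filename is
# hypothetical):
#
#   >>> resolve_format_element_filename('AUTHORS')
#   'bfe_authors.py'   # matching is case-insensitive and bfe_-aware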
def get_fresh_output_format_filename(code):
    """
    Returns a new filename for an output format with given code.

    Used when writing a new output format to a file, so that the code
    has no space and is unique in the output format directory. The
    filename also needs to be at most 6 chars long, as the convention is
    that filename == output format code (+ .extension).

    We return an uppercase code.
    Returns (unique_filename, modified_code)

    @param code the code of an output format
    @return the corresponding filename, and modified code if necessary
    """
    #code = re.sub(r"\W", "", code) # Remove non alphanumeric chars
    code = code.upper().replace(" ", "_")
    # Remove non alphanumeric chars (except . and _)
    code = re.sub(r"[^.0-9a-zA-Z_]", "", code)
    if len(code) > 6:
        code = code[:6]

    filename = code
    path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \
           + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    index = 2
    while os.path.exists(path):
        filename = code + str(index)
        if len(filename) > 6:
            filename = code[:-(len(str(index)))] + str(index)
        index += 1
        path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \
               + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
        # We should not need to try more than 99999 candidates; this is
        # just a sanity check.
        if index >= 99999:
            errors = get_msgs_for_code_list([("ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED",
                                              code)],
                                            stream='error', ln=CFG_SITE_LANG)
            register_errors(errors, 'error')
            sys.exit("Output format cannot be named as %s" % code)

    return (filename + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, filename)

def clear_caches():
    """
    Clear the caches (Output Format, Format Templates and Format Elements)
    """
    global format_templates_cache, format_elements_cache, \
           format_outputs_cache, kb_mappings_cache
    format_templates_cache = {}
    format_elements_cache = {}
    format_outputs_cache = {}
    kb_mappings_cache = {}
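# Illustrative sketch (not part of the module): the 6-char convention
# above means a hypothetical code 'MYFORMAT' is truncated to 'MYFORM';
# if MYFORM.bfo already exists, the collision loop yields 'MYFOR2', then
# 'MYFOR3', and so on, keeping the filename within 6 characters.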
class BibFormatObject:
    """
    An object that encapsulates a record and associated methods, and
    that is given as parameter to all format elements 'format' function.
    The object is made specifically for a given formatting, i.e. it
    includes for example the language for the formatting.

    The object provides basic accessors to the record. For full access,
    one can get the record with get_record() and then use BibRecord
    methods on the returned object.
    """
    # The record
    record = None

    # The language in which the formatting has to be done
    lang = CFG_SITE_LANG

    # A list of strings describing the context in which the record has
    # to be formatted.
    # It represents the words of the user request in web interface search
    search_pattern = []

    # The id of the record
    recID = 0

    uid = None # DEPRECATED: use bfo.user_info['uid'] instead

    # The information about the user, as returned by
    # 'webuser.collect_user_info(req)'
    user_info = None

    # The format in which the record is being formatted
    format = ''

    req = None # DEPRECATED: use bfo.user_info instead

    def __init__(self, recID, ln=CFG_SITE_LANG, search_pattern=None,
                 xml_record=None, user_info=None, format=''):
        """
        Creates a new bibformat object, with the given record.

        You can either specify a record ID to format, or give its XML
        representation. If 'xml_record' is not None, use 'xml_record'
        instead of recID for the record.

        'user_info' allows granting access to some functionalities on a
        page depending on the user's privileges. It is a dictionary in
        the following form:

        user_info = {
            'remote_ip' : '',
            'remote_host' : '',
            'referer' : '',
            'uri' : '',
            'agent' : '',
            'apache_user' : '',
            'apache_group' : [],
            'uid' : -1,
            'nickname' : '',
            'email' : '',
            'group' : [],
            'guest' : '1'
            }

        @param recID the id of a record
        @param ln the language in which the record has to be formatted
        @param search_pattern list of strings representing the request
               used by the user in web interface
        @param xml_record an XML string of the record to format
        @param user_info the information of the user who will view the
               formatted page
        @param format the format used for formatting this record
        """
        if xml_record is not None:
            # If record is given as parameter
            self.record = create_record(xml_record)[0]
            recID = record_get_field_value(self.record, "001")

        self.lang = wash_language(ln)

        if search_pattern is None:
            search_pattern = []
        self.search_pattern = search_pattern

        self.recID = recID
        self.format = format
        self.user_info = user_info
        if self.user_info is None:
            self.user_info = collect_user_info(None)

    def get_record(self):
        """
-        Returns the record of this BibFormatObject instance
+        Returns the record structure of this BibFormatObject instance

-        @return the record structure as returned by BibRecord
+        @return the record structure as defined by BibRecord library
        """
+        from invenio.search_engine import get_record

        # Create record if necessary
        if self.record is None:
            # on-the-fly creation if current output is xm
-            record = create_record(record_get_xml(self.recID, 'xm',
-                                   on_the_fly=(self.format.lower() == 'xm')))
-            self.record = record[0]
+            self.record = get_record(self.recID)

        return self.record

    def control_field(self, tag, escape=0):
        """
        Returns the value of control field given by tag in record.

        @param tag the marc code of a field
        @param escape 1 if returned value should be escaped. Else 0.
        @return value of field tag in record
        """
        if self.get_record() is None:
            # Case where BibRecord could not parse object
            return ''

        p_tag = parse_tag(tag)
        field_value = record_get_field_value(self.get_record(),
                                             p_tag[0],
                                             p_tag[1],
                                             p_tag[2],
                                             p_tag[3])
        if escape == 0:
            return field_value
        else:
            return escape_field(field_value, escape)
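    # Illustrative usage sketch (not part of the class; the record id and
    # tag are hypothetical):
    #
    #   >>> bfo = BibFormatObject(1, ln='en')
    #   >>> bfo.control_field('001')       # the record id as stored in MARC
    #   >>> bfo.field('245__a', escape=1)  # HTML-escaped title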
    def field(self, tag, escape=0):
        """
        Returns the value of the field corresponding to tag in the
        current record.

        If the value does not exist, returns empty string. Else returns
        the same as bfo.fields(..)[0] (see docstring below).

        'escape' parameter allows to escape special characters of the
        field. The value of escape can be:
        0 - no escaping
        1 - escape all HTML characters
        2 - remove unsafe HTML tags (Eg. keep <br/>)
        3 - Mix of mode 1 and 2. If value of field starts with
            <!--HTML-->, then use mode 2. Else use mode 1.
        4 - Remove all HTML tags
        5 - Same as 2, with more tags allowed (like <img>)
        6 - Same as 3, with more tags allowed (like <img>)

        @param tag the marc code of a field
        @param escape 1 if returned value should be escaped. Else 0.
               (see above for other modes)
        @return value of field tag in record
        """
        list_of_fields = self.fields(tag)
        if len(list_of_fields) > 0:
            # Escaping below
            if escape == 0:
                return list_of_fields[0]
            else:
                return escape_field(list_of_fields[0], escape)
        else:
            return ""

    def fields(self, tag, escape=0, repeatable_subfields_p=False):
        """
        Returns the list of values corresponding to "tag".

        If tag has an undefined subcode (such as 999C5), the function
        returns a list of dictionaries, whose keys are the subcodes and
        the values are the values of tag.subcode. If the tag has a
        subcode, simply returns list of values corresponding to tag.

        Eg. for given MARC:
            999C5 $a value_1a $b value_1b
            999C5 $b value_2b
            999C5 $b value_3b $b value_3b_bis

            >> bfo.fields('999C5b')
            >> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis']
            >> bfo.fields('999C5')
            >> [{'a': 'value_1a', 'b': 'value_1b'},
                {'b': 'value_2b'},
                {'b': 'value_3b'}]

        By default the function returns only one value for each subfield
        (that is, it considers that repeatable subfields are not
        allowed). That is why in the above example 'value_3b_bis' is not
        shown for bfo.fields('999C5'). (Note that it is not defined
        which of value_3b or value_3b_bis is returned). This is to
        simplify the use of the function, as most of the time subfields
        are not repeatable (in that way we get a string instead of a
        list). You can allow repeatable subfields by setting the
        'repeatable_subfields_p' parameter to True. In this mode, the
        above example would return:

            >> bfo.fields('999C5b', repeatable_subfields_p=True)
            >> ['value_1b', 'value_2b', 'value_3b']
            >> bfo.fields('999C5', repeatable_subfields_p=True)
            >> [{'a': ['value_1a'], 'b': ['value_1b']},
                {'b': ['value_2b']},
                {'b': ['value_3b', 'value_3b_bis']}]

        NOTICE THAT THE RETURNED STRUCTURE IS DIFFERENT. Also note that
        whatever the value of 'repeatable_subfields_p' is,
        bfo.fields('999C5b') always shows all fields, even repeatable
        ones. This is because the parameter has no impact on the
        returned structure (it is always a list).

        'escape' parameter allows to escape special characters of the
        fields. The value of escape can be:
        0 - no escaping
        1 - escape all HTML characters
        2 - remove unsafe HTML tags (Eg. keep <br/>)
        3 - Mix of mode 1 and 2. If value of field starts with
            <!--HTML-->, then use mode 2. Else use mode 1.
        4 - Remove all HTML tags
        5 - Same as 2, with more tags allowed (like <img>)
        6 - Same as 3, with more tags allowed (like <img>)

        @param tag the marc code of a field
        @param escape 1 if returned values should be escaped. Else 0.
        @param repeatable_subfields_p if True, returns the list of
               subfields in the dictionary
        @return values of field tag in record
        """
        if self.get_record() is None:
            # Case where BibRecord could not parse object
            return []

        p_tag = parse_tag(tag)
        if p_tag[3] != "":
            # Subcode has been defined. Simply returns list of values
            values = record_get_field_values(self.get_record(),
                                             p_tag[0],
                                             p_tag[1],
                                             p_tag[2],
                                             p_tag[3])
            if escape == 0:
                return values
            else:
                return [escape_field(value, escape) for value in values]
        else:
            # Subcode is undefined. Returns list of dicts.
            # However it might be the case of a control field.
            instances = record_get_field_instances(self.get_record(),
                                                   p_tag[0],
                                                   p_tag[1],
                                                   p_tag[2])
            if repeatable_subfields_p:
                list_of_instances = []
                for instance in instances:
                    instance_dict = {}
                    for subfield in instance[0]:
                        if not instance_dict.has_key(subfield[0]):
                            instance_dict[subfield[0]] = []
                        if escape == 0:
                            instance_dict[subfield[0]].append(subfield[1])
                        else:
                            instance_dict[subfield[0]].append( \
                                escape_field(subfield[1], escape))
                    list_of_instances.append(instance_dict)
                return list_of_instances
            else:
                if escape == 0:
                    return [dict(instance[0]) for instance in instances]
                else:
                    return [dict([(subfield[0],
                                   escape_field(subfield[1], escape)) \
                                  for subfield in instance[0]]) \
                            for instance in instances]

    def kb(self, kb, string, default=""):
        """
        Returns the value of the "string" in the knowledge base "kb".

        If kb does not exist or string does not exist in kb, returns
        'default' string or empty string if not specified.

        @param kb a knowledge base name
        @param string the string we want to translate
        @param default a default value returned if 'string' not found
               in 'kb'
        """
        if string is None:
            return default

        val = get_kb_mapping(kb, string, default)

        if val is None:
            return default
        else:
            return val

def escape_field(value, mode=0):
    """
    Utility function used to escape the value of a field in given mode.

    - mode 0: no escaping
    - mode 1: escaping all HTML/XML characters (escaped chars are shown
      as escaped)
    - mode 2: escaping unsafe HTML tags to avoid XSS, but keep basic
      ones (such as <br/>).
      Escaped tags are removed.
    - mode 3: mix of mode 1 and mode 2. If field_value starts with
      <!--HTML-->, then use mode 2. Else use mode 1.
    - mode 4: escaping all HTML/XML tags (escaped tags are removed)
    - mode 5: same as 2, but allows more tags, like <img>
    - mode 6: same as 3, but allows more tags, like <img>
    """
    if mode == 1:
        return cgi.escape(value)
    elif mode in [2, 5]:
        allowed_attribute_whitelist = cfg_html_buffer_allowed_attribute_whitelist
        allowed_tag_whitelist = cfg_html_buffer_allowed_tag_whitelist + \
                                ('class',)
        if mode == 5:
            allowed_attribute_whitelist += ('src', 'alt', 'width', 'height',
                                            'style', 'summary', 'border',
                                            'cellspacing', 'cellpadding')
            allowed_tag_whitelist += ('img', 'table', 'td', 'tr', 'th',
                                      'span', 'caption')
        try:
            return washer.wash(value,
                               allowed_attribute_whitelist=\
                               allowed_attribute_whitelist,
                               allowed_tag_whitelist=\
                               allowed_tag_whitelist)
        except HTMLParseError:
            # Parsing failed
            return cgi.escape(value)
    elif mode in [3, 6]:
        if value.lstrip(' \n').startswith(html_field):
            allowed_attribute_whitelist = cfg_html_buffer_allowed_attribute_whitelist
            allowed_tag_whitelist = cfg_html_buffer_allowed_tag_whitelist + \
                                    ('class',)
            if mode == 6:
                allowed_attribute_whitelist += ('src', 'alt', 'width',
                                                'height', 'style', 'summary',
                                                'border', 'cellspacing',
                                                'cellpadding')
                allowed_tag_whitelist += ('img', 'table', 'td', 'tr', 'th',
                                          'span', 'caption')
            try:
                return washer.wash(value,
                                   allowed_attribute_whitelist=\
                                   allowed_attribute_whitelist,
                                   allowed_tag_whitelist=\
                                   allowed_tag_whitelist)
            except HTMLParseError:
                # Parsing failed
                return cgi.escape(value)
        else:
            return cgi.escape(value)
    elif mode == 4:
        try:
            return washer.wash(value,
                               allowed_attribute_whitelist=[],
                               allowed_tag_whitelist=[])
        except HTMLParseError:
            # Parsing failed
            return cgi.escape(value)
    else:
        return value

def bf_profile():
    """
    Runs a benchmark
    """
    for i in range(1, 51):
        format_record(i, "HD", ln=CFG_SITE_LANG, verbose=9, search_pattern=[])
    return

if __name__ == "__main__":
    import profile
    import pstats
    #bf_profile()
    profile.run('bf_profile()', "bibformat_profile")
    p = pstats.Stats("bibformat_profile")
    p.strip_dirs().sort_stats("cumulative").print_stats()
diff --git a/modules/bibharvest/lib/bibharvestadminlib.py b/modules/bibharvest/lib/bibharvestadminlib.py
index 790a1fff5..313389c25 100644
--- a/modules/bibharvest/lib/bibharvestadminlib.py
+++ b/modules/bibharvest/lib/bibharvestadminlib.py
@@ -1,1223 +1,1224 @@
## Administrator interface for BibHarvest
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""CDS Invenio Bibharvest Administrator Interface.""" __revision__ = "$Id$" import re import os, sys, string import ConfigParser import time import random import urllib import tempfile import datetime from httplib import InvalidURL from invenio.config import \ CFG_SITE_LANG, \ CFG_TMPDIR, \ CFG_VERSION, \ CFG_SITE_URL,\ CFG_ETCDIR, \ CFG_BINDIR, \ CFG_LOGDIR, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG from invenio.bibrankadminlib import \ write_outcome,modify_translations,\ get_def_name,\ get_i8n_name,\ get_name,\ get_rnk_nametypes,\ get_languages,\ check_user,\ is_adminuser,\ addadminbox,\ tupletotable,\ tupletotable_onlyselected,\ addcheckboxes,\ createhiddenform from invenio.dbquery import run_sql from invenio.webpage import page, pageheaderonly, pagefooteronly, adderrorbox from invenio.webuser import getUid, get_email from invenio.bibharvest_dblayer import get_history_entries, \ HistoryEntry, get_month_logs_size, get_history_entries_for_day, \ get_day_logs_size, get_entry_history, get_entry_logs_size, \ get_holdingpen_entries, delete_holdingpen_entry, get_holdingpen_entry -from invenio.search_engine import search_pattern +from invenio.search_engine import search_pattern, get_record import invenio.template from invenio import oaiharvestlib from invenio.xmlmarc2textmarclib import recxml2recmarc, create_marc_record from invenio import bibformat from invenio.bibrecord import create_record + bibharvest_templates = invenio.template.load('bibharvest') tmppath = CFG_TMPDIR + '/bibharvestadmin.' + str(os.getpid()) guideurl = "help/admin/bibharvest-admin-guide" freqs = [[0, "never"], [24, "daily"], [168, "weekly"], [720, "monthly"] ] posts = [["h", "harvest only (h)"], ["h-c", "harvest and convert (h-c)"], ["h-u", "harvest and upload (h-u)"], ["h-c-u", "harvest, convert and upload (h-c-u)"], ["h-c-f-u", "harvest, convert, filter, upload (h-c-f-u)"]] dates = [[0, "from beginning"], [1, "from today"]] def getnavtrail(previous = ''): """Get the navtrail""" return bibharvest_templates.tmpl_getnavtrail(previous = previous, ln = CFG_SITE_LANG) def generate_sources_actions_menu(ln, oai_src_id): namelinked_args = [] namelinked_args.append(["oai_src_id", str(oai_src_id)]) namelinked_args.append(["ln", ln]) editACTION = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/editsource", title = "edit", args = namelinked_args) delACTION = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/delsource", title = "delete", args = namelinked_args) testACTION = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/testsource", title = "test", args = namelinked_args) historyACTION = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/viewhistory", title = "history", args = namelinked_args) harvestACTION = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/harvest", title = "harvest", args = namelinked_args) return editACTION + " / " + delACTION + " / " + testACTION + " / " + historyACTION + " / " + harvestACTION def generate_oai_source_operations(ln, oai_src_id): result = bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG, title = "OAI source operations", guideurl=guideurl) result += generate_sources_actions_menu(ln, oai_src_id) return result def perform_request_index(ln=CFG_SITE_LANG): """start area for administering harvesting from OAI 
def perform_request_index(ln=CFG_SITE_LANG):
    """start area for administering harvesting from OAI repositories"""

    titlebar = bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG,
        title = "Overview of sources", guideurl = guideurl,
        extraname = "add new OAI source",
        extraurl = "admin/bibharvest/bibharvestadmin.py/addsource")
    titlebar2 = bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG,
        title = "Harvesting status", guideurl = guideurl)
    header = ['name', 'baseURL', 'metadataprefix', 'frequency',
              'bibconvertfile', 'postprocess', 'actions']
    header2 = ['name', 'last update']
    oai_src = get_oai_src()
    upd_status = get_update_status()

    sources = []
    for (oai_src_id, oai_src_name, oai_src_baseurl, oai_src_prefix,
         oai_src_frequency, oai_src_config, oai_src_post,
         oai_src_bibfilter, oai_src_setspecs) in oai_src:
        namelinked_args = []
        namelinked_args.append(["oai_src_id", str(oai_src_id)])
        namelinked_args.append(["ln", ln])
        namelinked = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG,
            funcurl = "admin/bibharvest/bibharvestadmin.py/editsource",
            title = oai_src_name, args = namelinked_args)
        freq = "Not Set"
        if oai_src_frequency == 0:
            freq = "never"
        elif oai_src_frequency == 24:
            freq = "daily"
        elif oai_src_frequency == 168:
            freq = "weekly"
        elif oai_src_frequency == 720:
            freq = "monthly"
        action = generate_sources_actions_menu(ln, oai_src_id)
        sources.append([namelinked, oai_src_baseurl, oai_src_prefix, freq,
                        oai_src_config, oai_src_post, action])

    updates = []
    for (upd_name, upd_status) in upd_status:
        if not upd_status:
            upd_status = bibharvest_templates.tmpl_print_warning(CFG_SITE_LANG,
                "Never harvested")
        else:
            # Cut away the fractional seconds
            upd_status = re.sub(r'\.[0-9]+$', '', str(upd_status))
        updates.append([upd_name, upd_status])

    (schtime, schstatus) = get_next_schedule()
    if schtime:
        schtime = re.sub(r'\.[0-9]+$', '', str(schtime))

    holdingpen_link = bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG,
        funcurl = "admin/bibharvest/bibharvestadmin.py/viewholdingpen",
        title = "View Holding Pen", args = [["ln", str(ln)],])

    output = titlebar
    output += bibharvest_templates.tmpl_output_numbersources(CFG_SITE_LANG,
                                                             get_tot_oai_src())
    output += tupletotable(header=header, tuple=sources)
    output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2)
    output += titlebar2
    output += bibharvest_templates.tmpl_output_schedule(CFG_SITE_LANG, schtime,
                                                        str(schstatus))
    output += holdingpen_link
    output += bibharvest_templates.tmpl_print_brs(ln, 2)
    output += tupletotable(header=header2, tuple=updates)
    return output

def perform_request_editsource(oai_src_id=None, oai_src_name='',
                               oai_src_baseurl='', oai_src_prefix='',
                               oai_src_frequency='', oai_src_config='',
                               oai_src_post='', ln=CFG_SITE_LANG,
                               confirm=-1, oai_src_sets=[],
                               oai_src_bibfilter=''):
    """creates html form to edit an OAI source.

    This method is calling other methods which again are calling this one
    and sending back the output of the method.

    confirm - determines the validation status of the data input into
    the form"""
    if oai_src_id is None:
        return "No OAI source ID selected."
output = "" subtitle = bibharvest_templates.tmpl_draw_subtitle(ln = CFG_SITE_LANG, title = "edit source", subtitle = "Edit OAI source", guideurl = guideurl) if confirm in [-1, "-1"]: oai_src = get_oai_src(oai_src_id) oai_src_name = oai_src[0][1] oai_src_baseurl = oai_src[0][2] oai_src_prefix = oai_src[0][3] oai_src_frequency = oai_src[0][4] oai_src_config = oai_src[0][5] oai_src_post = oai_src[0][6] oai_src_sets = oai_src[0][7].split() oai_src_bibfilter = oai_src[0][8] text = bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "Source name", name = "oai_src_name", value = oai_src_name) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "Base URL", name = "oai_src_baseurl", value = oai_src_baseurl) sets = findSets(oai_src_baseurl) if sets: # Show available sets to users sets_specs = [set[0] for set in sets] sets_names = [set[1] for set in sets] sets_labels = [((set[1] and set[0]+' ('+set[1]+')') or set[0]) \ for set in sets] sets_states = [ ((set[0] in oai_src_sets and 1) or 0) for set in sets] text += bibharvest_templates.tmpl_admin_checkboxes(ln=CFG_SITE_LANG, title="Sets", name="oai_src_sets", values=sets_specs, labels=sets_labels, states=sets_states) else: # Let user specify sets in free textbox text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "Sets", name='oai_src_sets', value=' '.join(oai_src_sets)) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "Metadata prefix", name = "oai_src_prefix", value = oai_src_prefix) text += bibharvest_templates.tmpl_admin_w200_select(ln = CFG_SITE_LANG, title = "Frequency", name = "oai_src_frequency", valuenil = "- select frequency -" , values = freqs, lastval = oai_src_frequency) text += bibharvest_templates.tmpl_admin_w200_select(ln = CFG_SITE_LANG, title = "Postprocess", name = "oai_src_post", valuenil = "- select mode -" , values = posts, lastval = oai_src_post) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "BibConvert configuration file (if needed by postprocess)", name = "oai_src_config", value = oai_src_config) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "BibFilter program (if needed by postprocess)", name = "oai_src_bibfilter", value = oai_src_bibfilter) text += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += createhiddenform(action="editsource#1", text=text, button="Modify", oai_src_id=oai_src_id, ln=ln, confirm=1) if confirm in [1, "1"] and not oai_src_name: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please enter a name for the source.") elif confirm in [1, "1"] and not oai_src_prefix: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please enter a metadata prefix.") elif confirm in [1, "1"] and not oai_src_baseurl: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please enter a base url.") elif confirm in [1, "1"] and not oai_src_frequency: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please choose a frequency of harvesting") elif confirm in [1, "1"] and not oai_src_post: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please choose a postprocess mode") elif confirm in [1, "1"] and oai_src_post.startswith("h-c") and (not oai_src_config or validatefile(oai_src_config)!=0): output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "You selected a postprocess mode which involves conversion.") output += 
        output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG,
            "Please enter a valid name of, or a full path to, a BibConvert "
            "config file, or change the postprocess mode.")
    elif oai_src_id > -1 and confirm in [1, "1"]:
        if not oai_src_frequency:
            oai_src_frequency = 0
        if not oai_src_config:
            oai_src_config = "NULL"
        if not oai_src_post:
            oai_src_post = "h"
        res = modify_oai_src(oai_src_id, oai_src_name, oai_src_baseurl,
                             oai_src_prefix, oai_src_frequency,
                             oai_src_config, oai_src_post, oai_src_sets,
                             oai_src_bibfilter)
        output += write_outcome(res)

    lnargs = [["ln", ln]]
    output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2)
    output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG,
        funcurl = "admin/bibharvest/bibharvestadmin.py/index",
        title = "Go back to the OAI sources overview", args = lnargs)

    body = [output]

    return addadminbox(subtitle, body)

def perform_request_addsource(oai_src_name=None, oai_src_baseurl='',
                              oai_src_prefix='', oai_src_frequency='',
                              oai_src_lastrun='', oai_src_config='',
                              oai_src_post='', ln=CFG_SITE_LANG,
                              confirm=-1, oai_src_sets=[],
                              oai_src_bibfilter=''):
    """creates html form to add a new source"""
    if oai_src_name is None:
        return "No OAI source name selected."

    subtitle = bibharvest_templates.tmpl_draw_subtitle(ln=CFG_SITE_LANG,
        title="add source", subtitle="Add new OAI source",
        guideurl=guideurl)
    output = ""

    if confirm <= -1:
        text = bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1)
        text += bibharvest_templates.tmpl_admin_w200_text(ln=CFG_SITE_LANG,
            title="Enter the base url", name="oai_src_baseurl",
            value=oai_src_baseurl + 'http://')
        output = createhiddenform(action="addsource",
                                  text=text,
                                  ln=ln,
                                  button="Validate",
                                  confirm=0)

    if (confirm not in ["-1", -1] and validate(oai_src_baseurl)[0] == 0) or \
           confirm in ["1", 1]:
        output += bibharvest_templates.tmpl_output_validate_info(CFG_SITE_LANG,
            0, str(oai_src_baseurl))
        output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2)
        text = bibharvest_templates.tmpl_admin_w200_text(ln=CFG_SITE_LANG,
            title="Source name", name="oai_src_name", value=oai_src_name)
        metadatas = findMetadataFormats(oai_src_baseurl)
        if metadatas:
            # Show available metadata to user
            prefixes = []
            for value in metadatas:
                prefixes.append([value, str(value)])
            text += bibharvest_templates.tmpl_admin_w200_select(ln=CFG_SITE_LANG,
                title="Metadata prefix", name="oai_src_prefix",
                valuenil="- select prefix -", values=prefixes,
                lastval=oai_src_prefix)
        else:
            # Let user specify prefix in free textbox
            text += bibharvest_templates.tmpl_admin_w200_text(ln=CFG_SITE_LANG,
                title="Metadata prefix", name="oai_src_prefix",
                value=oai_src_prefix)
        sets = findSets(oai_src_baseurl)
        if sets:
            # Show available sets to users
            sets_specs = [set[0] for set in sets]
            sets_names = [set[1] for set in sets]
            sets_labels = [((set[1] and set[0] + ' (' + set[1] + ')') or set[0]) \
                           for set in sets]
            sets_states = [((set[0] in oai_src_sets and 1) or 0) \
                           for set in sets]
            text += bibharvest_templates.tmpl_admin_checkboxes(ln=CFG_SITE_LANG,
                title="Sets", name="oai_src_sets", values=sets_specs,
                labels=sets_labels, states=sets_states)
        else:
            # Let user specify sets in free textbox
            text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG,
                title = "Sets", name = 'oai_src_sets',
                value = ' '.join(oai_src_sets))
        text += bibharvest_templates.tmpl_admin_w200_select(ln = CFG_SITE_LANG,
            title = "Frequency", name = "oai_src_frequency",
            valuenil = "- select frequency -", values = freqs,
            lastval = oai_src_frequency)
        text += bibharvest_templates.tmpl_admin_w200_select(ln = CFG_SITE_LANG,
            title = "Starting date",
name = "oai_src_lastrun", valuenil = "- select a date -" , values = dates, lastval = oai_src_lastrun) text += bibharvest_templates.tmpl_admin_w200_select(ln = CFG_SITE_LANG, title = "Postprocess", name = "oai_src_post", valuenil = "- select mode -" , values = posts, lastval = oai_src_post) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "BibConvert configuration file (if needed by postprocess)", name = "oai_src_config", value = oai_src_config) text += bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = "BibFilter program (if needed by postprocess)", name = "oai_src_bibfilter", value = oai_src_bibfilter) text += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += createhiddenform(action="addsource#1", text=text, button="Add OAI Source", oai_src_baseurl=oai_src_baseurl, ln=ln, confirm=1) elif confirm in ["0", 0] and validate(oai_src_baseurl)[0] > 0: # Could not perform first url validation lnargs = [["ln", ln]] output += bibharvest_templates.tmpl_output_validate_info(CFG_SITE_LANG, 1, str(oai_src_baseurl)) output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/addsource", title = "Try again with another url", args = []) output += """ or """ output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/addsource", title = "Continue anyway", args = [['oai_src_baseurl', urllib.urlencode({'':oai_src_baseurl})[1:]], ['confirm', '1']]) output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += """or""" output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/index", title = "Go back to the OAI sources overview", args = lnargs) elif confirm not in ["-1", -1] and validate(oai_src_baseurl)[0] > 0: lnargs = [["ln", ln]] output += bibharvest_templates.tmpl_output_validate_info(CFG_SITE_LANG, 1, str(oai_src_baseurl)) output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/addsource", title = "Try again", args = []) output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += """or""" output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/index", title = "Go back to the OAI sources overview", args = lnargs) elif confirm not in ["-1", -1]: lnargs = [["ln", ln]] output += bibharvest_templates.tmpl_output_error_info(CFG_SITE_LANG, str(oai_src_baseurl), validate(oai_src_baseurl)[1]) output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/addsource", title = "Try again", args = []) output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += """or""" output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/index", title = "Go back to the OAI sources overview", args = lnargs) if confirm in [1, "1"] and not oai_src_name: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please enter a name for the 
source.") elif confirm in [1, "1"] and not oai_src_prefix: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please enter a metadata prefix.") elif confirm in [1, "1"] and not oai_src_frequency: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please choose a frequency of harvesting") elif confirm in [1, "1"] and not oai_src_lastrun: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please choose the harvesting starting date") elif confirm in [1, "1"] and not oai_src_post: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please choose a postprocess mode") elif confirm in [1, "1"] and oai_src_post.startswith("h-c") and (not oai_src_config or validatefile(oai_src_config)!=0): output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "You selected a postprocess mode which involves conversion.") output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Please enter a valid name of or a full path to a BibConvert config file or change postprocess mode.") elif oai_src_name and confirm in [1, "1"]: if not oai_src_frequency: oai_src_frequency = 0 if not oai_src_lastrun: oai_src_lastrun = 1 if not oai_src_config: oai_src_config = "NULL" if not oai_src_post: oai_src_post = "h" res = add_oai_src(oai_src_name, oai_src_baseurl, oai_src_prefix, oai_src_frequency, oai_src_lastrun, oai_src_config, oai_src_post, oai_src_sets, oai_src_bibfilter) output += write_outcome(res) lnargs = [["ln", ln]] output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, funcurl = "admin/bibharvest/bibharvestadmin.py/index", title = "Go back to the OAI sources overview", args = lnargs ) body = [output] return addadminbox(subtitle, body) def perform_request_delsource(oai_src_id=None, ln=CFG_SITE_LANG, callback='yes', confirm=0): """creates html form to delete a source """ output = "" subtitle = "" if oai_src_id: oai_src = get_oai_src(oai_src_id) namesrc = (oai_src[0][1]) pagetitle = """Delete OAI source: %s""" % namesrc subtitle = bibharvest_templates.tmpl_draw_subtitle(ln = CFG_SITE_LANG, \ title = "delete source", subtitle = pagetitle, guideurl = guideurl) output = "" if confirm in ["0", 0]: if oai_src: question = """Do you want to delete the OAI source '%s' and all its definitions?""" % namesrc text = bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, question) text += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 3) output += createhiddenform(action="delsource#5", text=text, button="Confirm", oai_src_id=oai_src_id, confirm=1) else: return bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Source specified does not exist.") elif confirm in ["1", 1]: res = delete_oai_src(oai_src_id) if res[0] == 1: output += bibharvest_templates.tmpl_print_info(CFG_SITE_LANG, "Source removed.") output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 1) output += write_outcome(res) else: output += write_outcome(res) lnargs = [["ln", ln]] output += bibharvest_templates.tmpl_print_brs(CFG_SITE_LANG, 2) output += bibharvest_templates.tmpl_link_with_args(ln = CFG_SITE_LANG, \ funcurl = "admin/bibharvest/bibharvestadmin.py/index", \ title = "Go back to the OAI sources overview", args = lnargs ) body = [output] return addadminbox(subtitle, body) def perform_request_testsource(oai_src_id=None, ln=CFG_SITE_LANG, callback='yes', \ confirm=0, record_id=None): if oai_src_id is None: return "No OAI source ID selected." 
result = "" guideurl = "help/admin/bibharvest-admin-guide" result += bibharvest_templates.tmpl_output_menu(ln, oai_src_id, guideurl) result += bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG, title = \ "Record ID ( Recognized by the data source )", guideurl=guideurl) record_str = "" if record_id != None: record_str = str(record_id) form_text = bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, title = \ "Record identifier", name = "record_id", value = record_str) result += createhiddenform(action="testsource", text=form_text, button="Test", oai_src_id=oai_src_id, ln=ln, confirm=1) if record_id != None: result += bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG, title = \ "OAI XML downloaded from the source" , guideurl = guideurl) result += bibharvest_templates.tmpl_embed_document( \ "/admin/bibharvest/bibharvestadmin.py/preview_original_xml?oai_src_id=" \ + urllib.quote(str(oai_src_id)) + "&record_id=" \ + urllib.quote(str(record_id))) result += bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG, title = \ "MARC XML after all the transformations", guideurl = guideurl) result += bibharvest_templates.tmpl_embed_document( \ "/admin/bibharvest/bibharvestadmin.py/preview_harvested_xml?oai_src_id=" \ + urllib.quote(str(oai_src_id)) + "&record_id=" \ + urllib.quote(str(record_id))) return result ###################################### ### Displaying bibsched task logs ### ###################################### def does_logfile_exist(task_id): """ returns logfile name if exists. None otherwise """ name = CFG_LOGDIR + "/bibsched_task_" + str(task_id) + ".log" if os.path.exists(name): return name else: return None def does_errfile_exist(task_id): """ returns logfile name if exists. None otherwise """ name = CFG_LOGDIR + "/bibsched_task_" + str(task_id) + ".err" if os.path.exists(name): return name else: return None def perform_request_viewtasklogs(ln, confirm, task_id): t_id = int(task_id) # preventing malicious user input guideurl = "help/admin/bibharvest-admin-guide" log_name = does_logfile_exist(t_id) err_name = does_errfile_exist(t_id) result = "" result += bibharvest_templates.tmpl_output_menu(ln, None, guideurl) if log_name != None: file = open(log_name) content = file.read(-1) file.close(); result += bibharvest_templates.tmpl_draw_titlebar(ln, "Log file : " + \ log_name, guideurl) result += bibharvest_templates.tmpl_output_scrollable_frame(\ bibharvest_templates.tmpl_output_preformatted(content)) if err_name != None: file = open(err_name) content = file.read(-1) file.close(); result += bibharvest_templates.tmpl_print_brs(ln, 2) result += bibharvest_templates.tmpl_draw_titlebar(ln, "Log file : " + \ err_name, guideurl) result += bibharvest_templates.tmpl_output_scrollable_frame(\ bibharvest_templates.tmpl_output_preformatted(content)) return result ### Probably should be moved to some other data-connection file def build_history_row(item, ln, show_selection, show_oai_source, show_record_ids, identifier = ""): def get_cssclass(cssclass): if cssclass == "oddtablecolumn": return "pairtablecolumn" else: return "oddtablecolumn" cssclass = get_cssclass("pairtablecolumn") result = bibharvest_templates.tmpl_table_row_begin() result += bibharvest_templates.tmpl_table_output_cell(\ bibharvest_templates.format_date(item.date_harvested) + " " + \ bibharvest_templates.format_time(item.date_harvested), cssclass = cssclass) cssclass = get_cssclass(cssclass) result += bibharvest_templates.tmpl_table_output_cell(\ bibharvest_templates.format_date(item.date_inserted) + " " 
+ \ bibharvest_templates.format_time(item.date_inserted), cssclass = cssclass) if show_record_ids: record_history_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewentryhistory", \ str(item.oai_id), [["ln", ln], ["oai_id", str(item.oai_id)], \ ["start", "0"]]) cssclass = get_cssclass(cssclass) result += bibharvest_templates.tmpl_table_output_cell(record_history_link, \ cssclass = cssclass) record_details_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/record/" + str(item.record_id), str(item.record_id), [["ln",str(ln)],]) cssclass = get_cssclass(cssclass) result += bibharvest_templates.tmpl_table_output_cell(record_details_link, \ cssclass = cssclass) cssclass = get_cssclass(cssclass) result += bibharvest_templates.tmpl_table_output_cell(item.inserted_to_db, \ cssclass = cssclass) cssclass = get_cssclass(cssclass) task_id = str(item.bibupload_task_id) if does_errfile_exist(item.bibupload_task_id) or does_logfile_exist(item.bibupload_task_id): task_id = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewtasklogs", str(item.bibupload_task_id),\ [["ln",str(ln)],["task_id", str(item.bibupload_task_id)]]) result += bibharvest_templates.tmpl_table_output_cell(task_id, cssclass = cssclass) if show_selection: chkbox = bibharvest_templates.tmpl_output_checkbox(item.oai_id, identifier, "1") cssclass = get_cssclass(cssclass) result += bibharvest_templates.tmpl_table_output_cell(chkbox, \ cssclass = cssclass) if show_oai_source: cssclass = get_cssclass(cssclass) result += bibharvest_templates.tmpl_table_output_cell(str(item.oai_src_id), \ cssclass = cssclass) result += bibharvest_templates.tmpl_table_row_end() return result def build_history_table_header(show_selection = True, show_oai_source = False, \ show_record_ids = True): headers = ["Harvesting Date", "Insert date"] if show_record_ids: headers += ["Record ID ( OAI )", "Rec. ID
(Invenio)"] headers += ["DB", "task
number"] if show_selection: headers.append("Reharvest") if show_oai_source: headers.append("Harvested from
source no") return headers def build_month_history_table(oai_src_id, date, ln): """ Function formats the historical data @param oai_src_id - identifier of the harvesting source @param date - date designing the month of interest @result - String containing the history table """ day_limit = 10 orig_data = get_history_entries(oai_src_id, date) stats = get_month_logs_size(oai_src_id, date) headers = build_history_table_header() result = bibharvest_templates.tmpl_table_begin(headers) identifiers = {} for day in stats: result += bibharvest_templates.tmpl_table_row_begin() d_date = datetime.datetime(date.year, date.month, day) result += bibharvest_templates.tmpl_history_table_output_day_cell(d_date, \ stats[day], oai_src_id, ln, stats[day] > day_limit) btn = bibharvest_templates.tmpl_output_select_day_button(day) result += bibharvest_templates.tmpl_table_output_cell(btn) result += bibharvest_templates.tmpl_table_row_end() day_data = get_history_entries_for_day(oai_src_id, d_date, limit = day_limit) for item in day_data: identifier = bibharvest_templates.format_date(item.date_harvested) + \ bibharvest_templates.format_time(item.date_harvested) + "_" + item.oai_id result += build_history_row(item, ln, show_selection = True, show_oai_source = \ False, show_record_ids = True, identifier = identifier) if not identifiers.has_key(item.date_harvested.day): identifiers[item.date_harvested.day] = [] identifiers[item.date_harvested.day].append(identifier) if stats[day] > day_limit: result += bibharvest_templates.tmpl_history_table_output_day_details_cell(\ ln, d_date, oai_src_id) result += bibharvest_templates.tmpl_table_end() result += bibharvest_templates.tmpl_output_identifiers(identifiers) return result def build_history_table(data, ln = CFG_SITE_LANG, show_selection = True, \ show_oai_source = False, show_record_ids = True): headers = build_history_table_header(show_selection = show_selection, \ show_oai_source = show_oai_source, show_record_ids = show_record_ids) result = bibharvest_templates.tmpl_table_begin(headers) identifiers = {} for item in data: identifier = bibharvest_templates.format_date(item.date_harvested) + \ bibharvest_templates.format_time(item.date_harvested) + "_" + item.oai_id result += build_history_row(item, ln, show_selection = show_selection,\ show_oai_source = show_oai_source, show_record_ids = show_record_ids, \ identifier = identifier) if show_selection: if not identifiers.has_key(item.date_harvested.day): identifiers[item.date_harvested.day] = [] identifiers[item.date_harvested.day].append(identifier) result += bibharvest_templates.tmpl_table_end() if show_selection: result += bibharvest_templates.tmpl_output_identifiers(identifiers) return result def perform_request_viewhistory(oai_src_id = None, ln = CFG_SITE_LANG, callback = \ 'yes', confirm = 0, month = None, year = None): """ Creates html to view the harvesting history """ date = datetime.datetime.now() if year != None and month != None: year = int(year) month = int(month) date = datetime.datetime(year, month, 1) result = "" result += bibharvest_templates.tmpl_output_menu(ln, oai_src_id, guideurl) result += bibharvest_templates.tmpl_output_history_javascript_functions() result += bibharvest_templates.tmpl_output_month_selection_bar(oai_src_id, ln, \ current_month = month, current_year = year) inner_text = build_month_history_table(oai_src_id, date, ln) inner_text += bibharvest_templates.tmpl_print_brs(ln, 1) inner_text = bibharvest_templates.tmpl_output_scrollable_frame(inner_text) inner_text += 
bibharvest_templates.tmpl_output_selection_bar() result += createhiddenform(action="/admin/bibharvest/bibharvestadmin.py/reharvest", \ text = inner_text, button = "Reharvest selected records", oai_src_id = \ oai_src_id, ln = ln) return result def perform_request_viewhistoryday(oai_src_id = None, ln = CFG_SITE_LANG, callback = 'yes',\ confirm = 0, month = None, year = None, day = None, start = 0): page_length = 50 result = "" result += bibharvest_templates.tmpl_output_menu(ln, oai_src_id, guideurl) considered_date = datetime.datetime.now() if year != None and month != None and day != None: considered_date = datetime.datetime(year, month, day) number_of_records = get_day_logs_size(oai_src_id, considered_date) return_to_month_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewhistory", \ "<< Return to the month view", [["ln", ln], ["oai_src_id",\ str(oai_src_id)], ["year", str(considered_date.year)], \ ["month", str(considered_date.month)]]) next_page_link = "" if number_of_records > start + page_length: next_page_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewhistoryday", \ "Next page >>", \ [["ln", ln], ["oai_src_id", str(oai_src_id)], ["year", str(considered_date.year)],\ ["month", str(considered_date.month)], ["day", str(considered_date.day)], \ ["start", str(start + page_length)]]) prev_page_link = "" if start > 0: new_start = start - page_length if new_start < 0: new_start = 0 prev_page_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewhistoryday", \ "<< Previous page", \ [["ln", ln], ["oai_src_id", str(oai_src_id)], ["year", str(considered_date.year)],\ ["month", str(considered_date.month)], ["day", str(considered_date.day)], \ ["start", str(new_start)]]) last_shown = start + page_length if last_shown > number_of_records: last_shown = number_of_records current_day_records = get_history_entries_for_day(oai_src_id, considered_date, limit =\ page_length, start = start) current_range = "    Viewing entries : " + str(start + 1) + "-" + \ str(last_shown) + "    " # Building the interface result += bibharvest_templates.tmpl_draw_titlebar(ln, "Viewing history of " + str(year)\ + "-" + str(month) + "-" + str(day) , guideurl) result += prev_page_link + current_range + next_page_link + \ bibharvest_templates.tmpl_print_brs(ln, 1) result += bibharvest_templates.tmpl_output_history_javascript_functions() inner_text = bibharvest_templates.tmpl_output_scrollable_frame(build_history_table(\ current_day_records, ln=ln)) inner_text += bibharvest_templates.tmpl_output_selection_bar() result += createhiddenform(action="/admin/bibharvest/bibharvestadmin.py/reharvest", \ text = inner_text, button = "Reharvest selected records", oai_src_id = oai_src_id, ln = ln) result += return_to_month_link + bibharvest_templates.tmpl_print_brs(ln, 1) return result def perform_request_viewentryhistory(oai_id, ln, confirm, start): page_length = 50 result = "" result += bibharvest_templates.tmpl_output_menu(ln, None, guideurl) considered_date = datetime.datetime.now() number_of_records = get_entry_logs_size(oai_id) next_page_link = "" if number_of_records > start + page_length: next_page_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewhistoryday", \ "Next page >>", \ [["ln", ln], ["oai_id", str(oai_id)], \ ["start", str(start + page_length)]]) prev_page_link = "" if start > 0: new_start = start - page_length if new_start < 0: new_start = 0 
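    # NOTE: all the history views share this paging arithmetic: 'start'
    # is the absolute index of the first displayed entry and pages are
    # page_length entries wide.  Illustrative values with
    # page_length = 50:
    #
    #   start = 120
    #   new_start = max(start - page_length, 0)               # -> 70
    #   last_shown = min(start + page_length, number_of_records)
    #
    # (max()/min() are equivalent to the explicit if-clamping used in
    # this file.)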
prev_page_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewhistoryday", \ "<< Previous page", \ [["ln", ln], ["oai_id", str(oai_id)], \ ["start", str(new_start)]]) last_shown = start + page_length if last_shown > number_of_records: last_shown = number_of_records current_entry_records = get_entry_history(oai_id, limit = page_length, start = start) current_range = "    Viewing entries : " + str(start + 1) \ + "-" + str(last_shown) + "    " # Building the interface result += bibharvest_templates.tmpl_draw_titlebar(ln, "Viewing history of " + \ str(oai_id) , guideurl) result += prev_page_link + current_range + next_page_link + \ bibharvest_templates.tmpl_print_brs(ln, 1) result += bibharvest_templates.tmpl_output_history_javascript_functions() inner_text = bibharvest_templates.tmpl_output_scrollable_frame(\ build_history_table(current_entry_records, ln, show_selection = False, \ show_oai_source = True, show_record_ids = False)) result += inner_text result += bibharvest_templates.tmpl_print_brs(ln, 1) return result ############################################################ ### The functions allowing to preview the harvested XML ### ############################################################ def harvest_record(record_id , oai_src_baseurl, oai_src_prefix): """ Harvests given record and returns it's string as a result """ command = CFG_BINDIR + "/bibharvest -vGetRecord -i" + record_id \ + " -p" + oai_src_prefix + " " + oai_src_baseurl program_output = os.popen(command) result = program_output.read(-1) program_output.close() return result def convert_record(oai_src_config, record_to_convert): command = CFG_BINDIR + "/bibconvert -c " + oai_src_config (s_in,s_out,s_err) = os.popen3(command) s_in.write(record_to_convert) s_in.close() s_err.readlines() result = s_out.read(-1) s_err.close() s_out.close() return result def format_record(oai_src_bibfilter, record_to_convert, treat_new = False): """ Formats the record using given formatting program. 
Returns the name of the file containing the result, the program output and the program error output """
    (file_descriptor, file_name) = tempfile.mkstemp()
    f = os.fdopen(file_descriptor, "w")
    f.write(record_to_convert)
    f.close()
    command = oai_src_bibfilter
    if treat_new:
        command += " -n"
    command += " " + file_name
    (program_input, program_output, program_err) = os.popen3(command)
    program_input.close()
    out = program_output.read(-1)
    err = program_err.read(-1)
    program_output.close()
    program_err.close()
    if os.path.exists(file_name + ".insert.xml"):
        return (file_name + ".insert.xml", out, err)
    else:
        return (None, out, err)

def harvest_postprocress_record(oai_src_id, record_id, treat_new = False):
    oai_src = get_oai_src(oai_src_id)
    oai_src_baseurl = oai_src[0][2]
    oai_src_prefix = oai_src[0][3]
    oai_src_config = oai_src[0][5]
    oai_src_post = oai_src[0][6]
    oai_src_sets = oai_src[0][7].split()
    oai_src_bibfilter = oai_src[0][8]
    result = harvest_record(record_id, oai_src_baseurl, oai_src_prefix)
    if result == None:
        return (False, "Error during harvesting")
    if oai_src_post.find("c") != -1:
        result = convert_record(oai_src_config, result)
        if result == None:
            return (False, "Error during conversion")
    if oai_src_post.find("f") != -1:
        fres = format_record(oai_src_bibfilter, result, treat_new = treat_new)
        fname = fres[0]
        if fname != None:
            f = open(fname, "r")
            result = f.read(-1)
            f.close()
            os.remove(fname)
        else:
            return (False, "Error during formatting: " + fres[1] + "\n\n" + fres[2])
    return (True, result)

def upload_record(record = None, uploader_parameters = ["-r", "-i"], oai_source_id = None):
    if record == None:
        return
    (file_descriptor, file_name) = tempfile.mkstemp()
    f = os.fdopen(file_descriptor, "w")
    f.write(record)
    f.close()
    oaiharvestlib.call_bibupload(file_name, uploader_parameters, oai_src_id = oai_source_id)
    #command = CFG_BINDIR + "/bibupload " + uploader_parameters + " "
    #command += file_name
    #out = os.popen(command)
    #output_data = out.read(-1)
    #out.close()

def perform_request_preview_original_xml(oai_src_id = None, record_id = None):
    oai_src = get_oai_src(oai_src_id)
    oai_src_baseurl = oai_src[0][2]
    oai_src_prefix = oai_src[0][3]
    oai_src_config = oai_src[0][5]
    oai_src_post = oai_src[0][6]
    oai_src_sets = oai_src[0][7].split()
    oai_src_bibfilter = oai_src[0][8]
    record = harvest_record(record_id, oai_src_baseurl, oai_src_prefix)
    return record

def perform_request_preview_harvested_xml(oai_src_id = None, record_id = None):
    return harvest_postprocress_record(oai_src_id, record_id, treat_new = True)

############################################################
###   Reharvesting of already existing records           ###
############################################################

def perform_request_reharvest_records(oai_src_id = None, ln = CFG_SITE_LANG, confirm=0, record_ids = None):
    for record_id in record_ids:
        # 1) Run full harvesting process as in the preview scenarios
        transformed = harvest_postprocress_record(oai_src_id, record_id, treat_new = True)[1]
        upload_record(transformed, ["-i", "-r"], oai_src_id)
    result = bibharvest_templates.tmpl_output_menu(ln, oai_src_id, guideurl)
    result += bibharvest_templates.tmpl_print_info(ln, "Submitted for insertion into the database")
    return result

def perform_request_harvest_record(oai_src_id = None, ln = CFG_SITE_LANG, confirm=0, record_id = None):
    """ Request for harvesting a new record """
    if oai_src_id is None:
        return "No OAI source ID selected."
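    # NOTE: the postprocess mode stored for a source is a string of step
    # letters which harvest_postprocress_record() above simply searches:
    # "h" harvest only, "c" additionally run BibConvert on the harvested
    # XML, "f" additionally pipe the result through the BibFilter
    # program.  Illustrative value:
    #
    #   oai_src_post = "h-c-f"
    #   oai_src_post.find("c") != -1  # -> True, convert_record() runs
    #   oai_src_post.find("f") != -1  # -> True, format_record() runs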
result = "" guideurl = "help/admin/bibharvest-admin-guide" result += bibharvest_templates.tmpl_output_menu(ln, oai_src_id, guideurl) result += bibharvest_templates.tmpl_draw_titlebar(ln = CFG_SITE_LANG, \ title = "Record ID ( Recognized by the data source )", guideurl=guideurl) record_str = "" if record_id != None: record_str = str(record_id) form_text = bibharvest_templates.tmpl_admin_w200_text(ln = CFG_SITE_LANG, \ title = "Record identifier", name = "record_id", value = record_str) result += createhiddenform(action="harvest", text=form_text, button="Harvest", oai_src_id=oai_src_id, ln=ln, confirm=1) if record_id != None: # there was a harvest-request transformed = harvest_postprocress_record(oai_src_id, record_id)[1] upload_record(transformed, ["-i"], oai_src_id) result += bibharvest_templates.tmpl_print_info(ln, "Submitted for inserion into the database") return result ############################ ### Holding pen support ### ############################ def build_holdingpen_table(data, ln): result = "" headers = ["OAI Record ID", "Insertion Date", "", ""] result += bibharvest_templates.tmpl_table_begin(headers) for record in data: oai_id = record[0] date_inserted = record[1] result += bibharvest_templates.tmpl_table_row_begin() result += bibharvest_templates.tmpl_table_output_cell(str(oai_id), cssclass = "oddtablecolumn") result += bibharvest_templates.tmpl_table_output_cell(str(date_inserted), cssclass = "pairtablecolumn") details_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/viewhprecord", \ "Compare with original", [["ln", ln], \ ["oai_id", str(oai_id)], ["date_inserted", str(date_inserted)]]) result += bibharvest_templates.tmpl_table_output_cell(details_link, cssclass = "oddtablecolumn") delete_hp_link = bibharvest_templates.tmpl_link_with_args(ln, \ "/admin/bibharvest/bibharvestadmin.py/delhprecord", \ "Delete from holding pen", [["ln", ln], \ ["oai_id", str(oai_id)], ["date_inserted", str(date_inserted)]]) result += bibharvest_templates.tmpl_table_output_cell(delete_hp_link, cssclass = "pairtablecolumn") result += bibharvest_templates.tmpl_table_row_end() result += bibharvest_templates.tmpl_table_end() return result def perform_request_viewholdingpen(ln = CFG_SITE_LANG, confirm=0, start = 0, limit = -1): data = get_holdingpen_entries(start, limit) result = "" result += build_holdingpen_table(data, ln) return result def perform_request_viewhprecord(oai_id, date_inserted, ln = CFG_SITE_LANG, confirm=0): result = "" record_id = int(search_pattern( p = oai_id, f = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ m = 'e' ).tolist()[0]) - db_rec = create_record(bibformat.format_record(record_id ,"xm")) + db_rec = get_record(record_id) db_MARC = create_marc_record(db_rec[0], record_id, {"text-marc": 1, "aleph-marc": 0}) db_content = bibharvest_templates.tmpl_output_preformatted(db_MARC.encode("utf-8")) db_label = "Database version of record" + bibharvest_templates.tmpl_print_brs(ln, 1) hp_rec = create_record(get_holdingpen_entry(oai_id, date_inserted)) hp_MARC = create_marc_record(hp_rec[0], record_id, {"text-marc": 1, "aleph-marc": 0}) hp_content = bibharvest_templates.tmpl_output_preformatted(hp_MARC.encode("utf-8")) hp_label = bibharvest_templates.tmpl_print_brs(ln, 2) + "Holdingpen version of record"\ + bibharvest_templates.tmpl_print_brs(ln, 1) submit_link = bibharvest_templates.tmpl_link_with_args(ln, "admin/bibharvest/bibharvestadmin.py/accepthprecord", "Accept Holding Pen version", [["ln", str(ln)], ["oai_id", str(oai_id)], ["date_inserted", 
str(date_inserted)]]) delete_link = delete_hp_link = bibharvest_templates.tmpl_link_with_args(ln, "admin/bibharvest/bibharvestadmin.py/delhprecord", "Delete from holding pen", [["ln", ln], ["oai_id", str(oai_id)], ["date_inserted", str(date_inserted)]]) result = "" result += db_label result += db_content result += hp_label result += hp_content result += delete_link + " " result += submit_link return result def perform_request_delhprecord(oai_id, date_inserted, ln = CFG_SITE_LANG, confirm = 0): delete_holdingpen_entry(oai_id, date_inserted) return "Record deleted from the holding pen" def perform_request_accepthprecord(oai_id, date_inserted, ln = CFG_SITE_LANG, confirm = 0): record_xml = get_holdingpen_entry(oai_id, date_inserted) delete_holdingpen_entry(oai_id, date_inserted) upload_record(record_xml) return perform_request_viewholdingpen(ln = ln, confirm = confirm, start = 0, limit = -1) ################################################################## ### Here the functions to retrieve, modify, delete and add sources ################################################################## def get_oai_src(oai_src_id=''): """Returns a row parameters for a given id""" sql = "SELECT id,name,baseurl,metadataprefix,frequency,bibconvertcfgfile,postprocess,setspecs,bibfilterprogram FROM oaiHARVEST" try: if oai_src_id: sql += " WHERE id=%s" % oai_src_id sql += " ORDER BY id asc" res = run_sql(sql) return res except StandardError, e: return "" def modify_oai_src(oai_src_id, oai_src_name, oai_src_baseurl, oai_src_prefix, oai_src_frequency, oai_src_config, oai_src_post, oai_src_sets=[], oai_src_bibfilter=''): """Modifies a row's parameters""" try: res = run_sql("UPDATE oaiHARVEST SET name=%s WHERE id=%s", (oai_src_name, oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET baseurl=%s WHERE id=%s", (oai_src_baseurl, oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET metadataprefix=%s WHERE id=%s", (oai_src_prefix, oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET frequency=%s WHERE id=%s", (oai_src_frequency, oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET bibconvertcfgfile=%s WHERE id=%s", (oai_src_config, oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET postprocess=%s WHERE id=%s", (oai_src_post, oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET setspecs=%s WHERE id=%s", (' '.join(oai_src_sets), oai_src_id)) res = run_sql("UPDATE oaiHARVEST SET bibfilterprogram=%s WHERE id=%s", (oai_src_bibfilter, oai_src_id)) return (1, "") except StandardError, e: return (0, e) def add_oai_src(oai_src_name, oai_src_baseurl, oai_src_prefix, oai_src_frequency, oai_src_lastrun, oai_src_config, oai_src_post, oai_src_sets=[], oai_src_bibfilter=''): """Adds a new row to the database with the given parameters""" try: if oai_src_lastrun in [0, "0"]: lastrun_mode = 'NULL' else: lastrun_mode = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # lastrun_mode = "'"+lastrun_mode+"'" run_sql("INSERT INTO oaiHARVEST (id, baseurl, metadataprefix, arguments, comment, bibconvertcfgfile, name, lastrun, frequency, postprocess, bibfilterprogram, setspecs) VALUES (0, %s, %s, NULL, NULL, %s, %s, %s, %s, %s, %s, %s)", \ (oai_src_baseurl, oai_src_prefix, oai_src_config, oai_src_name, lastrun_mode, oai_src_frequency, oai_src_post, oai_src_bibfilter, " ".join(oai_src_sets))) return (1, "") except StandardError, e: return (0, e) def delete_oai_src(oai_src_id): """Deletes a row from the database according to its id""" try: res = run_sql("DELETE FROM oaiHARVEST WHERE id=%s" % oai_src_id) return (1, "") except StandardError, e: return (0, e) def 
get_tot_oai_src():
    """Returns number of rows in the database"""
    try:
        sql = "SELECT COUNT(*) FROM oaiHARVEST"
        res = run_sql(sql)
        return res[0][0]
    except StandardError, e:
        return ""

def get_update_status():
    """Returns a table showing a list of all rows and their LastUpdate status"""
    try:
        sql = "SELECT name,lastrun FROM oaiHARVEST ORDER BY lastrun desc"
        res = run_sql(sql)
        return res
    except StandardError, e:
        return ""

def get_next_schedule():
    """Returns the next scheduled oaiharvest task"""
    try:
        sql = "SELECT runtime,status FROM schTASK WHERE proc='oaiharvest' AND runtime > now() ORDER by runtime LIMIT 1"
        res = run_sql(sql)
        if len(res) > 0:
            return res[0]
        else:
            return ("", "")
    except StandardError, e:
        return ("", "")

def validate(oai_src_baseurl):
    """This function validates a baseURL by opening its URL and
       'grepping' for the <OAI-PMH> and <Identify> tags:
       Codes:  0 = okay
               1 = baseURL not valid
               2 = baseURL not found/not existing
               3 = tmp directory is not writable
               4 = Unknown error
       Returns tuple (code, message)
    """
    try:
        url = oai_src_baseurl + "?verb=Identify"
        urllib.urlretrieve(url, tmppath)
        # First check if we have xml oai-pmh output at all
        grepOUT1 = os.popen('grep -iwc "<OAI-PMH" ' + tmppath).read()
        # Then check that the server actually answered the Identify verb
        grepOUT2 = os.popen('grep -iwc "<Identify" ' + tmppath).read()
        if int(grepOUT1) > 0 and int(grepOUT2) > 0:
            #print "Valid!"
            return (0, '')
        else:
            #print "Not valid!"
            return (1, '')
    except IOError, (errno, strerror):
        # Quick error handling for frequent error codes.
        if errno == 13:
            return (3, "Please check permission on %s and retry" % CFG_TMPDIR)
        elif errno == 2 or errno == 'socket error':
            return (2, "Could not connect with URL %s. Check URL or retry when server is available." % url)
        return (4, strerror)
    except InvalidURL, e:
        return (2, "Could not connect with URL %s. Check URL or retry when server is available." % url)
    except StandardError, e:
        return (4, "An unknown error has occurred")

def validatefile(oai_src_config):
    """This function checks whether the given path to a text file exists or not
         0 = okay
         1 = file non existing
    """
    CFG_BIBCONVERT_XSL_PATH = "%s%sbibconvert%sconfig" % (CFG_ETCDIR, os.sep, os.sep)
    path_to_config = CFG_BIBCONVERT_XSL_PATH + os.sep + oai_src_config
    if os.path.exists(path_to_config):
        # Try to read in config directory
        try:
            ftmp = open(path_to_config, 'r')
            cfgstr = ftmp.read()
            ftmp.close()
            if cfgstr != "":
                #print "Valid!"
                return 0
        except StandardError, e:
            pass
    # Try to read as complete path
    try:
        ftmp = open(oai_src_config, 'r')
        cfgstr = ftmp.read()
        ftmp.close()
        if cfgstr != "":
            #print "Valid!"
            return 0
        else:
            #print "Not valid!"
            return 1
    except StandardError, e:
        return 1
    return 1

def findMetadataFormats(oai_src_baseurl):
    """This function finds the metadata formats offered by an OAI
       repository by analysing the output of verb=ListMetadataFormats"""
    formats = []
    url = oai_src_baseurl + "?verb=ListMetadataFormats"
    try:
        urllib.urlretrieve(url, tmppath)
    except IOError:
        return formats
    ftmp = open(tmppath, 'r')
    xmlstr = ftmp.read()
    ftmp.close()
    chunks = xmlstr.split('<metadataPrefix>')
    count = 0 # first chunk is invalid
    for chunk in chunks:
        if count != 0:
            formats.append(chunk.split('</metadataPrefix>')[0])
        count = count + 1
    return formats
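# NOTE: illustrative usage of the two discovery helpers above and
# below (repository URL and return values are only examples):
#
#   findMetadataFormats("http://export.arxiv.org/oai2")
#   # -> e.g. ['oai_dc', 'arXiv', ...], depending on the repository
#
#   findSets("http://export.arxiv.org/oai2")
#   # -> e.g. [['cs', 'Computer Science'], ['math', 'Mathematics'], ...]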
def findSets(oai_src_baseurl):
    """This function finds the sets offered by an OAI repository by
       analysing the output of verb=ListSets.
       Returns a list of tuples (SetSpec, SetName)"""
    url = oai_src_baseurl + "?verb=ListSets"
    sets = {}
    try:
        urllib.urlretrieve(url, tmppath)
    except IOError:
        return sets
    ftmp = open(tmppath, 'r')
    xmlstr = ftmp.read()
    ftmp.close()
    chunks = xmlstr.split('<set>')
    count = 0 # first chunk is invalid
    for chunk in chunks:
        if count != 0:
            chunks_set = chunk.split('<setSpec>')[1].split("</setSpec>")
            set_spec = chunks_set[0]
            check_set_2 = chunks_set[1].split("<setName>")
            set_name = None
            if len(check_set_2) > 1:
                set_name = check_set_2[1].split("</setName>")[0]
            sets[set_spec] = [set_spec, set_name]
        count = count + 1
    return sets.values()
diff --git a/modules/bibharvest/lib/oaiarchive_engine.py b/modules/bibharvest/lib/oaiarchive_engine.py
index 193ea9fb6..05f754157 100644
--- a/modules/bibharvest/lib/oaiarchive_engine.py
+++ b/modules/bibharvest/lib/oaiarchive_engine.py
@@ -1,542 +1,542 @@
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""OAI Repository administration tool -

   Updates the metadata of the records to include OAI identifiers and
   OAI SetSpec according to the settings defined in OAI Repository
   admin interface
"""
__revision__ = "$Id$"
import os
import sys
import time
import sets
from stat import ST_SIZE
from tempfile import mkstemp
from invenio.config import \
     CFG_OAI_ID_FIELD, \
     CFG_OAI_ID_PREFIX, \
     CFG_OAI_SET_FIELD, \
     CFG_BINDIR, \
     CFG_SITE_NAME, \
     CFG_TMPDIR
from invenio.search_engine import \
     perform_request_search, \
-    get_fieldvalues
+    get_fieldvalues, \
+    get_record
from invenio.intbitset import intbitset as HitSet
from invenio.dbquery import run_sql
from invenio.bibtask import \
     task_get_option, \
     task_set_option, \
     write_message, \
     task_update_progress, \
     task_init, \
     task_sleep_now_if_required
from invenio.bibrecord import \
     record_delete_subfield, \
-    field_xml_output, \
-    create_record
+    field_xml_output
from invenio.bibformat import format_record
DATAFIELD_SET_HEAD = \
    "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">" % \
    (CFG_OAI_SET_FIELD[0:3],
     CFG_OAI_SET_FIELD[3:4].replace('_', ' '),
     CFG_OAI_SET_FIELD[4:5].replace('_', ' '))
DATAFIELD_ID_HEAD = \
    "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">" % \
    (CFG_OAI_ID_FIELD[0:3],
     CFG_OAI_ID_FIELD[3:4].replace('_', ' '),
     CFG_OAI_ID_FIELD[4:5].replace('_', ' '))
def get_set_definitions(set_spec):
    """ Retrieve set definitions from oaiARCHIVE table.
        The set definitions are the search patterns that define the
        records which are in the set """
    set_definitions = []
    query = "select setName, setDefinition from oaiARCHIVE where setSpec=%s"
    res = run_sql(query, (set_spec, ))
    for (set_name, set_definition) in res:
        params = parse_set_definition(set_definition)
        params['setSpec'] = set_spec
        params['setName'] = set_name
        set_definitions.append(params)
    return set_definitions
def parse_set_definition(set_definition):
    """ Returns the parameters for the given set definition.
The returned structure is a dictionary with keys being c, p1, f1, m1, p2, f2, m2, p3, f3, m3 and corresponding values @param set_definition a string as returned by the database for column 'setDefinition' @return a dictionary """ params = {'c':'', 'p1':'', 'f1':'', 'm1':'', 'p2':'', 'f2':'', 'm2':'', 'p3':'', 'f3':'', 'm3':'', 'op1':'a', 'op2':'a'} definitions = set_definition.split(';') for definition in definitions: arguments = definition.split('=') if len(arguments) == 2: params[arguments[0]] = arguments[1] return params def all_set_specs(): """ Returns the list of (distinct) setSpecs defined in the settings. This also include the "empty" setSpec if any setting uses it. Note: there can be several times the same setSpec in the settings, given that a setSpec might be defined by several search queries. Here we return distinct values """ query = "SELECT DISTINCT setSpec FROM oaiARCHIVE" res = run_sql(query) return [row[0] for row in res] def get_recids_for_set_spec(set_spec): """ Returns the list (as HitSet) of recids belonging to 'set' Parameters: set_spec - *str* the set_spec for which we would like to get the recids """ recids = HitSet() for set_def in get_set_definitions(set_spec): new_recids = perform_request_search(c=[coll.strip() \ for coll in set_def['c'].split(',')], p1=set_def['p1'], f1=set_def['f1'], m1=set_def['m1'], op1=set_def['op1'], p2=set_def['p2'], f2=set_def['f2'], m2=set_def['m2'], op2=set_def['op2'], p3=set_def['p3'], f3=set_def['f3'], m3=set_def['m3'], ap=0) recids = recids.union(HitSet(new_recids)) return recids def get_set_name_for_set_spec(set_spec): """ Returns the OAI setName of a setSpec. Note that the OAI Repository admin lets the user add several set definition with the same setSpec, and possibly with different setNames... -> Returns the first (non empty) one found. Parameters: set_spec - *str* the set_spec for which we would like to get the setName """ query = "select setName from oaiARCHIVE where setSpec=%s and setName!=''" res = run_sql(query, (set_spec, )) if len(res) > 0: return res[0][0] else: return "" def print_repository_status(write_message=write_message, verbose=0): """ Prints the repository status to the standard output. Parameters: write_message - *function* the function used to write the output verbose - *int* the verbosity of the output - 0: print repository size - 1: print quick status of each set (numbers can be wrong if the repository is in some inconsistent state, i.e. a record is in an OAI setSpec but has not OAI ID) - 2: print detailed status of repository, with number of records that needs to be synchronized according to the sets definitions. Precise, but ~slow... """ repository_size_s = "%d" % repository_size() repository_recids_after_update = HitSet() write_message(CFG_SITE_NAME) write_message(" OAI Repository Status") set_spec_max_length = 19 # How many max char do we display for set_name_max_length = 20 # setName and setSpec? 
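    # NOTE: a 'setDefinition' value handled by parse_set_definition()
    # above is a ';'-separated list of key=value search parameters.  An
    # illustrative (made-up) value:
    #
    #   "c=Theses;p1=ellis;f1=author;m1=e;op1=a"
    #
    # yields params['c'] == 'Theses', params['p1'] == 'ellis', etc.,
    # while missing keys keep their defaults (e.g. 'a' for op1/op2).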
if verbose == 0: # Just print repository size write_message(" Total(**)" + " " * 29 + " " * (9 - len(repository_size_s)) + repository_size_s) return elif verbose == 1: # We display few information: show longer set name and spec set_spec_max_length = 30 set_name_max_length = 30 write_message("=" * 80) header = " setSpec" + " " * (set_spec_max_length - 7) + \ " setName" + " " * (set_name_max_length - 5) + " Volume" if verbose > 1: header += " " * 5 + "After update(*):" write_message(header) if verbose > 1: write_message(" " * 57 + "Additions Deletions") write_message("-" * 80) for set_spec in all_set_specs(): if verbose <= 1: # Get the records that are in this set. This is an # incomplete check, as it can happen that some records are # in this set (according to the metadata) but have no OAI # ID (so they are not exported). This can happen if the # repository has some records coming from external # sources, or if it has never been synchronized with this # tool. current_recids = perform_request_search(c=CFG_SITE_NAME, p1=set_spec, f1=CFG_OAI_SET_FIELD, m1="e", ap=0) nb_current_recids = len(current_recids) else: # Get the records that are *currently* exported for this # setSpec current_recids = perform_request_search(c=CFG_SITE_NAME, p1=set_spec, f1=CFG_OAI_SET_FIELD, m1="e", ap=0, op1="a", p2="oai:*", f2=CFG_OAI_ID_FIELD, m2="e") nb_current_recids = len(current_recids) # Get the records that *should* be in this set according to # the admin defined settings, and compute how many should be # added or removed should_recids = get_recids_for_set_spec(set_spec) repository_recids_after_update = repository_recids_after_update.union(should_recids) nb_add_recids = len(HitSet(should_recids).difference(HitSet(current_recids))) nb_remove_recids = len(HitSet(current_recids).difference(HitSet(should_recids))) nb_should_recids = len(should_recids) nb_recids_after_update = len(repository_recids_after_update) # Adapt setName and setSpec strings lengths set_spec_str = set_spec if len(set_spec_str) > set_spec_max_length : set_spec_str = "%s.." % set_spec_str[:set_spec_max_length] set_name_str = get_set_name_for_set_spec(set_spec) if len(set_name_str) > set_name_max_length : set_name_str = "%s.." % set_name_str[:set_name_max_length] row = " " + set_spec_str + \ " " * ((set_spec_max_length + 2) - len(set_spec_str)) + set_name_str + \ " " * ((set_name_max_length + 2) - len(set_name_str)) + \ " " * (7 - len(str(nb_current_recids))) + str(nb_current_recids) if verbose > 1: row += \ " " * max(9 - len(str(nb_add_recids)), 0) + '+' + str(nb_add_recids) + \ " " * max(7 - len(str(nb_remove_recids)), 0) + '-' + str(nb_remove_recids) + " = " +\ " " * max(7 - len(str(nb_should_recids)), 0) + str(nb_should_recids) write_message(row) write_message("=" * 80) footer = " Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \ " " * (9 - len(repository_size_s)) + repository_size_s if verbose > 1: footer += ' ' * (28 - len(str(nb_recids_after_update))) + str(nb_recids_after_update) write_message(footer) if verbose > 1: write_message(' *The "after update" columns show the repository after you run this tool.') else: write_message(' *"Volume" is indicative if repository is out of sync. 
Use --detailed-report.') write_message('**The "total" is not the sum of the above numbers, but the union of the records.') def repository_size(): "Read repository size" return len(perform_request_search(p1="oai:*", f1=CFG_OAI_ID_FIELD, m1="e", ap=0)) ### MAIN ### def oaiarchive_task(): """Main business logic code of oai_archive""" no_upload = task_get_option("no_upload") report = task_get_option("report") if report > 1: print_repository_status(verbose=report) return True task_update_progress("Fetching records to process") # Build the list of records to be processed, that is, search for # the records that match one of the search queries defined in OAI # Repository admin interface. recids_for_set = {} # Remember exactly which record belongs to which set recids = HitSet() # "Flat" set of the recids_for_set values for set_spec in all_set_specs(): task_sleep_now_if_required(can_stop_too=True) _recids = get_recids_for_set_spec(set_spec) recids_for_set[set_spec] = _recids recids = recids.union(_recids) # Also get the list of records that are currently exported through # OAI and that might need to be refreshed oai_recids = perform_request_search(c=CFG_SITE_NAME, p1='oai:%s:*' % CFG_OAI_ID_PREFIX, f1=CFG_OAI_ID_FIELD, m1="e", ap=0) recids = recids.union(HitSet(oai_recids)) # Prepare to save results in a tmp file (fd, filename) = mkstemp(dir=CFG_TMPDIR, prefix='oaiarchive_' + \ time.strftime("%Y%m%d_%H%M%S_", time.localtime())) oai_out = os.fdopen(fd, "w") # Iterate over the recids i = 0 for recid in recids: i += 1 task_sleep_now_if_required(can_stop_too=True) task_update_progress("Done %s out of %s records." % \ (i, len(recids))) # Check if an OAI identifier is already in the record or # not. oai_id_entry = "" oai_ids = [_oai_id for _oai_id in \ get_fieldvalues(recid, CFG_OAI_ID_FIELD[0:2]) \ if _oai_id.strip() != ''] if len(oai_ids) == 0: oai_id_entry = "oai:%s:%s\n" % \ (CFG_OAI_ID_FIELD[5:6], CFG_OAI_ID_PREFIX, recid) # Get the sets to which this record already belongs according # to the metadata current_oai_sets = sets.Set(\ [_oai_set for _oai_set in \ get_fieldvalues(recid, CFG_OAI_SET_FIELD[0:2]) \ if _oai_set.strip() != '']) # Get the sets that should be in this record according to # settings updated_oai_sets = sets.Set(\ [_set for _set, _recids in recids_for_set.iteritems() if recid in _recids]) # Ok, we have the old sets and the new sets. If they are equal # and oai ID does not need to be added, then great, nothing to # change . Otherwise apply the new sets. if current_oai_sets == updated_oai_sets and not oai_id_entry: continue # Jump to next recid # Generate the xml sets entry oai_set_entry = '\n'.join(["%s" % \ (CFG_OAI_SET_FIELD[5:6], _oai_set) \ for _oai_set in updated_oai_sets]) + \ "\n" # Also get all the datafields with tag and indicator matching # CFG_OAI_SET_FIELD[:5] and CFG_OAI_ID_FIELD[:5] but with # subcode != CFG_OAI_SET_FIELD[5:6] and subcode != # CFG_OAI_SET_FIELD[5:6], so that we can preserve these values other_data = marcxml_filter_out_tags(recid, [CFG_OAI_SET_FIELD, CFG_OAI_ID_FIELD]) if oai_id_entry or oai_set_entry: if CFG_OAI_ID_FIELD[0:5] == CFG_OAI_SET_FIELD[0:5]: # Put set and OAI ID in the same datafield oai_out.write("\n") oai_out.write("%s" "\n" % recid) oai_out.write(DATAFIELD_ID_HEAD) oai_out.write("\n") #if oai_id_entry: oai_out.write(oai_id_entry) #if oai_set_entry: oai_out.write(oai_set_entry) oai_out.write("\n") oai_out.write(other_data) oai_out.write("
\n") else: oai_out.write("\n") oai_out.write("%s" "\n" % recid) if oai_id_entry: oai_out.write(DATAFIELD_ID_HEAD) oai_out.write("\n") oai_out.write(oai_id_entry) oai_out.write("\n") if oai_set_entry: oai_out.write(DATAFIELD_SET_HEAD) oai_out.write("\n") oai_out.write(oai_set_entry) oai_out.write("\n") oai_out.write(other_data) oai_out.write("\n") oai_out.close() write_message("Wrote to file %s" % filename) if not no_upload: task_sleep_now_if_required(can_stop_too=True) # Check if file is empty or not: len_file = os.stat(filename)[ST_SIZE] if len_file > 0: command = "%s/bibupload -c %s -u oairepository" % (CFG_BINDIR, filename) os.system(command) else: os.remove(filename) return True def marcxml_filter_out_tags(recid, fields): """ Returns the fields of record 'recid' that share the same tag and indicators as those specified in 'fields', but for which the subfield is different. This is nice to emulate a bibupload -c that corrects only specific subfields. Parameters: recid - *int* the id of the record to process fields - *list(str)* the list of fields that we want to filter out. Eg ['909COp', '909COo'] """ out = '' - record = create_record(format_record(recid, 'xm'), 2)[0] + record = get_record(recid) # Delete subfields that we want to replace for field in fields: record_delete_subfield(record, tag=field[0:3], ind1=field[3:4], ind2=field[4:5], subfield=field[5:6]) # Select only datafields that share tag + indicators processed_tags_and_ind = [] for field in fields: if not field[0:5] in processed_tags_and_ind: # Ensure that we do not process twice the same datafields processed_tags_and_ind.append(field[0:5]) for datafield in record[field[0:3]]: if datafield[1] == field[3:4] and \ datafield[2] == field[4:5]: out += field_xml_output(datafield, field[0:3]) return out ######################### def main(): """Main that construct all the bibtask.""" # if there is any -r or --report option (or other similar options) # in the arguments, just print the status and exit (do not run # through BibSched...) 
mode = -1 if '-d' in sys.argv[1:] or '--detailed-report' in sys.argv[1:]: mode = 2 elif '-r' in sys.argv[1:] or '--report' in sys.argv[1:]: mode = 1 if mode != -1: def write_message(*args): """Overload BibTask function so that it does not need to run in BibSched environment""" sys.stdout.write(args[0] + '\n') print_repository_status(write_message=write_message, verbose=mode) return task_init(authorization_action='runoaiarchive', authorization_msg="OAI Archive Task Submission", description="Examples:\n" " Expose records according to sets defined in OAI Repository admin interface\n" " $ oaiarchive \n" " Expose records according to sets defined in OAI Repository admin interface and update them every day\n" " $ oaiarchive -s24\n" " Print OAI repository status\n" " $ oaiarchive -r\n" " Print OAI repository detailed status\n" " $ oaiarchive -d\n\n", help_specific_usage="Options:\n" " -r --report\t\tOAI repository status\n" " -d --detailed-report\t\tOAI repository detailed status\n" " -n --no-process\tDo no upload the modifications\n", version=__revision__, specific_params=("rdn", [ "report", "detailed-report", "no-process"]), task_submit_elaborate_specific_parameter_fnc= task_submit_elaborate_specific_parameter, task_run_fnc=oaiarchive_task) def task_submit_elaborate_specific_parameter(key, value, opts, args): """Elaborate specific CLI parameters of oaiarchive""" if key in ("-r", "--report"): task_set_option("report", 1) if key in ("-d", "--detailed-report"): task_set_option("report", 2) elif key in ("-n", "--no-process"): task_set_option("no_upload", 1) else: return False return True ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibupload/lib/bibupload.py b/modules/bibupload/lib/bibupload.py index 5df7ebf57..2a09180f6 100644 --- a/modules/bibupload/lib/bibupload.py +++ b/modules/bibupload/lib/bibupload.py @@ -1,1908 +1,1914 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibUpload: Receive MARC XML file and update the appropriate database tables according to options. Usage: bibupload [options] input.xml Examples: $ bibupload -i input.xml Options: -a, --append new fields are appended to the existing record -c, --correct fields are replaced by the new ones in the existing record -f, --format takes only the FMT fields into account. 
Does not update -i, --insert insert the new record in the database -r, --replace the existing record is entirely replaced by the new one -z, --reference update references (update only 999 fields) -s, --stage=STAGE stage to start from in the algorithm (0: always done; 1: FMT tags; 2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange do not change record last modification date when updating -o, --holdingpen Makes bibupload insert into holding pen instead the normal database Scheduling options: -u, --user=USER user name to store task, password needed General options: -h, --help print this help and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) -V --version print the script version """ __revision__ = "$Id$" import os import re import sys import time from zlib import compress import urllib2 import socket import marshal from invenio.config import CFG_OAI_ID_FIELD, CFG_SITE_URL, \ CFG_SITE_SECURE_URL, \ CFG_BIBUPLOAD_REFERENCE_TAG, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG, \ CFG_BIBUPLOAD_STRONG_TAGS, \ - CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS + CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS, \ + CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE from invenio.bibupload_config import CFG_BIBUPLOAD_CONTROLFIELD_TAGS, \ CFG_BIBUPLOAD_SPECIAL_TAGS from invenio.dbquery import run_sql, \ Error from invenio.bibrecord import create_records, \ - create_record, \ record_add_field, \ record_delete_field, \ record_xml_output, \ record_get_field_instances, \ record_get_field_values, \ field_get_subfield_values, \ record_extract_oai_id +from invenio.search_engine import get_record from invenio.dateutils import convert_datestruct_to_datetext from invenio.errorlib import register_exception from invenio.bibformat import format_record from invenio.intbitset import intbitset from invenio.config import CFG_WEBSUBMIT_FILEDIR from invenio.bibtask import task_init, write_message, \ task_set_option, task_get_option, task_get_task_param, task_update_status, \ task_update_progress, task_sleep_now_if_required, fix_argv_paths from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, \ get_docname_from_url, get_format_from_url, check_valid_url, download_url, \ KEEP_OLD_VALUE, decompose_bibdocfile_url, bibdocfile_url_p, \ InvenioWebSubmitFileError #Statistic variables stat = {} stat['nb_records_to_upload'] = 0 stat['nb_records_updated'] = 0 stat['nb_records_inserted'] = 0 stat['nb_errors'] = 0 stat['nb_holdingpen'] = 0 stat['exectime'] = time.localtime() ## Let's set a reasonable timeout for URL request (e.g. FFT) socket.setdefaulttimeout(40) _re_find_001 = re.compile('\\s*(\\d*)\\s*', re.S) def bibupload_pending_recids(): """This function embed a bit of A.I. and is more a hack than an elegant algorithm. It should be updated in case bibupload/bibsched are modified in incompatible ways. This function return the intbitset of all the records that are being (or are scheduled to be) touched by other bibuploads. """ options = run_sql("""SELECT arguments FROM schTASK WHERE status<>'DONE' AND proc='bibupload' AND (status='RUNNING' OR status='CONTINUING' OR status='WAITING' OR status='SCHEDULED' OR status='ABOUT TO STOP' OR status='ABOUT TO SLEEP')""") ret = intbitset() xmls = [] if options: for arguments in options: arguments = marshal.loads(arguments[0]) for argument in arguments[1:]: if argument.startswith('/'): # XMLs files are recognizable because they're absolute # files... 
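                    # NOTE: each queued bibupload task stores its
                    # marshalled argv in schTASK; any absolute-path
                    # argument is taken to be an input MARCXML file,
                    # which is then scanned below for 001 record
                    # identifiers so that records touched by other
                    # (pending) uploads can be detected.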
            xmls.append(argument)
    for xmlfile in xmls:
        # Let's grep for the 001
        try:
            xml = open(xmlfile).read()
            ret += [int(group[1]) for group in _re_find_001.findall(xml)]
        except:
            continue
    return ret

### bibupload engine functions:

def bibupload(record, opt_tag=None, opt_mode=None, opt_stage_to_start_from=1, opt_notimechange=0, oai_rec_id=""):
    """Main function: process a record and fit it in the tables
    bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record metadata.

    Return (error_code, recID) of the processed record.
    """
    assert(opt_mode in ('insert', 'replace', 'replace_or_insert', 'reference', 'correct', 'append', 'format', 'holdingpen'))
    error = None

    # If a special tag to process was given, check that it exists in the record:
    if opt_tag is not None and not(record.has_key(opt_tag)):
        write_message(" Failed: Tag not found, enter a valid tag to update.", verbose=1, stream=sys.stderr)
        return (1, -1)

    # Extraction of the Record Id from 001, SYSNO or OAIID tags:
    rec_id = retrieve_rec_id(record, opt_mode)
    if rec_id == -1:
        return (1, -1)
    elif rec_id > 0:
        write_message(" -Retrieve record ID (found %s): DONE." % rec_id, verbose=2)
        if not record.has_key('001'):
            # Found record ID by means of SYSNO or OAIID, and the
            # input MARCXML buffer does not have this 001 tag, so we
            # should add it now:
            error = record_add_field(record, '001', '', '', rec_id, [], 0)
            if error is None:
                write_message(" Failed: Error during adding the 001 controlfield to the record", verbose=1, stream=sys.stderr)
                return (1, int(rec_id))
            else:
                error = None
            write_message(" -Added tag 001: DONE.", verbose=2)
    write_message(" -Check if the xml marc file is already in the database: DONE", verbose=2)

    # In reference mode, check that reference tags are present:
    if opt_mode == 'reference':
        error = extract_tag_from_record(record, CFG_BIBUPLOAD_REFERENCE_TAG)
        if error is None:
            write_message(" Failed: No reference tags have been found...", verbose=1, stream=sys.stderr)
            return (1, -1)
        else:
            error = None
            write_message(" -Check if reference tags exist: DONE", verbose=2)

    record_deleted_p = False
    if opt_mode == 'insert' or \
       (opt_mode == 'replace_or_insert' and rec_id is None):
        insert_mode_p = True
        # Insert the record into the bibrec databases to have a recordId
        rec_id = create_new_record()
        write_message(" -Creation of a new record id (%d): DONE" % rec_id, verbose=2)
        # we add the record Id control field to the record
        error = record_add_field(record, '001', '', '', rec_id, [], 0)
        if error is None:
            write_message(" Failed: Error during adding the 001 controlfield to the record", verbose=1, stream=sys.stderr)
            return (1, int(rec_id))
        else:
            error = None
    elif opt_mode != 'insert' and opt_mode != 'format' and \
         opt_stage_to_start_from != 5:
        insert_mode_p = False
        # Update Mode
        # Retrieve the old record to update
-        rec_old = create_record(format_record(int(rec_id), 'xm'), 2)[0]
+        rec_old = get_record(rec_id)
        if rec_old is None:
            write_message(" Failed during the creation of the old record!", verbose=1, stream=sys.stderr)
            return (1, int(rec_id))
        else:
            write_message(" -Retrieve the old record to update: DONE", verbose=2)

        # In Replace mode, take over old strong tags if applicable:
        if opt_mode == 'replace' or \
           opt_mode == 'replace_or_insert':
            copy_strong_tags_from_old_record(record, rec_old)

        # Delete tags to correct in the record
        if opt_mode == 'correct' or opt_mode == 'reference':
            delete_tags_to_correct(record, rec_old, opt_tag)
            write_message(" -Delete the old tags to correct in the old record: DONE", verbose=2)

        # Append the new tags to the old record and update the new record
        # with the modified old record:
        if opt_mode == 'append' or opt_mode == 'correct' or \
           opt_mode == 'reference':
            record = append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode)
            write_message(" -Append new tags to the old record: DONE", verbose=2)

        # now we clear all the rows from bibrec_bibxxx from the old
        # record (they will be populated later (if needed) during
        # stage 4 below):
        delete_bibrec_bibxxx(rec_old, rec_id)
        record_deleted_p = True
        write_message(" -Clean bibrec_bibxxx: DONE", verbose=2)
    write_message(" -Stage COMPLETED", verbose=2)

    try:
        # Have a look if we have FMT tags
        write_message("Stage 1: Start (Insert FMT tags if they exist).", verbose=2)
        if opt_stage_to_start_from <= 1 and \
           extract_tag_from_record(record, 'FMT') is not None:
            record = insert_fmt_tags(record, rec_id, opt_mode)
            if record is None:
                write_message(" Stage 1 failed: Error while inserting FMT tags", verbose=1, stream=sys.stderr)
                return (1, int(rec_id))
            elif record == 0:
                # Mode format finished
                stat['nb_records_updated'] += 1
                return (0, int(rec_id))
            write_message(" -Stage COMPLETED", verbose=2)
        else:
            write_message(" -Stage NOT NEEDED", verbose=2)

        # Have a look if we have FFT tags
        write_message("Stage 2: Start (Process FFT tags if they exist).", verbose=2)
        record_had_FFT = False
        if opt_stage_to_start_from <= 2 and \
           extract_tag_from_record(record, 'FFT') is not None:
            record_had_FFT = True
            if not writing_rights_p():
                write_message(" Stage 2 failed: Error: no rights to write fulltext files", verbose=1, stream=sys.stderr)
                task_update_status("ERROR")
                sys.exit(1)
            try:
                record = elaborate_fft_tags(record, rec_id, opt_mode)
            except Exception, e:
                register_exception()
                write_message(" Stage 2 failed: Error while elaborating FFT tags: %s" % e, verbose=1, stream=sys.stderr)
                return (1, int(rec_id))
            if record is None:
                write_message(" Stage 2 failed: Error while elaborating FFT tags", verbose=1, stream=sys.stderr)
                return (1, int(rec_id))
            write_message(" -Stage COMPLETED", verbose=2)
        else:
            write_message(" -Stage NOT NEEDED", verbose=2)

        # Have a look if we have 856 tags to synchronize:
        write_message("Stage 2B: Start (Synchronize 8564 tags).", verbose=2)
        has_bibdocs = run_sql("SELECT count(id_bibdoc) FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'", (rec_id, ))[0][0] > 0
        if opt_stage_to_start_from <= 2 and (has_bibdocs or record_had_FFT or extract_tag_from_record(record, '856') is not None):
            try:
                record = synchronize_8564(rec_id, record, record_had_FFT)
            except Exception, e:
                register_exception()
                write_message(" Stage 2B failed: Error while synchronizing 8564 tags: %s" % e, verbose=1, stream=sys.stderr)
                return (1, int(rec_id))
            if record is None:
                write_message(" Stage 2B failed: Error while synchronizing 8564 tags", verbose=1, stream=sys.stderr)
                return (1, int(rec_id))
            write_message(" -Stage COMPLETED", verbose=2)
        else:
            write_message(" -Stage NOT NEEDED", verbose=2)

        # Update of the BibFmt
        write_message("Stage 3: Start (Update bibfmt).", verbose=2)
        if opt_stage_to_start_from <= 3:
            # format the single record as xml
            rec_xml_new = record_xml_output(record)
            # Update bibfmt with the format xm of this record
            if opt_mode != 'format':
                error = update_bibfmt_format(rec_id, rec_xml_new, 'xm')
-                if error == 1:
-                    write_message(" Failed: error during update_bibfmt_format", verbose=1, stream=sys.stderr)
-                    return (1, int(rec_id))
-            # archive MARCXML format of this record for version history purposes:
-            if opt_mode != 'format':
+                if error == 1:
+                    write_message(" Failed: error during update_bibfmt_format 'xm'", verbose=1, stream=sys.stderr)
+                    return (1, int(rec_id))
+                if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE:
+                    error = update_bibfmt_format(rec_id, marshal.dumps(record), 'recstruct')
+                    if error == 1:
+                        write_message(" Failed: error during update_bibfmt_format 'recstruct'", verbose=1, stream=sys.stderr)
+                        return (1, int(rec_id))
+                # archive MARCXML format of this record for version history purposes:
                error = archive_marcxml_for_history(rec_id)
                if error == 1:
                    write_message(" Failed to archive MARCXML for history", verbose=1, stream=sys.stderr)
                    return (1, int(rec_id))
                else:
                    write_message(" -Archived MARCXML for history: DONE", verbose=2)
            write_message(" -Stage COMPLETED", verbose=2)

        # Update the database MetaData
        write_message("Stage 4: Start (Update the database with the metadata).", verbose=2)
        if opt_stage_to_start_from <= 4:
            if opt_mode in ('insert', 'replace', 'replace_or_insert', 'append', 'correct', 'reference'):
                update_database_with_metadata(record, rec_id, oai_rec_id)
                record_deleted_p = False
            else:
                write_message(" -Stage NOT NEEDED in mode %s" % opt_mode, verbose=2)
            write_message(" -Stage COMPLETED", verbose=2)
        else:
            write_message(" -Stage NOT NEEDED", verbose=2)

        # Finally we update the bibrec table with the current date
        write_message("Stage 5: Start (Update bibrec table with current date).", verbose=2)
        if opt_stage_to_start_from <= 5 and \
           opt_notimechange == 0 and \
           not insert_mode_p:
            now = convert_datestruct_to_datetext(time.localtime())
            write_message(" -Retrieved current localtime: DONE", verbose=2)
            update_bibrec_modif_date(now, rec_id)
            write_message(" -Stage COMPLETED", verbose=2)
        else:
            write_message(" -Stage NOT NEEDED", verbose=2)

        # Increase statistics
        if insert_mode_p:
            stat['nb_records_inserted'] += 1
        else:
            stat['nb_records_updated'] += 1

        # Upload of this record finished
        write_message("Record " + str(rec_id) + " DONE", verbose=1)
        return (0, int(rec_id))
    finally:
        if record_deleted_p:
            ## BibUpload has failed, leaving the record deleted.  We
            ## should restore the original record then.
            update_database_with_metadata(rec_old, rec_id, oai_rec_id)
            write_message(" Restored original record", verbose=1, stream=sys.stderr)

def insert_record_into_holding_pen(record, oai_id):
    query = "INSERT INTO oaiHOLDINGPEN (oai_id, date_inserted, record_XML) VALUES (%s, NOW(), %s)"
    xml_record = record_xml_output(record)
    run_sql(query, (oai_id, xml_record))
    # record_id is logged as 0! (We are not inserting into the main database.)
    log_record_uploading(oai_id, task_get_task_param('task_id', 0), 0, 'H')
    stat['nb_holdingpen'] += 1
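## NOTE (editor illustration, not part of the original module): a minimal,
## hedged sketch of driving the engine above directly for one MARCXML file,
## reusing the helpers defined below; the file path is a made-up example.
##
##     recs = xml_marc_to_records(open_marc_file('/tmp/sample.xml'))
##     for rec in recs:
##         err_code, recid = bibupload(rec, opt_mode='insert')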
def print_out_bibupload_statistics():
    """Print the statistics of the process"""
    out = "Task stats: %(nb_input)d input records, %(nb_updated)d updated, " \
          "%(nb_inserted)d inserted, %(nb_errors)d errors, " \
          "%(nb_holdingpen)d inserted to holding pen. Time %(nb_sec).2f sec." % {
              'nb_input': stat['nb_records_to_upload'],
              'nb_updated': stat['nb_records_updated'],
              'nb_inserted': stat['nb_records_inserted'],
              'nb_errors': stat['nb_errors'],
              'nb_holdingpen': stat['nb_holdingpen'],
              'nb_sec': time.time() - time.mktime(stat['exectime'])}
    write_message(out)

def open_marc_file(path):
    """Open a file and return the data"""
    try:
        # open the file containing the marc document
        marc_file = open(path, 'r')
        marc = marc_file.read()
        marc_file.close()
    except IOError, erro:
        write_message("Error: %s" % erro, verbose=1, stream=sys.stderr)
        write_message("Exiting.", verbose=1, stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    return marc

def xml_marc_to_records(xml_marc):
    """Create the records from the MARCXML given in argument"""
    recs = create_records(xml_marc, 1, 1)
    if recs == []:
        write_message("Error: Cannot parse MARCXML file.", verbose=1, stream=sys.stderr)
        write_message("Exiting.", verbose=1, stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    elif recs[0][0] is None:
        write_message("Error: MARCXML file has wrong format: %s" % recs, verbose=1, stream=sys.stderr)
        write_message("Exiting.", verbose=1, stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    else:
        recs = map((lambda x: x[0]), recs)
        return recs

def find_record_format(rec_id, format):
    """Look whether record REC_ID is formatted in FORMAT,
       i.e. whether FORMAT exists in the bibfmt table for this record.

       Return the number of times it is formatted: 0 if not, 1 if yes,
       2 if found more than once (should never occur).
    """
    out = 0
    query = """SELECT COUNT(id) FROM bibfmt WHERE id_bibrec=%s AND format=%s"""
    params = (rec_id, format)
    res = []
    try:
        res = run_sql(query, params)
        out = res[0][0]
    except Error, error:
        write_message(" Error during find_record_format() : %s " % error, verbose=1, stream=sys.stderr)
    return out

def find_record_from_recid(rec_id):
    """
    Try to find record in the database from the REC_ID number.
    Return record ID if found, None otherwise.
    """
    res = None
    try:
        res = run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id,))
    except Error, error:
        write_message(" Error during find_record_from_recid() : %s " % error, verbose=1, stream=sys.stderr)
    if res:
        return res[0][0]
    else:
        return None

def find_record_from_sysno(sysno):
    """
    Try to find record in the database from the external SYSNO number.
    Return record ID if found, None otherwise.
    """
    bibxxx = 'bib' + CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2] + 'x'
    bibrec_bibxxx = 'bibrec_' + bibxxx
    res = None
    try:
        res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
            %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
            AND bb.id_bibxxx=b.id""" % \
            {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx},
            (CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, sysno,))
    except Error, error:
        write_message(" Error during find_record_from_sysno(): %s " % error, verbose=1, stream=sys.stderr)
    if res:
        return res[0][0]
    else:
        return None
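## NOTE (editor illustration, not part of the original module): the
## 'bib' + tag[0:2] + 'x' convention used above maps the first two digits of
## a MARC tag to its metadata table; e.g., assuming the usual default value
## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = '970__a':
##
##     bibxxx        -> 'bib97x'
##     bibrec_bibxxx -> 'bibrec_bib97x'
##
## so the generated SQL reads, schematically (values still bound by run_sql):
##
##     SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b
##      WHERE b.tag='970__a' AND b.value=<sysno> AND bb.id_bibxxx=b.id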
""" from invenio.search_engine import print_record assert(CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5] == CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[:5]) bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: write_message(' Looking for extoaiid="%s" with extoaisrc="%s"' % (extoaiid, extoaisrc), verbose=9) id_bibrecs = intbitset(run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, extoaiid,))) write_message(' Partially found %s for extoaiid="%s"' % (id_bibrecs, extoaiid), verbose=9) ret = intbitset() for id_bibrec in id_bibrecs: - record = create_record(print_record(id_bibrec, 'xm'))[0] + record = get_record(id_bibrec) instances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) write_message(' recid %s -> instances "%s"' % (id_bibrec, instances), verbose=9) for instance in instances: provenance = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5]) write_message(' recid %s -> provenance "%s"' % (id_bibrec, provenance), verbose=9) provenance = provenance and provenance[0] or None if provenance is None: if extoaisrc is None: write_message('Found recid %s for extoaiid="%s"' % (id_bibrec, extoaiid), verbose=9) ret.add(id_bibrec) break else: raise Error('Found recid %s for extoaiid="%s" that doesn\'t specify any provenance, while input record does.' % (id_bibrec, extoaiid)) else: if extoaiid is None: raise Error('Found recid %s for extoaiid="%s" that specify as provenance "%s", while input record does not specify any provenance.' % (id_bibrec, extoaiid, provenance)) elif provenance == extoaisrc: write_message('Found recid %s for extoaiid="%s" with provenance="%s"' % (id_bibrec, extoaiid, extoaisrc), verbose=9) ret.add(id_bibrec) break return ret except Error, error: write_message(" Error during find_records_from_extoaiid(): %s " % error, verbose=1, stream=sys.stderr) raise def find_record_from_oaiid(oaiid): """ Try to find record in the database from the OAI ID number and OAI SRC. Return record ID if found, None otherwise. """ bibxxx = 'bib'+CFG_OAI_ID_FIELD[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_OAI_ID_FIELD, oaiid,)) except Error, error: write_message(" Error during find_record_from_oaiid(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def extract_tag_from_record(record, tag_number): """ Extract the tag_number for record.""" # first step verify if the record is not already in the database if record: return record.get(tag_number, None) return None def retrieve_rec_id(record, opt_mode): """Retrieve the record Id from a record by using tag 001 or SYSNO or OAI ID tag. opt_mod is the desired mode.""" rec_id = None # 1st step: we look for the tag 001 tag_001 = extract_tag_from_record(record, '001') if tag_001 is not None: # We extract the record ID from the tag rec_id = tag_001[0][3] # if we are in insert mode => error if opt_mode == 'insert': write_message(" Failed : Error tag 001 found in the xml" \ " submitted, you should use the option replace," \ " correct or append to replace an existing" \ " record. 
(-h for help)", verbose=1, stream=sys.stderr) return -1 else: # we found the rec id and we are not in insert mode => continue # we try to match rec_id against the database: if find_record_from_recid(rec_id) is not None: # okay, 001 corresponds to some known record return int(rec_id) else: # The record doesn't exist yet. We shall have try to check # the SYSNO or OAI id later. write_message(" -Tag 001 value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag 001 not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the SYSNO sysnos = record_get_field_values(record, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or "", CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6]) if sysnos: sysno = sysnos[0] # there should be only one external SYSNO write_message(" -Checking if SYSNO " + sysno + \ " exists in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_sysno(sysno) if rec_id is not None: # rec_id found pass else: # The record doesn't exist yet. We will try to check # external and internal OAI ids later. write_message(" -Tag SYSNO value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the external OAIID extoai_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or "") if extoai_fields: for field in extoai_fields: extoaiid = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6]) extoaisrc = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6]) if extoaiid: extoaiid = extoaiid[0] if extoaisrc: extoaisrc = extoaisrc[0] else: extoaisrc = None write_message(" -Checking if EXTOAIID %s (%s) exists in the database" % (extoaiid, extoaisrc), verbose=9) # try to find the corresponding rec id from the database try: rec_ids = find_records_from_extoaiid(extoaiid, extoaisrc) except Error, e: write_message(e, verbose=1, stream=sys.stderr) return -1 if rec_ids: # rec_id found rec_id = rec_ids.pop() break else: # The record doesn't exist yet. We will try to check # OAI id later. write_message(" -Tag EXTOAIID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag EXTOAIID not found in the xml marc file.", verbose=9) if rec_id is None: # 4th step we look for the OAI ID oaiidvalues = record_get_field_values(record, CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or "", CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or "", CFG_OAI_ID_FIELD[5:6]) if oaiidvalues: oaiid = oaiidvalues[0] # there should be only one OAI ID write_message(" -Check if local OAI ID " + oaiid + \ " exist in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_oaiid(oaiid) if rec_id is not None: # rec_id found pass else: write_message(" -Tag OAI ID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) # Now we should have detected rec_id from SYSNO or OAIID # tags. (None otherwise.) 
if rec_id: if opt_mode == 'insert': write_message(" Failed : Record found in the database," \ " you should use the option replace," \ " correct or append to replace an existing" \ " record. (-h for help)", verbose=1, stream=sys.stderr) return -1 else: if opt_mode != 'insert' and \ opt_mode != 'replace_or_insert': write_message(" Failed : Record not found in the database."\ " Please insert the file before updating it."\ " (-h for help)", verbose=1, stream=sys.stderr) return -1 return rec_id and int(rec_id) or None ### Insert functions def create_new_record(): """Create new record in the database""" now = convert_datestruct_to_datetext(time.localtime()) query = """INSERT INTO bibrec (creation_date, modification_date) VALUES (%s, %s)""" params = (now, now) try: rec_id = run_sql(query, params) return rec_id except Error, error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_bibfmt(id_bibrec, marc, format, modification_date='1970-01-01 00:00:00'): """Insert the format in the table bibfmt""" # compress the marc value pickled_marc = compress(marc) try: time.strptime(modification_date, "%Y-%m-%d %H:%M:%S") except ValueError: modification_date = '1970-01-01 00:00:00' query = """INSERT INTO bibfmt (id_bibrec, format, last_updated, value) VALUES (%s, %s, %s, %s)""" try: row_id = run_sql(query, (id_bibrec, format, modification_date, pickled_marc)) return row_id except Error, error: write_message(" Error during the insert_bibfmt function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_record_bibxxx(tag, value): """Insert the record into bibxxx""" # determine into which table one should insert the record table_name = 'bib'+tag[0:2]+'x' # check if the tag, value combination exists in the table query = """SELECT id,value FROM %s """ % table_name query += """ WHERE tag=%s AND value=%s""" params = (tag, value) try: res = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) # Note: compare now the found values one by one and look for # string binary equality (e.g. to respect lowercase/uppercase # match), regardless of the charset etc settings. Ideally we # could use a BINARY operator in the above SELECT statement, but # we would have to check compatibility on various MySQLdb versions # etc; this approach checks all matched values in Python, not in # MySQL, which is less cool, but more conservative, so it should # work better on most setups. for row in res: row_id = row[0] row_value = row[1] if row_value == value: return (table_name, row_id) # We got here only when the tag,value combination was not found, # so it is now necessary to insert the tag,value combination into # bibxxx table as new. 
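## NOTE (editor illustration, not part of the original module): bibfmt stores
## the compressed blob, so, assuming compress() above is zlib.compress, the
## symmetric read-back of e.g. the 'xm' format of record 42 would be roughly:
##
##     from zlib import decompress
##     row = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s",
##                   (42, 'xm'))
##     marcxml = decompress(row[0][0])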
query = """INSERT INTO %s """ % table_name query += """ (tag, value) values (%s , %s)""" params = (tag, value) try: row_id = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) return (table_name, row_id) def insert_record_bibrec_bibxxx(table_name, id_bibxxx, field_number, id_bibrec): """Insert the record into bibrec_bibxxx""" # determine into which table one should insert the record full_table_name = 'bibrec_'+ table_name # insert the proper row into the table query = """INSERT INTO %s """ % full_table_name query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)""" params = (id_bibrec, id_bibxxx, field_number) try: res = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibrec_bibxxx" " function 2nd query : %s " % error, verbose=1, stream=sys.stderr) return res def synchronize_8564(rec_id, record, record_had_FFT): """Sinchronize the 8564 tags for record with actual files. descriptions should be a dictionary docname:description for the new description to be inserted. If record_had_FFT the algorithm assume that every fulltext operation has been performed through FFT, hence it discard current 8564 local tags, and rebuild them after bibdocfile tables. Otherwise it first import from 8564 tags the $y and $z subfields corresponding to local files and merge them into bibdocfile tables. """ def merge_marc_into_bibdocfile(field): """Given the 8564 tag it retrieve the corresponding bibdoc and merge the $y and $z subfields.""" url = field_get_subfield_values(field, 'u')[:1] description = field_get_subfield_values(field, 'y')[:1] comment = field_get_subfield_values(field, 'z')[:1] if url: recid, docname, format = decompose_bibdocfile_url(url[0]) try: bibdoc = BibRecDocs(recid).get_bibdoc(docname) if description: bibdoc.set_description(description[0], format) if comment: bibdoc.set_comment(comment[0], format) except InvenioWebSubmitFileError: ## Apparently the referenced docname doesn't exist anymore. ## Too bad. Let's skip it. write_message("WARNING: docname %s doesn't exist for record %s. Has it been renamed outside FFT?" % (docname, recid), stream=sys.stderr) write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record), verbose=9) tags8564s = record_get_field_instances(record, '856', '4', ' ') filtered_tags8564s = [] # Let's discover all the previous internal urls, in order to rebuild them! for field in tags8564s: to_be_removed = False for value in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'): try: recid, docname, format = decompose_bibdocfile_url(value) if recid == rec_id: if not record_had_FFT: ## If the submission didn't have FFTs, i.e. could be ## not FFTs aware, and it specify 8564s pointing to local ## owned files, then we should import comment and ## description from the 8564s tags. ## Anything else will be discarded. merge_marc_into_bibdocfile(field) to_be_removed = True except InvenioWebSubmitFileError: ## The URL is not a bibdocfile URL. 
def synchronize_8564(rec_id, record, record_had_FFT):
    """Synchronize the 8564 tags of RECORD with the actually attached files.

    If record_had_FFT is true, the algorithm assumes that every fulltext
    operation has been performed through FFT, hence it discards the current
    local 8564 tags and rebuilds them from the bibdocfile tables.  Otherwise
    it first imports from the 8564 tags the $y and $z subfields corresponding
    to local files and merges them into the bibdocfile tables.
    """
    def merge_marc_into_bibdocfile(field):
        """Given an 8564 tag, retrieve the corresponding bibdoc and merge
        the $y and $z subfields."""
        url = field_get_subfield_values(field, 'u')[:1]
        description = field_get_subfield_values(field, 'y')[:1]
        comment = field_get_subfield_values(field, 'z')[:1]
        if url:
            recid, docname, format = decompose_bibdocfile_url(url[0])
            try:
                bibdoc = BibRecDocs(recid).get_bibdoc(docname)
                if description:
                    bibdoc.set_description(description[0], format)
                if comment:
                    bibdoc.set_comment(comment[0], format)
            except InvenioWebSubmitFileError:
                ## Apparently the referenced docname doesn't exist anymore.
                ## Too bad.  Let's skip it.
                write_message("WARNING: docname %s doesn't exist for record %s. Has it been renamed outside FFT?" % (docname, recid), stream=sys.stderr)

    write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record), verbose=9)
    tags8564s = record_get_field_instances(record, '856', '4', ' ')
    filtered_tags8564s = []

    # Let's discover all the previous internal urls, in order to rebuild them!
    for field in tags8564s:
        to_be_removed = False
        for value in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'):
            try:
                recid, docname, format = decompose_bibdocfile_url(value)
                if recid == rec_id:
                    if not record_had_FFT:
                        ## If the submission didn't have FFTs, i.e. it could
                        ## be not FFT-aware, and it specifies 8564s pointing
                        ## to locally owned files, then we should import the
                        ## comment and description from those 8564 tags.
                        ## Anything else will be discarded.
                        merge_marc_into_bibdocfile(field)
                    to_be_removed = True
            except InvenioWebSubmitFileError:
                ## The URL is not a bibdocfile URL.
                pass
        if not to_be_removed:
            filtered_tags8564s.append(field)

    # Let's keep in the record only the external 8564 tags:
    record_delete_field(record, '856', '4', ' ')  # First we delete all 8564
    for field in filtered_tags8564s:
        # Then we re-add the external ones
        record_add_field(record, '856', '4', ' ', '', field[0])

    # Now we refresh with the existing internal 8564 tags:
    bibrecdocs = BibRecDocs(rec_id)
    latest_files = bibrecdocs.list_latest_files()
    for afile in latest_files:
        url = afile.get_url()
        description = afile.get_description()
        comment = afile.get_comment()
        new_subfield = [('u', url)]
        if description:
            new_subfield.append(('y', description))
        if comment:
            new_subfield.append(('z', comment))
        record_add_field(record, '856', '4', ' ', '', new_subfield)

    # Let's handle all the icons:
    for bibdoc in bibrecdocs.list_bibdocs():
        icon = bibdoc.get_icon()
        if icon:
            icon = icon.list_all_files()
            if icon:
                url = icon[0].get_url()  # The 1st format found should be ok
                new_subfield = [('q', url)]
                new_subfield.append(('x', 'icon'))
                record_add_field(record, '856', '4', ' ', '', new_subfield)
    return record

def elaborate_fft_tags(record, rec_id, mode):
    """
    Process FFT tags that should contain $a with file paths or URLs
    to get the fulltext from.  This function enriches record with
    proper 8564 URL tags, downloads fulltext files and stores them
    into var/data structure where appropriate.

    CFG_BIBUPLOAD_WGET_SLEEP_TIME defines time to sleep in seconds in
    between URL downloads.

    Note: if an FFT tag contains multiple $a subfields, we upload them
    into different 856 URL tags in the metadata.  See regression test
    case test_multiple_fft_insert_via_http().
    """

    # Let's define some handy sub procedures.
    def _add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment):
        """Add a new format for a given bibdoc.  Return True when everything's fine."""
        write_message('Add new format to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment), verbose=9)
        try:
            if not url:
                # Not requesting a new url.  Just updating comment & description.
                return _update_description_and_comment(bibdoc, docname, format, description, comment)
            tmpurl = download_url(url, format)
            try:
                try:
                    bibdoc.add_file_new_format(tmpurl, description=description, comment=comment)
                except StandardError, e:
                    write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because format already exists (%s)." % (url, format, docname, doctype, newname, description, comment, e), stream=sys.stderr)
                    raise
            finally:
                os.remove(tmpurl)
        except Exception, e:
            write_message("Error in downloading '%s' because of: %s" % (url, e), stream=sys.stderr)
            raise
        return True

    def _add_new_version(bibdoc, url, format, docname, doctype, newname, description, comment):
        """Add a new version for a given bibdoc.  Return True when everything's fine."""
        write_message('Add new version to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment))
        try:
            if not url:
                return _update_description_and_comment(bibdoc, docname, format, description, comment)
            tmpurl = download_url(url, format)
            try:
                try:
                    bibdoc.add_file_new_version(tmpurl, description=description, comment=comment)
                except StandardError, e:
                    write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because '%s'." % (url, format, docname, doctype, newname, description, comment, e), stream=sys.stderr)
                    raise
            finally:
                os.remove(tmpurl)
        except Exception, e:
            write_message("Error in downloading '%s' because of: %s" % (url, e), stream=sys.stderr)
            raise
        return True

    def _update_description_and_comment(bibdoc, docname, format, description, comment):
        """Directly update comments and descriptions."""
        write_message('Just updating description and comment for %s with format %s with description %s and comment %s' % (docname, format, description, comment), verbose=9)
        try:
            bibdoc.set_description(description, format)
            bibdoc.set_comment(comment, format)
        except StandardError, e:
            write_message("('%s', '%s', '%s', '%s') description and comment not updated because '%s'." % (docname, format, description, comment, e))
            raise
        return True

    def _add_new_icon(bibdoc, url, restriction):
        """Add a new icon to an existing bibdoc, replacing the previous one
        if it exists.  If url is empty, just remove the current icon."""
        if not url:
            bibdoc.delete_icon()
        else:
            try:
                path = urllib2.urlparse.urlsplit(url)[2]
                filename = os.path.split(path)[-1]
                format = filename[len(file_strip_ext(filename)):]
                tmpurl = download_url(url, format)
                try:
                    try:
                        icondoc = bibdoc.add_icon(tmpurl, 'icon-%s' % bibdoc.get_docname())
                        if restriction and restriction != KEEP_OLD_VALUE:
                            icondoc.set_status(restriction)
                    except StandardError, e:
                        write_message("('%s', '%s') icon not added because '%s'." % (url, format, e), stream=sys.stderr)
                        raise
                finally:
                    os.remove(tmpurl)
            except Exception, e:
                write_message("Error in downloading '%s' because of: %s" % (url, e), stream=sys.stderr)
                raise
        return True
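    ## NOTE (editor illustration, not part of the original module): a minimal
    ## FFT datafield as the parsing loop below expects it; subfield meanings
    ## follow the code ($a url, $t doctype, $d description, $n docname,
    ## $f format, $z comment, $r restriction, $x icon, $v version):
    ##
    ##     <datafield tag="FFT" ind1=" " ind2=" ">
    ##       <subfield code="a">http://example.org/fulltext.pdf</subfield>
    ##       <subfield code="t">Main</subfield>
    ##       <subfield code="d">Preprint version</subfield>
    ##     </datafield>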
    tuple_list = extract_tag_from_record(record, 'FFT')
    if tuple_list:  # FFT Tags analysis
        write_message("FFTs: " + str(tuple_list), verbose=9)
        docs = {}  # docnames and their data

        for fft in record_get_field_instances(record, 'FFT', ' ', ' '):
            # Let's discover the type of the document.
            # This is a legacy field; no particular check is enforced on it.
            doctype = field_get_subfield_values(fft, 't')
            if doctype:
                doctype = doctype[0]
            else:  # Default is Main
                doctype = 'Main'

            # Let's discover the url.
            url = field_get_subfield_values(fft, 'a')
            if url:
                url = url[0]
                try:
                    check_valid_url(url)
                except StandardError, e:
                    raise StandardError, "fft '%s' specifies a url ('%s') with problems: %s" % (fft, url, e)
            else:
                url = ''

            # Let's discover the description.
            description = field_get_subfield_values(fft, 'd')
            if description != []:
                description = description[0]
            else:
                if mode == 'correct':
                    ## If the user requires a correction and does not specify
                    ## a description, this means she really wants to
                    ## modify the description.
                    description = ''
                else:
                    description = KEEP_OLD_VALUE

            # Let's discover the desired docname to be created/altered.
            name = field_get_subfield_values(fft, 'n')
            if name:
                name = file_strip_ext(name[0])
            else:
                if url:
                    name = get_docname_from_url(url)
                else:
                    write_message("Warning: fft '%s' specifies neither a url nor a name" % str(fft), stream=sys.stderr)
                    continue

            # Let's discover the desired new docname in case we want to change it.
            newname = field_get_subfield_values(fft, 'm')
            if newname:
                newname = file_strip_ext(newname[0])
            else:
                newname = name

            # Let's discover the desired format.
            format = field_get_subfield_values(fft, 'f')
            if format:
                format = format[0]
            else:
                if url:
                    format = get_format_from_url(url)
                else:
                    format = ''
            format = normalize_format(format)

            # Let's discover the icon.
            icon = field_get_subfield_values(fft, 'x')
            if icon != []:
                icon = icon[0]
                if icon != KEEP_OLD_VALUE:
                    try:
                        check_valid_url(icon)
                    except StandardError, e:
                        raise StandardError, "fft '%s' specifies an icon ('%s') with problems: %s" % (fft, icon, e)
            else:
                if mode == 'correct':
                    ## See comment on description.
                    icon = ''
                else:
                    icon = KEEP_OLD_VALUE

            # Let's discover the comment.
            comment = field_get_subfield_values(fft, 'z')
            if comment != []:
                comment = comment[0]
            else:
                if mode == 'correct':
                    ## See comment on description.
                    comment = ''
                else:
                    comment = KEEP_OLD_VALUE

            # Let's discover the restriction.
            restriction = field_get_subfield_values(fft, 'r')
            if restriction != []:
                restriction = restriction[0]
            else:
                if mode == 'correct':
                    ## See comment on description.
                    restriction = ''
                else:
                    restriction = KEEP_OLD_VALUE

            version = field_get_subfield_values(fft, 'v')
            if version:
                version = version[0]
            else:
                version = ''

            if docs.has_key(name):  # new format considered
                (doctype2, newname2, restriction2, icon2, version2, urls) = docs[name]
                if doctype2 != doctype:
                    raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name)
                if newname2 != newname:
                    raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name)
                if restriction2 != restriction:
                    raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name)
                if icon2 != icon:
                    raise StandardError, "fft '%s' specifies a different icon than the previous fft with docname '%s'" % (str(fft), name)
                if version2 != version:
                    raise StandardError, "fft '%s' specifies a different version than the previous fft with docname '%s'" % (str(fft), name)
                for (url2, format2, description2, comment2) in urls:
                    if format == format2:
                        raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, format, name)
                if url or format:
                    urls.append((url, format, description, comment))
            else:
                if url or format:
                    docs[name] = (doctype, newname, restriction, icon, version, [(url, format, description, comment)])
                else:
                    docs[name] = (doctype, newname, restriction, icon, version, [])

        write_message('Result of FFT analysis:\n\tDocs: %s' % (docs,), verbose=9)

        # Let's remove all FFT tags.
        record_delete_field(record, 'FFT', ' ', ' ')

        # Preprocessed data elaboration.
        bibrecdocs = BibRecDocs(rec_id)

        if mode == 'replace':  # First we erase previous bibdocs
            for bibdoc in bibrecdocs.list_bibdocs():
                bibdoc.delete()
            bibrecdocs.build_bibdoc_list()

        for docname, (doctype, newname, restriction, icon, version, urls) in docs.iteritems():
            write_message("Elaborating olddocname: '%s', newdocname: '%s', doctype: '%s', restriction: '%s', icon: '%s', urls: '%s', mode: '%s'" % (docname, newname, doctype, restriction, icon, urls, mode), verbose=9)
            if mode in ('insert', 'replace'):  # new bibdocs, new docnames, new marc
                if newname in bibrecdocs.get_bibdoc_names():
                    write_message("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr)
                    raise StandardError
                try:
                    bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
                    bibdoc.set_status(restriction)
                except Exception, e:
                    write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
                    raise StandardError
                for (url, format, description, comment) in urls:
                    assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                if icon and not icon == KEEP_OLD_VALUE:
                    assert(_add_new_icon(bibdoc, icon, restriction))
            elif mode == 'replace_or_insert':  # to be thought of as correct_or_insert
                for bibdoc in bibrecdocs.list_bibdocs():
                    if bibdoc.get_docname() == docname:
                        if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
                            if newname != docname:
                                try:
                                    bibdoc.change_name(newname)
                                    icon = bibdoc.get_icon()
                                    if icon:
                                        icon.change_name('icon-%s' % newname)
                                except StandardError, e:
                                    write_message(e, stream=sys.stderr)
                                    raise
                found_bibdoc = False
                for bibdoc in bibrecdocs.list_bibdocs():
                    if bibdoc.get_docname() == newname:
                        found_bibdoc = True
                        if doctype == 'PURGE':
                            bibdoc.purge()
                        elif doctype == 'DELETE':
                            bibdoc.delete()
                        elif doctype == 'EXPUNGE':
                            bibdoc.expunge()
                        elif doctype == 'FIX-ALL':
                            bibrecdocs.fix(docname)
                        elif doctype == 'FIX-MARC':
                            pass
                        elif doctype == 'DELETE-FILE':
                            if urls:
                                for (url, format, description, comment) in urls:
                                    bibdoc.delete_file(format, version)
                        elif doctype == 'REVERT':
                            try:
                                bibdoc.revert(version)
                            except Exception, e:
                                write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr)
                                raise
                        else:
                            if restriction != KEEP_OLD_VALUE:
                                bibdoc.set_status(restriction)
                            # Since the docname already existed we have to first
                            # bump the version by pushing the first new file,
                            # then pushing the other files.
                            if urls:
                                (first_url, first_format, first_description, first_comment) = urls[0]
                                other_urls = urls[1:]
                                assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment))
                                for (url, format, description, comment) in other_urls:
                                    assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                        if icon != KEEP_OLD_VALUE:
                            assert(_add_new_icon(bibdoc, icon, restriction))
                if not found_bibdoc:
                    bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
                    for (url, format, description, comment) in urls:
                        assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                    if icon and not icon == KEEP_OLD_VALUE:
                        assert(_add_new_icon(bibdoc, icon, restriction))
            elif mode == 'correct':
                for bibdoc in bibrecdocs.list_bibdocs():
                    if bibdoc.get_docname() == docname:
                        if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
                            if newname != docname:
                                try:
                                    bibdoc.change_name(newname)
                                    icon = bibdoc.get_icon()
                                    if icon:
                                        icon.change_name('icon-%s' % newname)
                                except StandardError, e:
                                    write_message('Error in renaming %s to %s: %s' % (docname, newname, e), stream=sys.stderr)
                                    raise
                found_bibdoc = False
                for bibdoc in bibrecdocs.list_bibdocs():
                    if bibdoc.get_docname() == newname:
                        found_bibdoc = True
                        if doctype == 'PURGE':
                            bibdoc.purge()
                        elif doctype == 'DELETE':
                            bibdoc.delete()
                        elif doctype == 'EXPUNGE':
                            bibdoc.expunge()
                        elif doctype == 'FIX-ALL':
                            bibrecdocs.fix(newname)
                        elif doctype == 'FIX-MARC':
                            pass
                        elif doctype == 'DELETE-FILE':
                            if urls:
                                for (url, format, description, comment) in urls:
                                    bibdoc.delete_file(format, version)
                        elif doctype == 'REVERT':
                            try:
                                bibdoc.revert(version)
                            except Exception, e:
                                write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr)
                                raise
                        else:
                            if restriction != KEEP_OLD_VALUE:
                                bibdoc.set_status(restriction)
                            if urls:
                                (first_url, first_format, first_description, first_comment) = urls[0]
                                other_urls = urls[1:]
                                assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment))
                                for (url, format, description, comment) in other_urls:
                                    assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                        if icon != KEEP_OLD_VALUE:
                            _add_new_icon(bibdoc, icon, restriction)
                if not found_bibdoc:
                    write_message("('%s', '%s', '%s') not added because docname '%s' didn't exist." % (doctype, newname, urls, docname), stream=sys.stderr)
                    raise StandardError
            elif mode == 'append':
                try:
                    found_bibdoc = False
                    for bibdoc in bibrecdocs.list_bibdocs():
                        if bibdoc.get_docname() == docname:
                            found_bibdoc = True
                            for (url, format, description, comment) in urls:
                                assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                            if icon not in ('', KEEP_OLD_VALUE):
                                assert(_add_new_icon(bibdoc, icon, restriction))
                    if not found_bibdoc:
                        try:
                            bibdoc = bibrecdocs.add_bibdoc(doctype, docname)
                            bibdoc.set_status(restriction)
                            for (url, format, description, comment) in urls:
                                assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                            if icon and not icon == KEEP_OLD_VALUE:
                                assert(_add_new_icon(bibdoc, icon, restriction))
                        except Exception, e:
                            register_exception()
                            write_message("('%s', '%s', '%s') not appended because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
                            raise
                except:
                    register_exception()
                    raise
    return record

def insert_fmt_tags(record, rec_id, opt_mode):
    """Process and insert FMT tags"""
    fmt_fields = record_get_field_instances(record, 'FMT')
    if fmt_fields:
        for fmt_field in fmt_fields:
            # Get the d, f, g subfields of the FMT tag
            try:
                d_value = field_get_subfield_values(fmt_field, "d")[0]
            except IndexError:
                d_value = ""
            try:
                f_value = field_get_subfield_values(fmt_field, "f")[0]
            except IndexError:
                f_value = ""
            try:
                g_value = field_get_subfield_values(fmt_field, "g")[0]
            except IndexError:
                g_value = ""
            # Update the format
            res = update_bibfmt_format(rec_id, g_value, f_value, d_value)
            if res == 1:
                write_message(" Failed: Error during update_bibfmt", verbose=1, stream=sys.stderr)
        # If we are in format mode, we only care about the FMT tag:
        if opt_mode == 'format':
            return 0
        # We delete the FMT tag of the record:
        record_delete_field(record, 'FMT')
        write_message(" -Delete field FMT from record : DONE", verbose=2)
        return record
    elif opt_mode == 'format':
        write_message(" Failed: Format update failed: no FMT tag found", verbose=1, stream=sys.stderr)
        return None
    else:
        return record

### Update functions

def update_bibrec_modif_date(now, bibrec_id):
    """Update the date of the record in the bibrec table"""
    query = """UPDATE bibrec SET modification_date=%s WHERE id=%s"""
    params = (now, bibrec_id)
    try:
        run_sql(query, params)
        write_message(" -Update record modification date : DONE", verbose=2)
    except Error, error:
        write_message(" Error during update_bibrec_modif_date function : %s" % error, verbose=1, stream=sys.stderr)

def update_bibfmt_format(id_bibrec, format_value, format_name, modification_date=None):
    """Update the format in the table bibfmt"""
    if modification_date is None:
        modification_date = time.strftime('%Y-%m-%d %H:%M:%S')
    else:
        try:
            time.strptime(modification_date, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            modification_date = '1970-01-01 00:00:00'

    # We check if the format is already in bibfmt:
    nb_found = find_record_format(id_bibrec, format_name)
    if nb_found == 1:
        # we are going to update the format
        # compress the format_value value
        pickled_format_value = compress(format_value)
        # update the format:
        query = """UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s"""
        params = (modification_date, pickled_format_value, id_bibrec, format_name)
        try:
            row_id = run_sql(query, params)
            if row_id is None:
                write_message(" Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr)
                return 1
            else:
                write_message(" -Update the format %s in bibfmt : DONE" % format_name, verbose=2)
                return 0
        except Error, error:
            write_message(" Error during the update_bibfmt_format function : %s " % error, verbose=1, stream=sys.stderr)
    elif nb_found > 1:
        write_message(" Failed: Same format %s found several times in bibfmt for the same record." % format_name, verbose=1, stream=sys.stderr)
        return 1
    else:
        # Insert the format information in bibfmt:
        res = insert_bibfmt(id_bibrec, format_value, format_name, modification_date)
        if res is None:
            write_message(" Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr)
            return 1
        else:
            write_message(" -Insert the format %s in bibfmt : DONE" % format_name, verbose=2)
            return 0
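## NOTE (editor illustration, not part of the original module): this is the
## same call Stage 3 of bibupload() performs; refreshing the MARCXML of
## record 42 by hand would look roughly like:
##
##     error = update_bibfmt_format(42, record_xml_output(record), 'xm')
##     # error == 0 on success, 1 on failure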
""" try: res = run_sql("SELECT id_bibrec, value, last_updated FROM bibfmt WHERE format='xm' AND id_bibrec=%s", (recID,)) if res: run_sql("""INSERT INTO hstRECORD (id_bibrec, marcxml, job_id, job_name, job_person, job_date, job_details) VALUES (%s,%s,%s,%s,%s,%s,%s)""", (res[0][0], res[0][1], task_get_task_param('task_id', 0), 'bibupload', task_get_task_param('user','UNKNOWN'), res[0][2], 'mode: ' + task_get_option('mode','UNKNOWN') + '; file: ' + task_get_option('file_path','UNKNOWN') + '.')) except Error, error: write_message(" Error during archive_marcxml_for_history: %s " % error, verbose=1, stream=sys.stderr) return 1 return 0 def update_database_with_metadata(record, rec_id, oai_rec_id = "oai"): """Update the database tables with the record and the record id given in parameter""" for tag in record.keys(): # check if tag is not a special one: if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each tag there is a list of tuples representing datafields tuple_list = record[tag] # this list should contain the elements of a full tag [tag, ind1, ind2, subfield_code] tag_list = [] tag_list.append(tag) for single_tuple in tuple_list: # these are the contents of a single tuple subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # append the ind's to the full tag if ind1 == '' or ind1 == ' ': tag_list.append('_') else: tag_list.append(ind1) if ind2 == '' or ind2 == ' ': tag_list.append('_') else: tag_list.append(ind2) datafield_number = single_tuple[4] if tag in CFG_BIBUPLOAD_SPECIAL_TAGS: # nothing to do for special tags (FFT, FMT) pass elif tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001": value = single_tuple[3] # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value) #print 'tname, bibrow', table_name, bibxxx_row_id; if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) else: # get the tag and value from the content of each subfield for subfield in subfield_list: subtag = subfield[0] value = subfield[1] tag_list.append(subtag) # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value) if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) # remove the subtag from the list tag_list.pop() tag_list.pop() tag_list.pop() tag_list.pop() write_message(" -Update the database with metadata : DONE", verbose=2) log_record_uploading(oai_rec_id, task_get_task_param('task_id', 0), rec_id, 'P') def append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode): """Append new tags to a old record""" if opt_tag is not 
def append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode):
    """Append new tags to an old record"""
    if opt_tag is not None:
        tag = opt_tag
        if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS:
            if tag == '001':
                pass
            else:
                # if it is a controlfield, just access the value
                for single_tuple in record[tag]:
                    controlfield_value = single_tuple[3]
                    # add the field to the old record
                    newfield_number = record_add_field(rec_old, tag, "", "", controlfield_value)
                    if newfield_number is None:
                        write_message(" Error when adding the field " + tag, verbose=1, stream=sys.stderr)
        else:
            # For each tag there is a list of tuples representing datafields
            for single_tuple in record[tag]:
                # We retrieve the information of the tag
                subfield_list = single_tuple[0]
                ind1 = single_tuple[1]
                ind2 = single_tuple[2]
                # We add the datafield to the old record
                write_message(" Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9)
                newfield_number = record_add_field(rec_old, tag, ind1, ind2, "", subfield_list)
                if newfield_number is None:
                    write_message(" Error when adding the field " + tag, verbose=1, stream=sys.stderr)
    else:
        # Go through each tag in the appended record
        for tag in record.keys():
            # Reference mode appends only the reference tag
            if opt_mode == 'reference':
                if tag == CFG_BIBUPLOAD_REFERENCE_TAG:
                    for single_tuple in record[tag]:
                        # We retrieve the information of the tag
                        subfield_list = single_tuple[0]
                        ind1 = single_tuple[1]
                        ind2 = single_tuple[2]
                        # We add the datafield to the old record
                        write_message(" Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9)
                        newfield_number = record_add_field(rec_old, tag, ind1, ind2, "", subfield_list)
                        if newfield_number is None:
                            write_message(" Error when adding the field " + tag, verbose=1, stream=sys.stderr)
            else:
                if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS:
                    if tag == '001':
                        pass
                    else:
                        # if it is a controlfield, just access the value
                        for single_tuple in record[tag]:
                            controlfield_value = single_tuple[3]
                            # add the field to the old record
                            newfield_number = record_add_field(rec_old, tag, "", "", controlfield_value)
                            if newfield_number is None:
                                write_message(" Error when adding the field " + tag, verbose=1, stream=sys.stderr)
                else:
                    # For each tag there is a list of tuples representing datafields
                    for single_tuple in record[tag]:
                        # We retrieve the information of the tag
                        subfield_list = single_tuple[0]
                        ind1 = single_tuple[1]
                        ind2 = single_tuple[2]
                        # We add the datafield to the old record
                        write_message(" Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9)
                        newfield_number = record_add_field(rec_old, tag, ind1, ind2, "", subfield_list)
                        if newfield_number is None:
                            write_message(" Error when adding the field " + tag, verbose=1, stream=sys.stderr)
    return rec_old

def copy_strong_tags_from_old_record(record, rec_old):
    """
    Look for strong tags in RECORD and REC_OLD.  If no strong tags are
    found in RECORD, then copy them over from REC_OLD.  This function
    modifies RECORD structure on the spot.
    """
    for strong_tag in CFG_BIBUPLOAD_STRONG_TAGS:
        if not record_get_field_instances(record, strong_tag):
            strong_tag_old_field_instances = record_get_field_instances(rec_old, strong_tag)
            if strong_tag_old_field_instances:
                for strong_tag_old_field_instance in strong_tag_old_field_instances:
                    sf_vals, fi_ind1, fi_ind2, controlfield, dummy = strong_tag_old_field_instance
                    record_add_field(record, strong_tag, fi_ind1, fi_ind2, controlfield, sf_vals)
    return
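## NOTE (editor illustration, not part of the original module): a hedged
## sketch of the strong-tag takeover, assuming e.g.
## CFG_BIBUPLOAD_STRONG_TAGS = ('964',); field instances are
## (subfields, ind1, ind2, controlfield_value, field_number) tuples:
##
##     record = {'245': [([('a', 'Some title')], ' ', ' ', '', 1)]}
##     rec_old = {'964': [([('a', 'kept value')], ' ', ' ', '', 7)]}
##     copy_strong_tags_from_old_record(record, rec_old)
##     # record now also carries the old 964 instance, so a 'replace'
##     # upload that omitted 964 does not lose it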
### Delete functions

def delete_tags_to_correct(record, rec_old, opt_tag):
    """
    Delete tags from REC_OLD which also exist in RECORD.  When deleting,
    pay attention not only to tags, but also to indicators, so that
    fields with the same tags but different indicators are not deleted.
    """
    ## Some fields are controlled via provenance information.
    ## We should re-add saved fields at the end.
    fields_to_readd = {}
    for tag in CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS:
        if tag[:3] in record:
            tmp_field_instances = record_get_field_instances(record, tag[:3], tag[3], tag[4])
            ## Let's discover the provenances that will be updated
            provenances_to_update = []
            for instance in tmp_field_instances:
                for code, value in instance[0]:
                    if code == tag[5]:
                        if value not in provenances_to_update:
                            provenances_to_update.append(value)
                        break
                else:
                    ## The provenance is not specified.
                    ## Let's add the special empty provenance.
                    if '' not in provenances_to_update:
                        provenances_to_update.append('')
            ## Let's take all the fields of rec_old corresponding to tag,
            ## and let's save apart all the fields that would be updated
            ## but that, having a different provenance not mentioned in
            ## record, should instead be preserved.
            potential_fields_to_readd = record_get_field_instances(rec_old, tag[:3], tag[3], tag[4])
            fields = []
            for sf_vals, ind1, ind2, dummy_cf, dummy_line in potential_fields_to_readd:
                for code, value in sf_vals:
                    if code == tag[5]:
                        if value not in provenances_to_update:
                            fields.append(sf_vals)
                        break
                else:
                    if '' not in provenances_to_update:
                        ## Empty provenance, let's protect it in any case
                        fields.append(sf_vals)
            fields_to_readd[tag] = fields

    # browse through all the tags from the MARCXML file:
    for tag in record:
        # do we have to delete only a special tag or any tag?
        if opt_tag is None or opt_tag == tag:
            # check if the tag exists in the old record too:
            if tag in rec_old and tag != '001':
                # the tag does exist, so delete all record's tag+ind1+ind2 combinations from rec_old
                for dummy_sf_vals, ind1, ind2, dummy_cf, field_number in record[tag]:
                    write_message(" Delete tag: " + tag + " ind1=" + ind1 + " ind2=" + ind2, verbose=9)
                    record_delete_field(rec_old, tag, ind1, ind2)

    ## Ok, we re-add the necessary fields!
    for tag, fields in fields_to_readd.iteritems():
        for sf_vals in fields:
            write_message(" Adding tag: " + tag[:3] + " ind1=" + tag[3] + " ind2=" + tag[4] + " code=" + str(sf_vals), verbose=9)
            record_add_field(rec_old, tag[:3], tag[3], tag[4], datafield_subfield_code_value_tuples=sf_vals)

def delete_bibrec_bibxxx(record, id_bibrec):
    """Delete the links of the given record from the bibrec_bibxxx tables"""
    # we clear all the rows from bibrec_bibxxx from the old record
    for tag in record.keys():
        if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS:
            # for each tag, construct the bibrec_bibxxx table name
            table_name = 'bibrec_bib' + tag[0:2] + 'x'
            # delete all the records with the proper id_bibrec
            query = """DELETE FROM `%s` WHERE id_bibrec = %s"""
            params = (table_name, id_bibrec)
            try:
                run_sql(query % params)
            except Error, error:
                write_message(" Error during the delete_bibrec_bibxxx function : %s " % error, verbose=1, stream=sys.stderr)
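## NOTE (editor illustration, not part of the original module): for a record
## 42 carrying tag 100, the loop above issues, schematically:
##
##     DELETE FROM `bibrec_bib10x` WHERE id_bibrec = 42
##
## i.e. only the record-to-value links are dropped; the shared bib10x values
## themselves stay and may be garbage-collected elsewhere.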
""" # delete all the linked bibdocs for bibdoc in BibRecDocs(recid).list_bibdocs(): bibdoc.expunge() # delete from bibrec: run_sql("DELETE FROM bibrec WHERE id=%s", (recid,)) # delete from bibrec_bibxxx: for i in range(0, 10): for j in range(0, 10): run_sql("DELETE FROM %(bibrec_bibxxx)s WHERE id_bibrec=%%s" % \ {'bibrec_bibxxx': "bibrec_bib%i%ix" % (i, j)}, (recid,)) # delete all unused bibxxx values: for i in range(0, 10): for j in range(0, 10): run_sql("DELETE %(bibxxx)s FROM %(bibxxx)s " \ " LEFT JOIN %(bibrec_bibxxx)s " \ " ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx " \ " WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL" % \ {'bibxxx': "bib%i%ix" % (i, j), 'bibrec_bibxxx': "bibrec_bib%i%ix" % (i, j)}) # delete from bibfmt: run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s", (recid,)) # delete from bibrec_bibdoc: run_sql("DELETE FROM bibrec_bibdoc WHERE id_bibrec=%s", (recid,)) return def delete_bibdoc(id_bibrec): """Delete document from bibdoc which correspond to the bibrec id given in parameter""" query = """UPDATE bibdoc SET status='DELETED' WHERE id IN (SELECT id_bibdoc FROM bibrec_bibdoc WHERE id_bibrec=%s)""" params = (id_bibrec,) try: run_sql(query, params) except Error, error: write_message(" Error during the delete_bibdoc function : %s " % error, verbose=1, stream=sys.stderr) def delete_bibrec_bibdoc(id_bibrec): """Delete the bibrec record from the table bibrec_bibdoc given in parameter""" # delete all the records with proper id_bibrec query = """DELETE FROM bibrec_bibdoc WHERE id_bibrec=%s""" params = (id_bibrec,) try: run_sql(query, params) except Error, error: write_message(" Error during the delete_bibrec_bibdoc function : %s " % error, verbose=1, stream=sys.stderr) def main(): """Main that construct all the bibtask.""" task_init(authorization_action='runbibupload', authorization_msg="BibUpload Task Submission", description="""Receive MARC XML file and update appropriate database tables according to options. Examples: $ bibupload -i input.xml """, help_specific_usage=""" -a, --append\t\tnew fields are appended to the existing record -c, --correct\t\tfields are replaced by the new ones in the existing record -f, --format\t\ttakes only the FMT fields into account. Does not update -i, --insert\t\tinsert the new record in the database -r, --replace\t\tthe existing record is entirely replaced by the new one -z, --reference\tupdate references (update only 999 fields) -S, --stage=STAGE\tstage to start from in the algorithm (0: always done; 1: FMT tags; \t\t\t2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange\tdo not change record last modification date when updating -o, --holdingpen\t\tInsert record into holding pen instead of the normal database """, version=__revision__, specific_params=("ircazS:fno", [ "insert", "replace", "correct", "append", "reference", "stage=", "format", "notimechange", "holdingpen", ]), task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. 
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """ Given the string key, check its meaning, possibly using the value.
    Usually it fills some key in the options dict.  It must return True if
    it has elaborated the key, False if it doesn't know that key.  E.g.:

        if key in ('-n', '--number'):
            task_set_option('number', value)
            return True
        return False
    """
    # No time change option
    if key in ("-n", "--notimechange"):
        task_set_option('notimechange', 1)
    # Insert mode option
    elif key in ("-i", "--insert"):
        if task_get_option('mode') == 'replace':
            # if also replace found, then set to replace_or_insert
            task_set_option('mode', 'replace_or_insert')
        else:
            task_set_option('mode', 'insert')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Replace mode option
    elif key in ("-r", "--replace"):
        if task_get_option('mode') == 'insert':
            # if also insert found, then set to replace_or_insert
            task_set_option('mode', 'replace_or_insert')
        else:
            task_set_option('mode', 'replace')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Holding pen mode option
    elif key in ("-o", "--holdingpen"):
        write_message("Holding pen mode", verbose=3)
        task_set_option('mode', 'holdingpen')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Correct mode option
    elif key in ("-c", "--correct"):
        task_set_option('mode', 'correct')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Append mode option
    elif key in ("-a", "--append"):
        task_set_option('mode', 'append')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Reference mode option
    elif key in ("-z", "--reference"):
        task_set_option('mode', 'reference')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Format mode option
    elif key in ("-f", "--format"):
        task_set_option('mode', 'format')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.realpath(args[0]))
    # Stage
    elif key in ("-S", "--stage"):
        try:
            value = int(value)
        except ValueError:
            print >> sys.stderr, """The value specified for --stage must be a valid integer, not %s""" % value
            return False
        if not (0 <= value <= 5):
            print >> sys.stderr, """The value specified for --stage must be between 0 and 5"""
            return False
        task_set_option('stage_to_start_from', value)
    else:
        return False
    return True

def task_submit_check_options():
    """ Reimplement this method for having the possibility to check options
    before submitting the task, in order for example to provide default
    values.  It must return False if there are errors in the options.
    """
    if task_get_option('mode') is None:
        write_message("Please specify at least one update/insert mode!")
        return False
    if task_get_option('file_path') is None:
        write_message("Missing filename! -h for help.")
        return False
    return True

def writing_rights_p():
    """Return True in case bibupload has the proper rights to write in the
    fulltext file folder."""
    filename = os.path.join(CFG_WEBSUBMIT_FILEDIR, 'test.txt')
    try:
        if not os.path.exists(CFG_WEBSUBMIT_FILEDIR):
            os.makedirs(CFG_WEBSUBMIT_FILEDIR)
        open(filename, 'w').write('TEST')
        assert(open(filename).read() == 'TEST')
        os.remove(filename)
    except:
        register_exception()
        return False
    return True
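## NOTE (editor illustration, not part of the original module): the mode
## handling above makes the combined '-i' and '-r' flags collapse into
## replace_or_insert, so the following invocations end up in the same mode:
##
##     $ bibupload -i -r input.xml
##     $ bibupload -r -i input.xml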
def task_run_core():
    """ Reimplement to add the body of the task."""
    error = 0
    write_message("Input file '%s', input mode '%s'." % (task_get_option('file_path'), task_get_option('mode')))
    write_message("STAGE 0:", verbose=2)

    if task_get_option('file_path') is not None:
        write_message("start processing", verbose=3)
        recs = xml_marc_to_records(open_marc_file(task_get_option('file_path')))
        stat['nb_records_to_upload'] = len(recs)
        write_message(" -Open XML marc: DONE", verbose=2)
        task_sleep_now_if_required(can_stop_too=True)
        write_message("Entering records loop", verbose=3)
        if recs is not None:
            # We process the records one by one
            for record in recs:
                record_id = record_extract_oai_id(record)
                task_sleep_now_if_required(can_stop_too=True)
                if task_get_option("mode") == "holdingpen":
                    # inserting into the holding pen
                    write_message("Inserting into holding pen", verbose=3)
                    insert_record_into_holding_pen(record, record_id)
                else:
                    write_message("Inserting into main database", verbose=3)
                    error = bibupload(
                        record,
                        opt_tag=task_get_option('tag'),
                        opt_mode=task_get_option('mode'),
                        opt_stage_to_start_from=task_get_option('stage_to_start_from'),
                        opt_notimechange=task_get_option('notimechange'),
                        oai_rec_id=record_id)
                    if error[0] == 1:
                        if record:
                            write_message(record_xml_output(record), stream=sys.stderr)
                        else:
                            write_message("Record could not be parsed", stream=sys.stderr)
                        stat['nb_errors'] += 1
                    elif error[0] == 2:
                        if record:
                            write_message(record_xml_output(record), stream=sys.stderr)
                        else:
                            write_message("Record could not be parsed", stream=sys.stderr)
                    task_update_progress("Done %d out of %d." % \
                        (stat['nb_records_inserted'] + stat['nb_records_updated'],
                         stat['nb_records_to_upload']))
        else:
            write_message(" Error: bibupload failed: no records found", verbose=1, stream=sys.stderr)

    if task_get_task_param('verbose') >= 1:
        # Print out the statistics
        print_out_bibupload_statistics()

    # Check if there were errors
    return not stat['nb_errors'] >= 1

def log_record_uploading(oai_rec_id, task_id, bibrec_id, insertion_db):
    if oai_rec_id != "" and oai_rec_id is not None:
        query = """UPDATE oaiHARVESTLOG SET date_inserted=NOW(), inserted_to_db=%s, id_bibrec=%s
                   WHERE oai_id = %s AND bibupload_task_id = %s ORDER BY date_harvested LIMIT 1"""
        try:
            run_sql(query, (str(insertion_db), str(bibrec_id), str(oai_rec_id), str(task_id), ))
        except Error, error:
            write_message(" Error during the log_record_uploading function : %s " % error, verbose=1, stream=sys.stderr)

if __name__ == "__main__":
    main()

diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py
index 927d04c12..4b2114aeb 100644
--- a/modules/websearch/lib/search_engine.py
+++ b/modules/websearch/lib/search_engine.py
@@ -1,4410 +1,4427 @@
# -*- coding: utf-8 -*-
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable-msg=C0301 """CDS Invenio Search Engine in mod_python.""" __lastupdated__ = """$Date$""" __revision__ = "$Id$" ## import general modules: import cgi import copy import string import os import re import time import urllib import urlparse import zlib ## import CDS Invenio stuff: from invenio.config import \ CFG_CERN_SITE, \ CFG_OAI_ID_FIELD, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_CALL_BIBFORMAT, \ CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \ CFG_WEBSEARCH_FIELDS_CONVERT, \ CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \ CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \ CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS, \ + CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_LOGDIR, \ CFG_SITE_URL from invenio.search_engine_config import CFG_EXPERIMENTAL_FEATURES, InvenioWebSearchUnknownCollectionError -from invenio.search_engine_summarizer import summarize_records -from invenio.bibrecord import create_records, record_get_field_value, record_get_field_values +from invenio.bibrecord import create_record, create_records, record_get_field_value, record_get_field_values from invenio.bibrank_record_sorter import get_bibrank_methods, rank_records, is_method_valid from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list from invenio.bibindex_engine_stemmer import stem from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box from invenio.data_cacher import DataCacher from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search from invenio.access_control_admin import acc_get_action_id from invenio.access_control_config import VIEWRESTRCOLL, \ CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS from invenio.websearchadminlib import get_detailed_page_tabs from invenio.intbitset import intbitset as HitSet from invenio.webinterface_handler import wash_urlargd from invenio.urlutils import make_canonical_urlargd from invenio.dbquery import DatabaseError from invenio.access_control_engine import acc_authorize_action import invenio.template webstyle_templates = invenio.template.load('webstyle') webcomment_templates = invenio.template.load('webcomment') from invenio.bibrank_citation_searcher import calculate_cited_by_list, \ calculate_co_cited_with_list, get_self_cited_in, get_self_cited_by, get_records_with_num_cites from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box from invenio.dbquery import run_sql, run_sql_cached, get_table_update_time, Error from invenio.webuser import getUid, collect_user_info from invenio.webpage import page, pageheaderonly, pagefooteronly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \ InvenioWebSearchQueryParserException, SpiresToInvenioSyntaxConverter try: from mod_python import apache except ImportError, e: pass # ignore user personalisation, needed e.g. 
for command-line try: import invenio.template websearch_templates = invenio.template.load('websearch') except: pass ## global vars: search_cache = {} # will cache results of previous searches cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical? collection_reclist_cache_timestamp = 0 field_i18nname_cache_timestamp = 0 collection_i18nname_cache_timestamp = 0 ## precompile some often-used regexp for speed reasons: re_word = re.compile('[\s]') re_quotes = re.compile('[\'\"]') re_doublequote = re.compile('\"') re_equal = re.compile('\=') re_logical_and = re.compile('\sand\s', re.I) re_logical_or = re.compile('\sor\s', re.I) re_logical_not = re.compile('\snot\s', re.I) re_operators = re.compile(r'\s([\+\-\|])\s') re_pattern_wildcards_at_beginning = re.compile(r'(\s)[\*\%]+') re_pattern_single_quotes = re.compile("'(.*?)'") re_pattern_double_quotes = re.compile("\"(.*?)\"") re_pattern_regexp_quotes = re.compile("\/(.*?)\/") re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+') re_pattern_space = re.compile("__SPACE__") re_pattern_today = re.compile("\$TODAY\$") re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)') re_unicode_lowercase_a = re.compile(unicode(r"(?u)[áàäâãå]", "utf-8")) re_unicode_lowercase_ae = re.compile(unicode(r"(?u)[æ]", "utf-8")) re_unicode_lowercase_e = re.compile(unicode(r"(?u)[éèëê]", "utf-8")) re_unicode_lowercase_i = re.compile(unicode(r"(?u)[íìïî]", "utf-8")) re_unicode_lowercase_o = re.compile(unicode(r"(?u)[óòöôõø]", "utf-8")) re_unicode_lowercase_u = re.compile(unicode(r"(?u)[úùüû]", "utf-8")) re_unicode_lowercase_y = re.compile(unicode(r"(?u)[ýÿ]", "utf-8")) re_unicode_lowercase_c = re.compile(unicode(r"(?u)[çć]", "utf-8")) re_unicode_lowercase_n = re.compile(unicode(r"(?u)[ñ]", "utf-8")) re_unicode_uppercase_a = re.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8")) re_unicode_uppercase_ae = re.compile(unicode(r"(?u)[Æ]", "utf-8")) re_unicode_uppercase_e = re.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8")) re_unicode_uppercase_i = re.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8")) re_unicode_uppercase_o = re.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8")) re_unicode_uppercase_u = re.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8")) re_unicode_uppercase_y = re.compile(unicode(r"(?u)[Ý]", "utf-8")) re_unicode_uppercase_c = re.compile(unicode(r"(?u)[ÇĆ]", "utf-8")) re_unicode_uppercase_n = re.compile(unicode(r"(?u)[Ñ]", "utf-8")) re_latex_lowercase_a = re.compile("\\\\[\"H'`~^vu=k]\{?a\}?") re_latex_lowercase_ae = re.compile("\\\\ae\\{\\}?") re_latex_lowercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?e\\}?") re_latex_lowercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?i\\}?") re_latex_lowercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?o\\}?") re_latex_lowercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?u\\}?") re_latex_lowercase_y = re.compile("\\\\[\"']\\{?y\\}?") re_latex_lowercase_c = re.compile("\\\\['uc]\\{?c\\}?") re_latex_lowercase_n = re.compile("\\\\[c'~^vu]\\{?n\\}?") re_latex_uppercase_a = re.compile("\\\\[\"H'`~^vu=k]\\{?A\\}?") re_latex_uppercase_ae = re.compile("\\\\AE\\{?\\}?") re_latex_uppercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?E\\}?") re_latex_uppercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?I\\}?") re_latex_uppercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?O\\}?") re_latex_uppercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?U\\}?") re_latex_uppercase_y = re.compile("\\\\[\"']\\{?Y\\}?") re_latex_uppercase_c = re.compile("\\\\['uc]\\{?C\\}?") 
re_latex_uppercase_n = re.compile("\\\\[c'~^vu]\\{?N\\}?")
class RestrictedCollectionDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = [] try: viewcollid = acc_get_action_id(VIEWRESTRCOLL) res = run_sql("""SELECT DISTINCT ar.value FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (viewcollid,)) except Exception: # database problems, return empty cache return [] for coll in res: ret.append(coll[0]) return ret def timestamp_getter(): return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT')) DataCacher.__init__(self, cache_filler, timestamp_getter)
def collection_restricted_p(collection): cache = restricted_collection_cache.get_cache() return collection in cache
try: restricted_collection_cache.is_ok_p except Exception: restricted_collection_cache = RestrictedCollectionDataCacher()
def check_user_can_view_record(user_info, recid): """Check if the user is authorized to view the given recid. The function grants access in two cases: either the user has author rights on this record, or he has view rights to the primary collection this record belongs to. Returns the same type as acc_authorize_action """ def _is_user_in_authorized_author_list_for_recid(user_info, recid): """Return True if the user has submitted the given record.""" authorized_emails = [] for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS: authorized_emails.extend(get_fieldvalues(recid, tag)) for email in authorized_emails: email = email.strip().lower() if user_info['email'].strip().lower() == email: return True return False record_primary_collection = guess_primary_collection_of_a_record(recid) if collection_restricted_p(record_primary_collection): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code == 0 or _is_user_in_authorized_author_list_for_recid(user_info, recid): return (0, '') else: return (auth_code, auth_msg) else: return (0, '')
class IndexStemmingDataCacher(DataCacher): def __init__(self): def cache_filler(): try: res = run_sql("""SELECT id, stemming_language FROM idxINDEX""") except DatabaseError: # database problems, return empty cache return {} return dict(res) def timestamp_getter(): return get_table_update_time('idxINDEX') DataCacher.__init__(self, cache_filler, timestamp_getter)
def get_index_stemming_language(index_id): cache = index_stemming_cache.get_cache() return cache[index_id]
try: index_stemming_cache.is_ok_p except Exception: index_stemming_cache = IndexStemmingDataCacher()
class FieldI18nNameDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name except Exception: # database problems, return empty cache return {} for f, ln, i18nname in res: if i18nname: if not ret.has_key(f): ret[f] = {} ret[f][ln] = i18nname return ret def timestamp_getter(): return get_table_update_time('fieldname') DataCacher.__init__(self, cache_filler, timestamp_getter) def get_field_i18nname(self, f, ln=CFG_SITE_LANG): out = f try: out = self.get_cache()[f][ln] except KeyError: pass # translation in LN does not exist return out
try: if not field_i18n_name_cache.is_ok_p: raise Exception except Exception: field_i18n_name_cache = FieldI18nNameDataCacher()
class CollectionRecListDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = {}
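# (Illustrative note, not in the original code: every DataCacher subclass
# in this module follows the same contract -- cache_filler() recomputes
# the whole cache from the database, timestamp_getter() reports the last
# modification time of the underlying table(s), and DataCacher re-runs
# cache_filler() only when that timestamp is newer than its cached copy.)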
try: res = run_sql("SELECT name,reclist FROM collection") except Exception: # database problems, return empty cache return {} for name, reclist in res: ret[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) return ret def timestamp_getter(): return get_table_update_time('collection') DataCacher.__init__(self, cache_filler, timestamp_getter) def get_collection_reclist(self, coll): cache = self.get_cache() if not cache[coll]: # not yet in the cache, so calculate it and fill the cache: set = HitSet() query = "SELECT nbrecs,reclist FROM collection WHERE name=%s" res = run_sql(query, (coll, ), 1) if res: try: set = HitSet(res[0][1]) except: pass self.cache[coll] = set cache[coll] = set # finally, return reclist: return cache[coll]
try: if not collection_reclist_cache.is_ok_p: raise Exception except Exception: collection_reclist_cache = CollectionRecListDataCacher()
class CollectionI18nDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name except Exception: # database problems, return empty cache return {} for c, ln, i18nname in res: if i18nname: if not ret.has_key(c): ret[c] = {} ret[c][ln] = i18nname return ret def timestamp_getter(): return get_table_update_time('collectionname') DataCacher.__init__(self, cache_filler, timestamp_getter) def get_coll_i18nname(self, c, ln=CFG_SITE_LANG): """Return nicely formatted collection name (of name type 'ln', 'long name') for collection C in language LN.""" cache = self.get_cache() out = c try: out = cache[c][ln] except KeyError: pass # translation in LN does not exist return out
try: if not collection_i18n_name_cache.is_ok_p: raise Exception except Exception: collection_i18n_name_cache = CollectionI18nDataCacher()
def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG): """Returns alphabetically ordered list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" out = [] query = "SELECT id,name FROM collection ORDER BY name ASC" res = run_sql(query) for c_id, c_name in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c_name, ln) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable out.append([c_name, c_printable]) return out
def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" colls_nicely_ordered = [] query = "SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c "\ " WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC" res = run_sql(query, (collid, )) for c, cid in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c, ln) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..."
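# (Illustrative note, not in the original code: the recursive call below
# walks the collection tree, so assuming collid=1 is Home with son
# 'Articles', which in turn has son 'Preprints', the returned list is,
# schematically, [['Articles', 'Articles'], ['Preprints', ' - Preprints']],
# i.e. the 'level' argument only controls the '-' indentation prefix.)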
if level: c_printable = " " + level * '-' + " " + c_printable colls_nicely_ordered.append([c, c_printable]) colls_nicely_ordered = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1, ln=ln) return colls_nicely_ordered
def get_index_id_from_field(field): """Returns first index id where the field code FIELD is indexed. Returns zero in case there is no table for this index. Example: field='author', output=4.""" out = 0 res = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX LIMIT 1""", (field,)) if res: out = res[0][0] return out
def get_words_from_pattern(pattern): "Returns list of whitespace-separated words from pattern." words = {} for word in string.split(pattern): if not words.has_key(word): words[word] = 1 return words.keys()
def create_basic_search_units(req, p, f, m=None, of='hb'): """Splits search pattern and search field into a list of independently searchable units. - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where 'operator' is set union (|), set intersection (+) or set exclusion (-); 'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics'); 'field' is either a code like 'title' or MARC tag like '100__a'; 'type' is the search type ('w' for word file search, 'a' for access file search). - Optionally, the function accepts the match type argument 'm'. If it is set (e.g. from advanced search interface), then it performs this kind of matching. If it is not set, then a guess is made. 'm' can have values: 'a'='all of the words', 'o'='any of the words', 'p'='phrase/substring', 'r'='regular expression', 'e'='exact value'. - Warnings are printed on req (when not None) in case of HTML output formats.""" opfts = [] # will hold (o,p,f,t,h) units # FIXME: quick hack for the journal index if f == 'journal': opfts.append(['+', p, f, 'w']) return opfts ## check arguments: if matching type phrase/string/regexp, do we have field defined? if (m=='p' or m=='r' or m=='e') and not f: m = 'a' if of.startswith("h"): print_warning(req, "This matching type cannot be used within any field. I will perform a word search instead." ) print_warning(req, "If you want to phrase/substring/regexp search in a specific field, e.g. inside title, then please choose the 'within title' search option.") ## is desired matching type set? if m: ## A - matching type is known; good! if m == 'e': # A1 - exact value: opfts.append(['+', p, f, 'a']) # '+' since we have only one unit elif m == 'p': # A2 - phrase/substring: opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit elif m == 'r': # A3 - regular expression: opfts.append(['+', p, f, 'r']) # '+' since we have only one unit elif m == 'a' or m == 'w': # A4 - all of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): opfts.append(['+', word, f, 'w']) # '+' in all units elif m == 'o': # A5 - any of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): if len(opfts)==0: opfts.append(['+', word, f, 'w']) # '+' in the first unit else: opfts.append(['|', word, f, 'w']) # '|' in further units else: if of.startswith("h"): print_warning(req, "Matching type '%s' is not implemented yet."
% cgi.escape(m), "Warning") opfts.append(['+', "%" + p + "%", f, 'w']) else: ## B - matching type is not known: let us try to determine it by some heuristics if f and p[0] == '"' and p[-1] == '"': ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search opfts.append(['+', p[1:-1], f, 'a']) elif f and p[0] == "'" and p[-1] == "'": ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search opfts.append(['+', '%' + p[1:-1] + '%', f, 'a']) elif f and p[0] == "/" and p[-1] == "/": ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search opfts.append(['+', p[1:-1], f, 'r']) elif f and string.find(p, ',') >= 0: ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search opfts.append(['+', p, f, 'a']) elif f and str(f[0:2]).isdigit(): ## B2 - does 'f' exist and start with two digits? => doing ACC search opfts.append(['+', p, f, 'a']) else: ## B3 - doing WRD search, but maybe ACC too # search units are separated by spaces unless the space is within single or double quotes # so, let us replace temporarily any space within quotes by '__SPACE__' p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # wash argument: p = re_equal.sub(":", p) p = re_logical_and.sub(" ", p) p = re_logical_or.sub(" |", p) p = re_logical_not.sub(" -", p) p = re_operators.sub(r' \1', p) for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item") pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' ' # firstly, determine set operator if pi[0] == '+' or pi[0] == '-' or pi[0] == '|': oi = pi[0] pi = pi[1:] else: # okay, there is no operator, so let us decide what to do by default oi = '+' # by default we are doing set intersection... # secondly, determine search pattern and field: if string.find(pi, ":") > 0: fi, pi = string.split(pi, ":", 1) # test whether fi is a real index code or a MARC-tag defined code: if fi in get_fieldcodes() or '00' <= fi[:2] <= '99': pass else: # it is not, so join it back: fi, pi = f, fi + ":" + pi else: fi, pi = f, pi # look also for old ALEPH field names: if fi and CFG_WEBSEARCH_FIELDS_CONVERT.has_key(string.lower(fi)): fi = CFG_WEBSEARCH_FIELDS_CONVERT[string.lower(fi)] # wash 'pi' argument: if re_quotes.match(pi): # B3a - quotes are found => do ACC search (phrase search) if fi: if pi[0] == '"' and pi[-1] == '"': pi = string.replace(pi, '"', '') # remove quote signs opfts.append([oi, pi, fi, 'a']) elif pi[0] == "'" and pi[-1] == "'": pi = string.replace(pi, "'", "") # remove quote signs opfts.append([oi, "%" + pi + "%", fi, 'a']) else: # unbalanced quotes, so do WRD query: opfts.append([oi, pi, fi, 'w']) else: # fi is not defined, look at where we are doing exact or subphrase search (single/double quotes): if pi[0] == '"' and pi[-1] == '"': opfts.append([oi, pi[1:-1], "anyfield", 'a']) if of.startswith("h"): print_warning(req, "Searching for an exact match inside any field may be slow.
You may want to search for words instead, or choose to search within specific field.") else: # nope, subphrase in global index is not possible => change back to WRD search pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed for pii in get_words_from_pattern(pi): # since there may be '-' and other chars that we do not index in WRD opfts.append([oi, pii, fi, 'w']) if of.startswith("h"): print_warning(req, "The partial phrase search does not work in any field. I will perform a boolean AND search instead.") print_warning(req, "If you want to do a partial phrase search in a specific field, e.g. inside title, then please choose the 'within title' search option.", "Tip") print_warning(req, "If you want to do exact phrase matching, then please use double quotes.", "Tip") elif fi and str(fi[0]).isdigit() and str(fi[1]).isdigit(): # B3b - fi exists and starts with two digits => do ACC search opfts.append([oi, pi, fi, 'a']) elif fi and not get_index_id_from_field(fi): # B3c - fi exists but there is no words table for fi => try ACC search opfts.append([oi, pi, fi, 'a']) elif fi and pi.startswith('/') and pi.endswith('/'): # B3d - fi exists and slashes found => try regexp search opfts.append([oi, pi[1:-1], fi, 'r']) else: # B3e - general case => do WRD search pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed for pii in get_words_from_pattern(pi): opfts.append([oi, pii, fi, 'w']) ## sanity check: for i in range(0, len(opfts)): try: pi = opfts[i][1] if pi == '*': if of.startswith("h"): print_warning(req, "Ignoring standalone wildcard word.", "Warning") del opfts[i] if pi == '' or pi == ' ': fi = opfts[i][2] if fi: if of.startswith("h"): print_warning(req, "Ignoring empty %s search term." % fi, "Warning") del opfts[i] except: pass ## return search units: return opfts
def page_start(req, of, cc, as, ln, uid, title_message=None, description='', keywords='', recID=-1, tab=''): "Start page according to given output format." _ = gettext_set_language(ln) if not title_message: title_message = _("Search Results") if not req: return # we were called from CLI content_type = get_output_format_content_type(of) if of.startswith('x'): if of == 'xr': # we are doing RSS output req.content_type = "application/rss+xml" req.send_http_header() req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""") else: # we are doing XML output: req.content_type = "text/xml" req.send_http_header() req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""") elif of.startswith('t') or str(of[0:3]).isdigit(): # we are doing plain text output: req.content_type = "text/plain" req.send_http_header() elif of == "id": pass # nothing to do, we shall only return list of recIDs elif content_type == 'text/html': # we are doing HTML output: req.content_type = "text/html" req.send_http_header() if not description: description = "%s %s." % (cc, _("Search Results")) if not keywords: keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln), get_coll_i18nname(cc, ln)) argd = {} if req.args: argd = cgi.parse_qs(req.args) rssurl = websearch_templates.build_rss_url(argd) navtrail = create_navtrail_links(cc, as, ln) navtrail_append_title_p = 1 # FIXME: Find a good point to put this code. # This is a nice hack to trigger jsMath only when displaying single # records. if of.lower() in CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS: metaheaderadd = """ """ else: metaheaderadd = '' if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \ recID != -1: # If we are not in information tab in HD format, customize # the nav. trail to have a link back to main record.
(Due # to the way perform_request_search() works, hb # (lowercase) is equal to hd) if navtrail != '': navtrail += ' > ' if (of != '' or of.lower() != 'hd') and of != 'hb': # Export format_name = of query = "SELECT name FROM format WHERE code=%s" res = run_sql(query, (of,)) if res: format_name = res[0][0] navtrail += ' %s > %s' % \ (CFG_SITE_URL, recID, title_message, format_name) else: # Discussion, citations, etc. tabs tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label'] navtrail += ' %s > %s' % \ (CFG_SITE_URL, recID, title_message, _(tab_label)) navtrail_append_title_p = 0 req.write(pageheaderonly(req=req, title=title_message, navtrail=navtrail, description=description, keywords=keywords, metaheaderadd=metaheaderadd, uid=uid, language=ln, navmenuid='search', navtrail_append_title_p=\ navtrail_append_title_p, rssurl=rssurl)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) #else: # req.send_http_header() def page_end(req, of="hb", ln=CFG_SITE_LANG): "End page according to given output format: e.g. close XML tags, add HTML footer, etc." if of == "id": return [] # empty recID list if not req: return # we were called from CLI if of.startswith('h'): req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req)) return "\n" def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." _ = gettext_set_language(ln) box = "" # day box += """""" # month box += """""" # year box += """""" return box def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action=""): """Create search box for 'search again in the results page' functionality.""" # load the right message language _ = gettext_set_language(ln) # some computations cc_intl = get_coll_i18nname(cc, ln) cc_colID = get_colID(cc) colls_nicely_ordered = [] if cfg_nicely_ordered_collection_list: colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln) else: colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln) colls_nice = [] for (cx, cx_printable) in colls_nicely_ordered: if not cx.startswith("Unnamed collection"): colls_nice.append({ 'value' : cx, 'text' : cx_printable }) coll_selects = [] if colls and colls[0] != CFG_SITE_NAME: # some collections are defined, so print these first, and only then print 'add another collection' heading: for c in colls: if c: temp = [] temp.append({ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any collection") }) for val in colls_nice: # print collection: if not cx.startswith("Unnamed collection"): temp.append({ 'value' : val['value'], 'text' : val['text'], 'selected' : (c == re.sub("^[\s\-]*","", val['value'])) }) coll_selects.append(temp) coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % _("add another collection") }] + colls_nice) else: # we searched in CFG_SITE_NAME, so print 'any collection' heading coll_selects.append([{ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any collection") }] + colls_nice) ## ranking methods ranks = [{ 'value' : '', 'text' : "- %s %s -" % (_("OR").lower (), _("rank by")), }] for (code, name) in get_bibrank_methods(cc_colID, ln): # propose found rank methods: ranks.append({ 'value' : code, 'text' : name, }) formats = [] query = """SELECT code,name FROM format WHERE visibility='1' 
ORDER BY name ASC""" res = run_sql(query) if res: # propose found formats: for code, name in res: formats.append({ 'value' : code, 'text' : name }) else: formats.append({'value' : 'hb', 'text' : _("HTML brief") }) # show collections in the search box? (not if there is only one # collection defined, and not if we are in light search) show_colls = True if len(collection_reclist_cache.keys()) == 1 or \ as == -1: show_colls = False return websearch_templates.tmpl_search_box( ln = ln, as = as, cc_intl = cc_intl, cc = cc, ot = ot, sp = sp, action = action, fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID), f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, p1 = p1, p2 = p2, p3 = p3, op1 = op1, op2 = op2, rm = rm, p = p, f = f, coll_selects = coll_selects, d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d, dt = dt, sort_fields = get_sortby_fields(ln=ln, colID=cc_colID), sf = sf, so = so, ranks = ranks, sc = sc, rg = rg, formats = formats, of = of, pl = pl, jrec = jrec, ec = ec, show_colls = show_colls, ) def create_navtrail_links(cc=CFG_SITE_NAME, as=0, ln=CFG_SITE_LANG, self_p=1, tab=''): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If as==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in get_coll_ancestors(cc): if dad != CFG_SITE_NAME: # exclude Home collection dads.append ((dad, get_coll_i18nname (dad, ln))) if self_p and cc != CFG_SITE_NAME: dads.append((cc, get_coll_i18nname(cc, ln))) return websearch_templates.tmpl_navtrail_links( as=as, ln=ln, dads=dads) def get_searchwithin_fields(ln='en', colID=None): """Retrieves the fields name used in the 'search within' selection box for the collection ID colID.""" res = None if colID: res = run_sql_cached("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,), affected_tables=['field', 'collection_field_fieldvalue']) if not res: res = run_sql_cached("SELECT code,name FROM field ORDER BY name ASC", affected_tables=['field',]) fields = [{ 'value' : '', 'text' : get_field_i18nname("any field", ln) }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln) }) return fields def get_sortby_fields(ln='en', colID=None): """Retrieves the fields name used in the 'sort by' selection box for the collection ID colID.""" _ = gettext_set_language(ln) res = None if colID: res = run_sql_cached("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,), affected_tables=['field', 'collection_field_fieldvalue']) if not res: # no sort fields defined for this colID, try to take Home collection: res = run_sql_cached("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (1,), affected_tables=['field', 'collection_field_fieldvalue']) if not res: # no sort fields defined for the Home collection, take all sort fields defined wherever they are: res = run_sql_cached("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", 
affected_tables=['field', 'collection_field_fieldvalue']) fields = [{ 'value' : '', 'text' : _("latest first") }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln) }) return fields def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." _ = gettext_set_language(ln) out = """ """ % (name, is_selected('a', value), _("AND"), is_selected('o', value), _("OR"), is_selected('n', value), _("AND NOT")) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." _ = gettext_set_language(ln) out = """ """ % (name, is_selected('a', value), _("All of the words:"), is_selected('o', value), _("Any of the words:"), is_selected('e', value), _("Exact phrase:"), is_selected('p', value), _("Partial phrase:"), is_selected('r', value), _("Regular expression:")) return out def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if type(var) is int and type(fld) is int: if var == fld: return " selected" elif str(var) == str(fld): return " selected" elif fld and len(fld)==3 and fld[0] == "w" and var == fld[1:]: return " selected" return "" def wash_colls(cc, c, split_colls=0): """Wash collection list by checking whether user has deselected anything under 'Narrow search'. Checks also if cc is a list or not. Return list of cc, colls_to_display, colls_to_search since the list of collections to display is different from that to search in. This is because users might have chosen 'split by collection' functionality. The behaviour of "collections to display" depends solely whether user has deselected a particular collection: e.g. if it started from 'Articles and Preprints' page, and deselected 'Preprints', then collection to display is 'Articles'. If he did not deselect anything, then collection to display is 'Articles & Preprints'. The behaviour of "collections to search in" depends on the 'split_colls' parameter: * if is equal to 1, then we can wash the colls list down and search solely in the collection the user started from; * if is equal to 0, then we are splitting to the first level of collections, i.e. collections as they appear on the page we started to search from; The function raises exception InvenioWebSearchUnknownCollectionError if cc or one of c collections is not known. 
""" colls_out = [] colls_out_for_display = [] # check what type is 'cc': if type(cc) is list: for ci in cc: if collection_reclist_cache.has_key(ci): # yes this collection is real, so use it: cc = ci break else: # check once if cc is real: if not collection_reclist_cache.has_key(cc): if cc: raise InvenioWebSearchUnknownCollectionError(cc) else: cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection # check type of 'c' argument: if type(c) is list: colls = c else: colls = [c] # remove all 'unreal' collections: colls_real = [] for coll in colls: if collection_reclist_cache.has_key(coll): colls_real.append(coll) else: if coll: raise InvenioWebSearchUnknownCollectionError(coll) colls = colls_real # check if some real collections remain: if len(colls)==0: colls = [cc] # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll': res = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r'""", (cc,)) l_cc_nonrestricted_sons = [] l_c = colls for row in res: if not collection_restricted_p(row[0]): l_cc_nonrestricted_sons.append(row[0]) l_c.sort() l_cc_nonrestricted_sons.sort() if l_cc_nonrestricted_sons == l_c: colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc' else: colls_out_for_display = colls # nope, we need to display all 'colls' successively # remove duplicates: colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1)) colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups) # second, let us decide on collection splitting: if split_colls == 0: # type A - no sons are wanted colls_out = colls_out_for_display # elif split_colls == 1: else: # type B - sons (first-level descendants) are wanted for coll in colls_out_for_display: coll_sons = get_coll_sons(coll) if coll_sons == []: colls_out.append(coll) else: colls_out = colls_out + coll_sons # remove duplicates: colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1)) colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups) return (cc, colls_out_for_display, colls_out) def strip_accents(x): """Strip accents in the input phrase X (assumed in UTF-8) by replacing accented characters with their unaccented cousins (e.g. é by e). 
Return such a stripped X.""" x = re_latex_lowercase_a.sub("a", x) x = re_latex_lowercase_ae.sub("ae", x) x = re_latex_lowercase_e.sub("e", x) x = re_latex_lowercase_i.sub("i", x) x = re_latex_lowercase_o.sub("o", x) x = re_latex_lowercase_u.sub("u", x) x = re_latex_lowercase_y.sub("y", x) x = re_latex_lowercase_c.sub("c", x) x = re_latex_lowercase_n.sub("n", x) x = re_latex_uppercase_a.sub("A", x) x = re_latex_uppercase_ae.sub("AE", x) x = re_latex_uppercase_e.sub("E", x) x = re_latex_uppercase_i.sub("I", x) x = re_latex_uppercase_o.sub("O", x) x = re_latex_uppercase_u.sub("U", x) x = re_latex_uppercase_y.sub("Y", x) x = re_latex_uppercase_c.sub("C", x) x = re_latex_uppercase_n.sub("N", x) # convert input into Unicode string: try: y = unicode(x, "utf-8") except: return x # something went wrong, probably the input wasn't UTF-8 # asciify Latin-1 lowercase characters: y = re_unicode_lowercase_a.sub("a", y) y = re_unicode_lowercase_ae.sub("ae", y) y = re_unicode_lowercase_e.sub("e", y) y = re_unicode_lowercase_i.sub("i", y) y = re_unicode_lowercase_o.sub("o", y) y = re_unicode_lowercase_u.sub("u", y) y = re_unicode_lowercase_y.sub("y", y) y = re_unicode_lowercase_c.sub("c", y) y = re_unicode_lowercase_n.sub("n", y) # asciify Latin-1 uppercase characters: y = re_unicode_uppercase_a.sub("A", y) y = re_unicode_uppercase_ae.sub("AE", y) y = re_unicode_uppercase_e.sub("E", y) y = re_unicode_uppercase_i.sub("I", y) y = re_unicode_uppercase_o.sub("O", y) y = re_unicode_uppercase_u.sub("U", y) y = re_unicode_uppercase_y.sub("Y", y) y = re_unicode_uppercase_c.sub("C", y) y = re_unicode_uppercase_n.sub("N", y) # return UTF-8 representation of the Unicode string: return y.encode("utf-8")
def wash_index_term(term, max_char_length=50, lower_term=True): """ Return washed form of the index term TERM that would be suitable for storing into idxWORD* tables. I.e., lower the TERM if LOWER_TERM is True, and truncate it safely to MAX_CHAR_LENGTH UTF-8 characters (meaning, in principle, 4*MAX_CHAR_LENGTH bytes). The function works by an internal conversion of TERM, when needed, from its input Python UTF-8 binary string format into Python Unicode format, and then truncating it safely to the given number of UTF-8 characters, without possible mis-truncation in the middle of a multi-byte UTF-8 character that could otherwise happen if we had been working with the UTF-8 binary representation directly. Note that MAX_CHAR_LENGTH corresponds to the length of the term column in idxINDEX* tables. """ if lower_term: washed_term = unicode(term, 'utf-8').lower() else: washed_term = unicode(term, 'utf-8') if len(washed_term) <= max_char_length: # no need to truncate the term, because it will fit # nicely even if it uses four-byte UTF-8 characters return washed_term.encode('utf-8') else: # truncate the term in a safe position: return washed_term[:max_char_length].encode('utf-8')
def lower_index_term(term): """ Return safely lowered index term TERM. This is done by converting to UTF-8 first, because standard Python lower() function is not UTF-8 safe. To be called by both the search engine and the indexer when appropriate (e.g. before stemming). In case of problems with UTF-8 compliance, this function raises UnicodeDecodeError, so the client code may want to catch it. """ return unicode(term, 'utf-8').lower().encode('utf-8')
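## (Illustrative note, not part of the original code: strip_accents() above
## collapses both LaTeX-accent markup and UTF-8 accented letters to plain
## ASCII, so one would expect, approximately:
##     strip_accents("\\'{e}llis")    -> "ellis"
##     strip_accents("M\xc3\xbcller") -> "Muller"
## The re_latex_* table handles the markup form and the re_unicode_* table
## the byte form; wash_index_term() above then lowercases and truncates
## such terms safely on UTF-8 character boundaries.)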
def wash_output_format(format): """Wash output format FORMAT. Currently only prevents input like 'of=9' for backwards-compatible format that prints certain fields only. (for this task, 'of=tm' is preferred)""" if str(format[0:3]).isdigit() and len(format) != 6: # asked to print MARC tags, but not enough digits, # so let's switch back to HTML brief default return 'hb' else: return format
def wash_pattern(p): """Wash pattern passed by URL. Check for sanity of the wildcard by removing wildcards if they are appended to extremely short words (1-3 letters). TODO: instead of this approximative treatment, it will be much better to introduce a temporal limit, e.g. to kill a query if it does not finish in 10 seconds.""" # strip accents: # p = strip_accents(p) # FIXME: when available, strip accents all the time # add leading/trailing whitespace for the two following wildcard-sanity checking regexps: p = " " + p + " " # get rid of wildcards at the beginning of words: p = re_pattern_wildcards_at_beginning.sub("\\1", p) # replace spaces within quotes by __SPACE__ temporarily: p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # get rid of extremely short words (1-3 letters with wildcards): p = re_pattern_short_words.sub("\\1", p) # replace back __SPACE__ by spaces: p = re_pattern_space.sub(" ", p) # replace special terms: p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p) # remove unnecessary whitespace: p = string.strip(p) return p
def wash_field(f): """Wash field passed by URL.""" # get rid of unnecessary whitespace: f = string.strip(f) # wash old-style CDS Invenio/ALEPH 'f' field argument, e.g. replaces 'wau' and 'au' by 'author' if CFG_WEBSEARCH_FIELDS_CONVERT.has_key(string.lower(f)): f = CFG_WEBSEARCH_FIELDS_CONVERT[string.lower(f)] return f
def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0): """ Take user-submitted date arguments D1 (full datetime string) or (D1Y, D1M, D1D) year, month, day tuple and D2 or (D2Y, D2M, D2D) and return (Y1-M1-D1 H1:M1:S1, Y2-M2-D2 H2:M2:S2) datetime strings in the YYYY-MM-DD HH:MM:SS format suitable for time restricted searching. Note that when both D1 and (D1Y, D1M, D1D) parameters are present, the precedence goes to D1. Ditto for D2*. Note that when (D1Y, D1M, D1D) are taken into account, some values may be missing and are completed e.g. to 01 or 12 according to whether it is the starting or the ending date. """ datetext1, datetext2 = "", "" # sanity checking: if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and d2 == "" and d2y == 0 and d2m == 0 and d2d == 0: return ("", "") # nothing selected, so return empty values # wash first (starting) date: if d1: # full datetime string takes precedence: datetext1 = d1 else: # okay, first date passed as (year,month,day): if d1y: datetext1 += "%04d" % d1y else: datetext1 += "0000" if d1m: datetext1 += "-%02d" % d1m else: datetext1 += "-01" if d1d: datetext1 += "-%02d" % d1d else: datetext1 += "-01" datetext1 += " 00:00:00" # wash second (ending) date: if d2: # full datetime string takes precedence: datetext2 = d2 else: # okay, second date passed as (year,month,day): if d2y: datetext2 += "%04d" % d2y else: datetext2 += "9999" if d2m: datetext2 += "-%02d" % d2m else: datetext2 += "-12" if d2d: datetext2 += "-%02d" % d2d else: datetext2 += "-31" # NOTE: perhaps we should add max(datenumber) in # given month, but for our querying it's not # needed, 31 will always do datetext2 += " 00:00:00" # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts: return (datetext1, datetext2)
def get_colID(c): "Return collection ID for collection name C. Return None if no match found." colID = None res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1) if res: colID = res[0][0] return colID
def get_coll_i18nname(c, ln=CFG_SITE_LANG): """Return nicely formatted collection name (of name type 'ln', 'long name') for collection C in language LN.""" global collection_i18nname_cache global collection_i18nname_cache_timestamp # firstly, check whether the collectionname table was modified: if get_table_update_time('collectionname') > collection_i18nname_cache_timestamp: # yes it was, cache clear-up needed: collection_i18nname_cache = create_collection_i18nname_cache() # secondly, read i18n name from either the cache or return common name: out = c try: out = collection_i18nname_cache[c][ln] except KeyError: pass # translation in LN does not exist return out
def get_field_i18nname(f, ln=CFG_SITE_LANG): """Return nicely formatted field name (of type 'ln', 'long name') for field F in language LN.""" global field_i18nname_cache global field_i18nname_cache_timestamp # firstly, check whether the fieldname table was modified: if get_table_update_time('fieldname') > field_i18nname_cache_timestamp: # yes it was, cache clear-up needed: field_i18nname_cache = create_field_i18nname_cache() # secondly, read i18n name from either the cache or return common name: out = f try: out = field_i18nname_cache[f][ln] except KeyError: pass # translation in LN does not exist return out
def get_coll_ancestors(coll): "Returns a list of ancestors for collection 'coll'." coll_ancestors = [] coll_ancestor = coll while 1: res = run_sql("""SELECT c.name FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad LEFT JOIN collection AS ccc ON ccc.id=cc.id_son WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""", (coll_ancestor,)) if res: coll_name = res[0][0] coll_ancestors.append(coll_name) coll_ancestor = coll_name else: break # ancestors found, return reversed list: coll_ancestors.reverse() return coll_ancestors
def get_coll_sons(coll, type='r', public_only=1): """Return a list of sons (first-level descendants) of type 'type' for collection 'coll'. If public_only, then return only non-restricted son collections. """ coll_sons = [] query = "SELECT c.name FROM collection AS c "\ "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\ "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\ "WHERE cc.type=%s AND ccc.name=%s" query += " ORDER BY cc.score DESC" res = run_sql(query, (type, coll)) for name in res: if not public_only or not collection_restricted_p(name[0]): coll_sons.append(name[0]) return coll_sons
def get_coll_real_descendants(coll): """Return a list of all descendants of collection 'coll' that are defined by a 'dbquery'. IOW, we need to decompose compound collections like "A & B" into "A" and "B" provided that "A & B" has no associated database query defined. """ coll_sons = [] res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_son LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad WHERE ccc.name=%s ORDER BY cc.score DESC""", (coll,)) for name, dbquery in res: if dbquery: # this is 'real' collection, so return it: coll_sons.append(name) else: # this is 'composed' collection, so recurse: coll_sons.extend(get_coll_real_descendants(name)) return coll_sons
def get_collection_reclist(coll): """Return hitset of recIDs that belong to the collection 'coll'. But firstly check the last updated date of the collection table. If it's newer than the cache timestamp, then empty the cache, since new records could have been added.""" global collection_reclist_cache global collection_reclist_cache_timestamp # firstly, check whether the collection table was modified: if get_table_update_time('collection') > collection_reclist_cache_timestamp: # yes it was, cache clear-up needed: collection_reclist_cache = create_collection_reclist_cache() # secondly, read reclist from either the cache or the database: try: if not collection_reclist_cache[coll]: # not yet in the cache, so calculate it and fill the cache: query = "SELECT nbrecs,reclist FROM collection WHERE name=%s" res = run_sql(query, (coll, ), 1) if res: try: set = HitSet(res[0][1]) except: set = HitSet() collection_reclist_cache[coll] = set # finally, return reclist: return collection_reclist_cache[coll] except KeyError: return HitSet()
def create_collection_reclist_cache(): """Creates list of records belonging to collections. Called on startup and used later for intersecting search results with collection universe.""" global collection_reclist_cache_timestamp # populate collection reclist cache: collrecs = {} try: res = run_sql("SELECT name,reclist FROM collection") except Error: # database problems, set timestamp to zero and return empty cache collection_reclist_cache_timestamp = 0 return collrecs for name, reclist in res: collrecs[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) # update timestamp: try: collection_reclist_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) except NameError: collection_reclist_cache_timestamp = 0 return collrecs
try: collection_reclist_cache.has_key(CFG_SITE_NAME) except: try: collection_reclist_cache = create_collection_reclist_cache() except: collection_reclist_cache = {}
def create_collection_i18nname_cache(): """Create cache of I18N collection names of type 'ln' (=long name). Called on startup and used later during the search time.""" global collection_i18nname_cache_timestamp # populate collection I18N name cache: names = {} try: res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name except Error: # database problems, set timestamp to zero and return empty cache collection_i18nname_cache_timestamp = 0 return names for c, ln, i18nname in res: if i18nname: if not names.has_key(c): names[c] = {} names[c][ln] = i18nname # update timestamp: try: collection_i18nname_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) except NameError: collection_i18nname_cache_timestamp = 0 return names
try: collection_i18nname_cache.has_key(CFG_SITE_NAME) except: try: collection_i18nname_cache = create_collection_i18nname_cache() except: collection_i18nname_cache = {}
def create_field_i18nname_cache(): """Create cache of I18N field names of type 'ln' (=long name). Called on startup and used later during the search time.""" global field_i18nname_cache_timestamp # populate field I18N name cache: names = {} try: res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name except Error: # database problems, set timestamp to zero and return empty cache field_i18nname_cache_timestamp = 0 return names for f, ln, i18nname in res: if i18nname: if not names.has_key(f): names[f] = {} names[f][ln] = i18nname # update timestamp: try: field_i18nname_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) except NameError: field_i18nname_cache_timestamp = 0 return names
try: field_i18nname_cache.has_key(CFG_SITE_NAME) except: try: field_i18nname_cache = create_field_i18nname_cache() except: field_i18nname_cache = {}
def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG): """Browse either bibliographic phrases or words indexes, and display it.""" # load the right message language _ = gettext_set_language(ln) ## do we search in words indexes? if not f: return browse_in_bibwords(req, p, f) ## is p enclosed in quotes? (coming from exact search) if p.startswith('"') and p.endswith('"'): p = p[1:-1] p_orig = p ## okay, "real browse" follows: ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) while not browsed_phrases: # try again and again with shorter and shorter pattern: try: p = p[:-1] browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) except: # probably there are no hits at all: req.write(_("No values found.")) return ## try to check hits in these particular collection selection: browsed_phrases_in_colls = [] if 0: for phrase in browsed_phrases: phrase_hitset = HitSet() phrase_hitsets = search_pattern("", phrase, f, 'e') for coll in colls: phrase_hitset.union_update(phrase_hitsets[coll]) if len(phrase_hitset) > 0: # okay, this phrase has some hits in colls, so add it: browsed_phrases_in_colls.append([phrase, len(phrase_hitset)]) ## were there hits in collections? if browsed_phrases_in_colls == []: if browsed_phrases != []: #print_warning(req, """No match close to %s found in given collections. #Please try different term. Displaying matches in any collection...""" % p_orig) ## try to get nbhits for these phrases in any collection: for phrase in browsed_phrases: browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)]) ## display results now: out = websearch_templates.tmpl_browse_pattern( f=f, fn=get_field_i18nname(get_field_name(f), ln), ln=ln, browsed_phrases_in_colls=browsed_phrases_in_colls, colls=colls, rg=rg, ) req.write(out) return
def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG): """Browse inside words indexes.""" if not p: return _ = gettext_set_language(ln) urlargd = {} urlargd.update(req.argd) urlargd['action'] = 'search' nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0) req.write(websearch_templates.tmpl_search_in_bibwords( p = p, f = f, ln = ln, nearest_box = nearest_box )) return
def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG): """Search for complex pattern 'p' within field 'f' according to matching type 'm'. Return hitset of recIDs. The function uses multi-stage searching algorithm in case of no exact match found. See the Search Internals document for detailed description. The 'ap' argument governs whether alternative patterns are to be used in case there is no direct hit for (p,f,m). For example, whether to replace non-alphanumeric characters by spaces if it would give some hits. See the Search Internals document for detailed description. (ap=0 forbids the alternative pattern usage, ap=1 permits it.) The 'of' argument governs whether to print or not some information to the user in case of no match found. (Usually it prints the information for HTML formats and is silent otherwise.) The 'verbose' argument controls the level of debugging information to be printed (0=least, 9=most). All the parameters are assumed to have been previously washed. This function is suitable as a mid-level API. """ _ = gettext_set_language(ln) hitset_empty = HitSet() # sanity check: if not p: hitset_full = HitSet(trailing_bits=1) hitset_full.discard(0) # no pattern, so return all universe return hitset_full # search stage 1: break up arguments into basic search units: if verbose and of.startswith("h"): t1 = os.times()[4] basic_search_units = create_basic_search_units(req, p, f, m, of) if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units))) print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1))
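    # (Illustrative note, not in the original code: for p='ellis muon',
    # f='author' and m='a', stage 1 above typically yields units like
    #     [['+', 'ellis', 'author', 'w'], ['+', 'muon', 'author', 'w']]
    # i.e. [operator, pattern, field, type] items that stage 2 resolves
    # into hitsets and stage 3 combines via set operations.)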
# search stage 2: do search for each search unit and verify hit presence: if verbose and of.startswith("h"): t1 = os.times()[4] basic_search_units_hitsets = [] for idx_unit in range(0, len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m) if verbose >= 9 and of.startswith("h"): print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset)) if len(basic_search_unit_hitset) > 0 or \ ap==0 or \ bsu_o=="|" or \ ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"): # stage 2-1: this basic search unit is retained, since it gave hits, or approximate pattern treatment is off, or the unit is joined by an OR operator to its neighbours: basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern: if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p): if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query? bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p) else: # it is WRD query bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p) basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln) if len(basic_search_unit_hitset) > 0: # we retain the new unit instead if of.startswith('h'): print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \ {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>", 'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"}) basic_search_units[idx_unit][1] = bsu_pn basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h'): if req: if bsu_f == "recid": print_warning(req, "Requested record does not seem to exist.") else: print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h'): if req: if bsu_f == "recid": print_warning(req, "Requested record does not seem to exist.") else: print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty if verbose and of.startswith("h"): t2 = os.times()[4] for idx_unit in range(0, len(basic_search_units)): print_warning(req, "Search stage 2: basic search unit %s gave %d hits." % (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit]))) print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1))
# search stage 3: apply boolean query for each search unit: if verbose and of.startswith("h"): t1 = os.times()[4] # let the initial set be the complete universe: hitset_in_any_collection = HitSet(trailing_bits=1) hitset_in_any_collection.discard(0) for idx_unit in range(0, len(basic_search_units)): this_unit_operation = basic_search_units[idx_unit][0] this_unit_hitset = basic_search_units_hitsets[idx_unit] if this_unit_operation == '+': hitset_in_any_collection.intersection_update(this_unit_hitset) elif this_unit_operation == '-': hitset_in_any_collection.difference_update(this_unit_hitset) elif this_unit_operation == '|': hitset_in_any_collection.union_update(this_unit_hitset) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(this_unit_operation), "Error") if len(hitset_in_any_collection) == 0: # no hits found, propose alternative boolean query: if of.startswith('h'): nearestterms = [] for idx_unit in range(0, len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] if bsu_p.startswith("%") and bsu_p.endswith("%"): bsu_p = "'" + bsu_p[1:-1] + "'" bsu_nbhits = len(basic_search_units_hitsets[idx_unit]) # create a similar query, but with the basic search unit only argd = {} argd.update(req.argd) argd['p'] = bsu_p argd['f'] = bsu_f nearestterms.append((bsu_p, bsu_nbhits, argd)) text = websearch_templates.tmpl_search_no_boolean_hits( ln=ln, nearestterms=nearestterms) print_warning(req, text) if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection)) print_warning(req, "Search stage 3: execution took %.2f seconds." % (t2 - t1))
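    # (Illustrative note, not in the original code: the stage-3 loop above
    # is plain set algebra on record-id bitsets; e.g., assuming
    # h1 = HitSet([1, 2, 3]) and h2 = HitSet([2, 3, 4]), a '+' unit leaves
    # the intersection {2, 3}, a '-' unit leaves the difference {1}, and
    # a '|' unit gives the union {1, 2, 3, 4}.)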
                      % (t2 - t1))
    return hitset_in_any_collection

def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG):
    """Search for complex pattern 'p' containing parentheses within field 'f'
       according to matching type 'm'.  Return hitset of recIDs.

       For more details on the parameters see 'search_pattern'.
    """
    _ = gettext_set_language(ln)

    # if the pattern uses SPIRES search syntax, convert it to Invenio syntax:
    spires_syntax_converter = SpiresToInvenioSyntaxConverter()
    p = spires_syntax_converter.convert_query(p)

    # sanity check: do not call parenthesised parser for search terms
    # like U(1):
    if not re_pattern_parens.search(p):
        return search_pattern(req, p, f, m, ap, of, verbose, ln)

    # try searching with parentheses:
    try:
        parser = SearchQueryParenthesisedParser()

        # get a hitset with all recids:
        result_hitset = HitSet(trailing_bits=1)

        # parse the query; the result is a list of [op1, expr1, op2, expr2, ..., opN, exprN]:
        parsing_result = parser.parse_query(p)
        if verbose and of.startswith("h"):
            print_warning(req, "Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result))

        # go through every pattern, calculate a hitset for it, and
        # combine that hitset with the result using the corresponding
        # operator:
        for index in xrange(0, len(parsing_result)-1, 2):
            current_operator = parsing_result[index]
            current_pattern = parsing_result[index+1]
            # obtain a hitset for the current pattern:
            current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln)
            # combine the current hitset with the resulting hitset using the current operator:
            if current_operator == '+':
                result_hitset = result_hitset & current_hitset
            elif current_operator == '-':
                result_hitset = result_hitset - current_hitset
            elif current_operator == '|':
                result_hitset = result_hitset | current_hitset
            else:
                assert False, "Unknown operator in search_pattern_parenthesised()"

        return result_hitset

    # if searching with parentheses fails, perform the search ignoring parentheses:
    except InvenioWebSearchQueryParserException:
        print_warning(req, _("Nested or mismatched parentheses detected. Ignoring all parentheses in the query..."))

        # remove the parentheses in the query.  The current implementation
        # removes all the parentheses, but it could be improved to remove
        # only those that are not inside quotes:
        p = p.replace('(', ' ')
        p = p.replace(')', ' ')

        return search_pattern(req, p, f, m, ap, of, verbose, ln)

def search_unit(p, f=None, m=None):
    """Search for basic search unit defined by pattern 'p' and field 'f'
       and matching type 'm'.  Return hitset of recIDs.

       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a ``basic search unit'' so that it
       is searched as such and is not broken up in any way.  Only
       wildcard and span queries are being detected inside 'p'.

       This function is suitable as a low-level API.
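
       An illustrative sketch of the wildcard and span-query detection
       performed by the word search below (pure string handling, no
       database access; the pattern values are made up):

       >>> import string
       >>> string.split('muon->pion', '->', 1)   # span query endpoints
       ['muon', 'pion']
       >>> string.replace('elli*', '*', '%')     # '*' is the truncation character
       'elli%'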
""" ## create empty output results set: set = HitSet() if not p: # sanity checking return set if m == 'a' or m == 'r': # we are doing either direct bibxxx search or phrase search or regexp search set = search_unit_in_bibxxx(p, f, m) elif p.startswith("cited:"): # we are doing search by the citation count set = search_unit_by_times_cited(p[6:]) else: # we are doing bibwords search by default set = search_unit_in_bibwords(p, f) return set def search_unit_in_bibwords(word, f, decompress=zlib.decompress): """Searches for 'word' inside bibwordsX table for field 'f' and returns hitset of recIDs.""" set = HitSet() # will hold output result set set_used = 0 # not-yet-used flag, to be able to circumvent set operations # deduce into which bibwordsX table we will search: stemming_language = get_index_stemming_language(get_index_id_from_field("anyfield")) bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id stemming_language = get_index_stemming_language(index_id) else: return HitSet() # word index f does not exist # wash 'word' argument and run query: word = string.replace(word, '*', '%') # we now use '*' as the truncation character words = string.split(word, "->", 1) # check for span query if len(words) == 2: word0 = re_word.sub('', words[0]) word1 = re_word.sub('', words[1]) if stemming_language: word0 = lower_index_term(word0) word1 = lower_index_term(word1) word0 = stem(word0, stemming_language) word1 = stem(word1, stemming_language) res = run_sql("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX, (wash_index_term(word0), wash_index_term(word1))) else: if f == 'journal': pass # FIXME: quick hack for the journal index else: word = re_word.sub('', word) if stemming_language: word = lower_index_term(word) word = stem(word, stemming_language) if string.find(word, '%') >= 0: # do we have wildcard in the word? if f == 'journal': # FIXME: quick hack for the journal index # FIXME: we can run a sanity check here for all indexes res = () else: res = run_sql("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX, (wash_index_term(word),)) else: res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX, (wash_index_term(word),)) # fill the result set: for word, hitlist in res: hitset_bibwrd = HitSet(hitlist) # add the results: if set_used: set.union_update(hitset_bibwrd) else: set = hitset_bibwrd set_used = 1 # okay, return result set: return set def search_unit_in_bibxxx(p, f, type): """Searches for pattern 'p' inside bibxxx tables for field 'f' and returns hitset of recIDs found. The search type is defined by 'type' (e.g. 
equals to 'r' for a regexp search).""" # FIXME: quick hack for the journal index if f == 'journal': return search_unit_in_bibwords(p, f) p_orig = p # saving for eventual future 'no match' reporting query_addons = "" # will hold additional SQL code for the query query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument) # wash arguments: f = string.replace(f, '*', '%') # replace truncation char '*' in field definition if type == 'r': query_addons = "REGEXP %s" query_params = (p,) else: p = string.replace(p, '*', '%') # we now use '*' as the truncation character ps = string.split(p, "->", 1) # check for span query: if len(ps) == 2: query_addons = "BETWEEN %s AND %s" query_params = (ps[0], ps[1]) else: if string.find(p, '%') > -1: query_addons = "LIKE %s" query_params = (ps[0],) else: query_addons = "= %s" query_params = (ps[0],) # construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # convert old ALEPH tag names, if appropriate: (TODO: get rid of this before entering this function) if CFG_WEBSEARCH_FIELDS_CONVERT.has_key(string.lower(f)): f = CFG_WEBSEARCH_FIELDS_CONVERT[string.lower(f)] # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) if not tl: # f index does not exist, nevermind pass # okay, start search: l = [] # will hold list of recID that matched for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) # construct and run query: if t == "001": res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons, query_params) else: query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \ (bx, bibx, query_addons) if len(t) != 6 or t[-1:]=='%': # wildcard query, or only the beginning of field 't' # is defined, so add wildcard character: query += " AND bx.tag LIKE %s" res = run_sql(query, query_params + (t + '%',)) else: # exact query for 't': query += " AND bx.tag=%s" res = run_sql(query, query_params + (t,)) # fill the result set: for id_bibrec in res: if id_bibrec[0]: l.append(id_bibrec[0]) # check no of hits found: nb_hits = len(l) # okay, return result set: set = HitSet(l) return set def search_unit_in_bibrec(datetext1, datetext2, type='c'): """ Return hitset of recIDs found that were either created or modified (according to 'type' arg being 'c' or 'm') from datetext1 until datetext2, inclusive. Does not pay attention to pattern, collection, anything. Useful to intersect later on with the 'real' query. """ set = HitSet() if type.startswith("m"): type = "modification_date" else: type = "creation_date" # by default we are searching for creation dates res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (type, type), (datetext1, datetext2)) for row in res: set += row[0] return set def search_unit_by_times_cited(p): """ Return histset of recIDs found that are cited P times. Usually P looks like '10->23'. 
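
       An illustrative sketch of the quoting applied to 'p' below before
       it is handed over to get_records_with_num_cites() (the value is
       made up):

       >>> p = '10->23'
       >>> '"' + p + '"'
       '"10->23"'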
""" numstr = '"'+p+'"' #this is sort of stupid but since we may need to #get the records that do _not_ have cites, we have to #know the ids of all records, too #but this is needed only if bsu_p is 0 or 0 or 0->0 allrecs = [] if p == 0 or p == "0" or \ p.startswith("0->") or p.endswith("->0"): allrecs = HitSet(run_sql_cached("SELECT id FROM bibrec", affected_tables=['bibrec'])) return get_records_with_num_cites(numstr, allrecs) def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG): """Return dict of hitsets given by intersection of hitset with the collection universes.""" _ = gettext_set_language(ln) # search stage 4: intersect with the collection universe: if verbose and of.startswith("h"): t1 = os.times()[4] results = {} results_nbhits = 0 for coll in colls: results[coll] = hitset_in_any_collection & get_collection_reclist(coll) results_nbhits += len(results[coll]) if results_nbhits == 0: # no hits found, try to search in Home: results_in_Home = hitset_in_any_collection & get_collection_reclist(CFG_SITE_NAME) if len(results_in_Home) > 0: # some hits found in Home, so propose this search: if of.startswith("h"): url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[]) print_warning(req, _("No match found in collection %(x_collection)s. Other public collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\ {'x_collection': '' + string.join([get_coll_i18nname(coll, ln) for coll in colls], ', ') + '', 'x_url_open': '' % (url), 'x_nb_hits': len(results_in_Home), 'x_url_close': ''}) results = {} else: # no hits found in Home, recommend different search terms: if of.startswith("h"): print_warning(req, _("No public collection matched your query. " "If you were looking for a non-public document, please choose " "the desired restricted collection first.")) results = {} if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits) print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1)) return results def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"): """Return intersection of search 'results' (a dict of hitsets with collection as key) with the 'hitset', i.e. apply 'hitset' intersection to each collection within search 'results'. If the final 'results' set is to be empty, and 'ap' (approximate pattern) is true, and then print the `warningtext' and return the original 'results' set unchanged. If 'ap' is false, then return empty results set. """ if ap: results_ap = copy.deepcopy(results) else: results_ap = {} # will return empty dict in case of no hits found nb_total = 0 for coll in results.keys(): results[coll].intersection_update(hitset) nb_total += len(results[coll]) if nb_total == 0: if of.startswith("h"): print_warning(req, aptext) results = results_ap return results def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG): """Return a box similar to ``Not satisfied...'' one by proposing author searches for similar names. Namely, take AUTHOR_NAME and the first initial of the firstame (after comma) and look into author index whether authors with e.g. middle names exist. Useful mainly for CERN Library that sometimes contains name forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the same person. The box isn't proposed if no similarly named authors are found to exist. 
""" # return nothing if not configured: if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0: return "" # return empty box if there is no initial: if re.match(r'[^ ,]+, [^ ]', author_name) is None: return "" # firstly find name comma initial: author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name) # secondly search for similar name forms: similar_author_names = {} for name in author_name_to_search, strip_accents(author_name_to_search): for tag in get_field_tags("author"): # deduce into which bibxxx table we will search: digit1, digit2 = int(tag[0]), int(tag[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(tag) != 6 or tag[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx, (name + "%", tag + "%")) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx, (name + "%", tag)) for row in res: similar_author_names[row[0]] = 1 # remove the original name and sort the list: try: del similar_author_names[author_name] except KeyError: pass # thirdly print the box: out = "" if similar_author_names: out_authors = similar_author_names.keys() out_authors.sort() tmp_authors = [] for out_author in out_authors: nbhits = get_nbhits_in_bibxxx(out_author, "author") if nbhits: tmp_authors.append((out_author, nbhits)) out += websearch_templates.tmpl_similar_author_names( authors=tmp_authors, ln=ln) return out def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True): """Return text box containing list of 'n' nearest terms above/below 'p' for the field 'f' for matching type 't' (words/phrases) in language 'ln'. Propose new searches according to `urlargs' with the new words. If `intro_text_p' is true, then display the introductory message, otherwise print only the nearest terms in the box content. """ # load the right message language _ = gettext_set_language(ln) out = "" nearest_terms = [] if not p: # sanity check p = "." # look for nearest terms: if t == 'w': nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n) if not nearest_terms: return "%s %s." % (_("No words index available for"), cgi.escape(get_field_i18nname(get_field_name(f), ln))) else: nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n) if not nearest_terms: return "%s %s." % (_("No phrase index available for"), cgi.escape(get_field_i18nname(get_field_name(f), ln))) terminfo = [] for term in nearest_terms: if t == 'w': hits = get_nbhits_in_bibwords(term, f) else: hits = get_nbhits_in_bibxxx(term, f) argd = {} argd.update(urlargd) # check which fields contained the requested parameter, and replace it. for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'): if px in argd: if f == argd[fx] or f == "anyfield" or f == "": if string.find(argd[px], p) > -1: argd[px] = string.replace(argd[px], p, term) break else: if string.find(argd[px], f+':'+p) > -1: argd[px] = string.replace(argd[px], f+':'+p, f+':'+term) break elif string.find(argd[px], f+':"'+p+'"') > -1: argd[px] = string.replace(argd[px], f+':"'+p+'"', f+':"'+term+'"') break terminfo.append((term, hits, argd)) intro = "" if intro_text_p: # add full leading introductory text if f: intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. 
Nearest terms in any collection are:") % \
                    {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>",
                     'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f), ln)) + "</em>"}
        else:
            intro = _("Search term %s did not match any record. Nearest terms in any collection are:") % \
                     ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>")
    return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo, intro=intro)

def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
    """Return list of +n -n nearest terms to word `p' in index for field `f'."""
    nearest_words = [] # will hold the (sorted) list of nearest words to return
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return nearest_words
    # firstly try to get `n' closest words above `p':
    res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX,
                  (p, n_above))
    for row in res:
        nearest_words.append(row[0])
    nearest_words.reverse()
    # secondly insert given word `p':
    nearest_words.append(p)
    # finally try to get `n' closest words below `p':
    res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX,
                  (p, n_below))
    for row in res:
        nearest_words.append(row[0])
    return nearest_words

def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field f, regardless
       of collection.
       Return list of [phrase1, phrase2, ... , phrase_n]."""
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = string.split(p, ":", 1)

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nearest_terms_in_bibwords(p, f, n_below, n_above)

    ## We are going to take max(n_below, n_above) as the number of
    ## values to fetch from bibXXx.  This is needed to work around
    ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
    ## use MySQL 4.1.x or our own idxPHRASE in the future.
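    ## For instance (illustrative values): with n_below=5 and n_above=5 we
    ## fetch 2*max(5, 5) = 10 phrases from each bibXXx table consulted,
    ## and trim to the (-n_above, +n_below) window only after the final
    ## sort further below.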
n_fetch = 2*max(n_below, n_above) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) ## start browsing to fetch list of hits: browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique) # always add self to the results set: browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1 for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) # firstly try to get `n' closest phrases above `p': if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value<%%s AND bx.tag LIKE %%s ORDER BY bx.value DESC LIMIT %%s""" % bx, (p, t + "%", n_fetch)) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value<%%s AND bx.tag=%%s ORDER BY bx.value DESC LIMIT %%s""" % bx, (p, t, n_fetch)) for row in res: browsed_phrases[row[0]] = 1 # secondly try to get `n' closest phrases equal to or below `p': if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value>=%%s AND bx.tag LIKE %%s ORDER BY bx.value ASC LIMIT %%s""" % bx, (p, t + "%", n_fetch)) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value>=%%s AND bx.tag=%%s ORDER BY bx.value ASC LIMIT %%s""" % bx, (p, t, n_fetch)) for row in res: browsed_phrases[row[0]] = 1 # select first n words only: (this is needed as we were searching # in many different tables and so aren't sure we have more than n # words right; this of course won't be needed when we shall have # one ACC table only for given field): phrases_out = browsed_phrases.keys() phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)), string.lower(strip_accents(y)))) # find position of self: try: idx_p = phrases_out.index(p) except: idx_p = len(phrases_out)/2 # return n_above and n_below: return phrases_out[max(0, idx_p-n_above):idx_p+n_below] def get_nbhits_in_bibwords(word, f): """Return number of hits for word 'word' inside words index for field 'f'.""" out = 0 # deduce into which bibwordsX table we will search: bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX, (word,)) for hitlist in res: out += len(HitSet(hitlist[0])) return out def get_nbhits_in_bibxxx(p, f): """Return number of hits for word 'word' inside words index for field 'f'.""" ## determine browse field: if not f and string.find(p, ":") > 0: # does 'p' contain ':'? 
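        ## e.g. an (illustrative) pattern 'author:Ellis' is split below
        ## into f='author' and p='Ellis':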
f, p = string.split(p, ":", 1) # FIXME: quick hack for the journal index if f == 'journal': return get_nbhits_in_bibwords(p, f) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) # start searching: recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., } (unique recIDs, therefore) for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag LIKE %%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t + "%")) else: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag=%%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t)) for row in res: recIDs[row[0]] = 1 return len(recIDs) def get_mysql_recid_from_aleph_sysno(sysno): """Returns DB's recID for ALEPH sysno passed in the argument (e.g. "002379334CER"). Returns None in case of failure.""" out = None res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""", (sysno,)) if res: out = res[0][0] return out def guess_primary_collection_of_a_record(recID): """Return primary collection name a record recid belongs to, by testing 980 identifier. May lead to bad guesses when a collection is defined dynamically via dbquery. In that case, return 'CFG_SITE_NAME'.""" out = CFG_SITE_NAME dbcollids = get_fieldvalues(recID, "980__a") if dbcollids: dbquery = "collection:" + dbcollids[0] res = run_sql("SELECT name FROM collection WHERE dbquery=%s", (dbquery,)) if res: out = res[0][0] return out _re_collection_url = re.compile('/collection/(.+)') def guess_collection_of_a_record(recID, referer=None): """Return collection name a record recid belongs to, by first testing the referer URL if provided and otherwise returning the primary collection.""" if referer: dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer) g = _re_collection_url.match(path) if g: name = urllib.unquote_plus(g.group(1)) if recID in get_collection_reclist(name): return name elif path.startswith('/search'): query = cgi.parse_qs(query) for name in query.get('cc', []) + query.get('c', []): if recID in get_collection_reclist(name): return name return guess_primary_collection_of_a_record(recID) def get_all_collections_of_a_record(recID): """Return all the collection names a record belongs to. Note this function is O(n_collections).""" ret = [] for name in collection_reclist_cache.keys(): if recID in get_collection_reclist(name): ret.append(name) return ret def get_tag_name(tag_value, prolog="", epilog=""): """Return tag name from the known tag value, by looking up the 'tag' table. Return empty string in case of failure. Example: input='100__%', output=first author'.""" out = "" res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,)) if res: out = prolog + res[0][0] + epilog return out def get_fieldcodes(): """Returns a list of field codes that may have been passed as 'search options' in URL. 
Example: output=['subject','division'].""" out = [] res = run_sql("SELECT DISTINCT(code) FROM field") for row in res: out.append(row[0]) return out def get_field_name(code): """Return the corresponding field_name given the field code. e.g. reportnumber -> report number.""" res = run_sql("SELECT name FROM field WHERE code=%s", (code, )) if res: return res[0][0] else: return "" def get_field_tags(field): """Returns a list of MARC tags for the field code 'field'. Returns empty list in case of error. Example: field='author', output=['100__%','700__%'].""" out = [] query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (field, )) for val in res: out.append(val[0]) return out def get_fieldvalues(recID, tag): """Return list of field values for field TAG inside record RECID.""" out = [] if tag == "001___": # we have asked for recID that is not stored in bibXXx tables out.append(str(recID)) else: # we are going to look inside bibXXx tables digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for return [] bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT bx.value FROM %s AS bx, %s AS bibx " \ " WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s " \ " ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx) res = run_sql(query, (recID, tag)) for row in res: out.append(row[0]) return out def get_fieldvalues_alephseq_like(recID, tags_in): """Return buffer of ALEPH sequential-like textual format with fields found in the list TAGS_IN for record RECID.""" out = "" if type(tags_in) is not list: tags_in = [tags_in,] if len(tags_in) == 1 and len(tags_in[0]) == 6: ## case A: one concrete subfield asked, so print its value if found ## (use with care: can false you if field has multiple occurrences) out += string.join(get_fieldvalues(recID, tags_in[0]),"\n") else: ## case B: print our "text MARC" format; works safely all the time # find out which tags to output: dict_of_tags_out = {} if not tags_in: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 else: for tag in tags_in: if len(tag) == 0: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 elif len(tag) == 1: for j in range(0, 10): dict_of_tags_out["%s%d%%" % (tag, j)] = 1 elif len(tag) < 5: dict_of_tags_out["%s%%" % tag] = 1 elif tag >= 6: dict_of_tags_out[tag[0:5]] = 1 tags_out = dict_of_tags_out.keys() tags_out.sort() # search all bibXXx tables as needed: for tag in tags_out: digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for continue if tag.startswith("001") or tag.startswith("00%"): if out: out += "\n" out += "%09d %s %d" % (recID, "001__", recID) bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(tag)+'%')) # go through fields: field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_": ind1 = "" if ind2 == "_": ind2 = "" # print field tag if field_number != field_number_old or field[:-1] != field_old[:-1]: if out: out += "\n" out 
+= "%09d %s " % (recID, field[:5]) field_number_old = field_number field_old = field # print subfield value if field[0:2] == "00" and field[-1:] == "_": out += value else: out += "$$%s%s" % (field[-1:], value) return out def record_exists(recID): """Return 1 if record RECID exists. Return 0 if it doesn't exist. Return -1 if it exists but is marked as deleted.""" out = 0 res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1) if res: # record exists; now check whether it isn't marked as deleted: dbcollids = get_fieldvalues(recID, "980__%") if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids): out = -1 # exists, but marked as deleted else: out = 1 # exists fine return out def record_public_p(recID): """Return 1 if the record is public, i.e. if it can be found in the Home collection. Return 0 otherwise. """ return recID in get_collection_reclist(CFG_SITE_NAME) def get_creation_date(recID, fmt="%Y-%m-%d"): "Returns the creation date of the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def get_modification_date(recID, fmt="%Y-%m-%d"): "Returns the date of last modification for the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def print_warning(req, msg, type='', prologue='
<br />', epilogue='<br />
'):
    "Prints warning message and flushes output."
    if req and msg:
        req.write(websearch_templates.tmpl_print_warning(
                   msg = msg,
                   type = type,
                   prologue = prologue,
                   epilogue = epilogue,
                 ))
    return

def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME,
                      nb_found=-1, jrec=1, rg=10,
                      as=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="",
                      m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="",
                      d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="",
                      cpu_time=-1, middle_only=0):
    """Prints stripe with the information on 'collection' and 'nb_found'
       results and CPU time.  Also prints navigation links
       (beg/next/prev/end) inside the results set.  If middle_only is
       set to 1, it will only print the middle box information
       (beg/next/prev/end/etc) links.  This is suitable for displaying
       navigation links at the bottom of the search results page."""
    out = ""
    # sanity check:
    if jrec < 1:
        jrec = 1
    if jrec > nb_found:
        jrec = max(nb_found-rg+1, 1)
    return websearch_templates.tmpl_print_search_info(
             ln = ln,
             collection = collection,
             as = as,
             collection_name = get_coll_i18nname(collection, ln),
             collection_id = get_colID(collection),
             middle_only = middle_only,
             rg = rg,
             nb_found = nb_found,
             sf = sf,
             so = so,
             rm = rm,
             of = of,
             ot = ot,
             p = p,
             f = f,
             p1 = p1,
             p2 = p2,
             p3 = p3,
             f1 = f1,
             f2 = f2,
             f3 = f3,
             m1 = m1,
             m2 = m2,
             m3 = m3,
             op1 = op1,
             op2 = op2,
             pl_in_url = pl_in_url,
             d1y = d1y,
             d1m = d1m,
             d1d = d1d,
             d2y = d2y,
             d2m = d2m,
             d2d = d2d,
             dt = dt,
             jrec = jrec,
             sc = sc,
             sp = sp,
             all_fieldcodes = get_fieldcodes(),
             cpu_time = cpu_time,
           )

def print_results_overview(req, colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[]):
    """Prints results overview box with links to particular collections below."""
    out = ""
    new_colls = []
    for coll in colls:
        new_colls.append({
                          'id': get_colID(coll),
                          'code': coll,
                          'name': get_coll_i18nname(coll, ln),
                         })
    return websearch_templates.tmpl_print_results_overview(
             ln = ln,
             results_final_nb_total = results_final_nb_total,
             results_final_nb = results_final_nb,
             cpu_time = cpu_time,
             colls = new_colls,
             ec = ec,
           )

def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG):
    """Sort records in 'recIDs' list according to sort field 'sort_field'
       in order 'sort_order'.  If more than one instance of 'sort_field'
       is found for a given record, try to choose the one given by
       'sort_pattern', for example "sort by report number that starts
       by CERN-PS".  Note that 'sort_field' can be a field code like
       'author' or a MARC tag like '100__a' directly."""
    _ = gettext_set_language(ln)
    ## check arguments:
    if not sort_field:
        return recIDs
    if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT:
        if of.startswith('h'):
            print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only.
Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning") return recIDs sort_fields = string.split(sort_field, ",") recIDs_dict = {} recIDs_out = [] ## first deduce sorting MARC tag out of the 'sort_field' argument: tags = [] for sort_field in sort_fields: if sort_field and str(sort_field[0:2]).isdigit(): # sort_field starts by two digits, so this is probably a MARC tag already tags.append(sort_field) else: # let us check the 'field' table query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (sort_field, )) if res: for row in res: tags.append(row[0]) else: if of.startswith('h'): print_warning(req, _("Sorry, %s does not seem to be a valid sort option. Choosing title sort instead.") % cgi.escape(sort_field), "Error") tags.append("245__a") if verbose >= 3: print_warning(req, "Sorting by tags %s." % cgi.escape(repr(tags))) if sort_pattern: print_warning(req, "Sorting preferentially by %s." % cgi.escape(sort_pattern)) ## check if we have sorting tag defined: if tags: # fetch the necessary field values: for recID in recIDs: val = "" # will hold value for recID according to which sort vals = [] # will hold all values found in sorting tag for recID for tag in tags: vals.extend(get_fieldvalues(recID, tag)) if sort_pattern: # try to pick that tag value that corresponds to sort pattern bingo = 0 for v in vals: if v.lower().startswith(sort_pattern.lower()): # bingo! bingo = 1 val = v break if not bingo: # sort_pattern not present, so add other vals after spaces val = sort_pattern + " " + string.join(vals) else: # no sort pattern defined, so join them all together val = string.join(vals) val = strip_accents(val.lower()) # sort values regardless of accents and case if recIDs_dict.has_key(val): recIDs_dict[val].append(recID) else: recIDs_dict[val] = [recID] # sort them: recIDs_dict_keys = recIDs_dict.keys() recIDs_dict_keys.sort() # now that keys are sorted, create output array: for k in recIDs_dict_keys: for s in recIDs_dict[k]: recIDs_out.append(s) # ascending or descending? if sort_order == 'a': recIDs_out.reverse() # okay, we are done return recIDs_out else: # good, no sort needed return recIDs def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=CFG_SITE_LANG, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True, print_records_epilogue_p=True, verbose=0, tab=''): """ Prints list of records 'recIDs' formatted according to 'format' in groups of 'rg' starting from 'jrec'. Assumes that the input list 'recIDs' is sorted in reverse order, so it counts records from tail to head. A value of 'rg=-9999' means to print all records: to be used with care. Print also list of RELEVANCES for each record (if defined), in between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE. Print prologue and/or epilogue specific to 'format' if 'print_records_prologue_p' and/or print_records_epilogue_p' are True. 
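
       An illustrative sketch of the windowing arithmetic applied below
       (the values are made up; recall that 'recIDs' is sorted in reverse
       order, so records are counted from tail to head):

       >>> nb_found, jrec, rg = 100, 1, 10
       >>> irec_max = nb_found - jrec
       >>> irec_min = nb_found - jrec - rg
       >>> (irec_max, irec_min)
       (99, 89)
       >>> len(range(irec_max, irec_min, -1))   # ten records get printed
       10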
""" # load the right message language _ = gettext_set_language(ln) # sanity checking: if req is None: return # get user_info (for formatting based on user) user_info = collect_user_info(req) if len(recIDs): nb_found = len(recIDs) if rg == -9999: # print all records rg = nb_found else: rg = abs(rg) if jrec < 1: # sanity checks jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) # will print records from irec_max to irec_min excluded: irec_max = nb_found - jrec irec_min = nb_found - jrec - rg if irec_min < 0: irec_min = -1 if irec_max >= nb_found: irec_max = nb_found - 1 #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max)) if format.startswith('x'): # print header if needed if print_records_prologue_p: print_records_prologue(req, format) # print records recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] format_records(recIDs_to_print, format, ln=ln, search_pattern=search_pattern, record_separator="\n", user_info=user_info, req=req) # print footer if needed if print_records_epilogue_p: print_records_epilogue(req, format) elif format.startswith('t') or str(format[0:3]).isdigit(): # we are doing plain text output: for irec in range(irec_max, irec_min, -1): x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) req.write(x) if x: req.write('\n') elif format == 'excel': recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] create_excel(recIDs=recIDs_to_print, req=req, ln=ln) else: # we are doing HTML output: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): # portfolio and on-the-fly formats: for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose)) elif format.startswith("hb"): # HTML brief format: req.write(websearch_templates.tmpl_record_format_htmlbrief_header( ln = ln)) for irec in range(irec_max, irec_min, -1): row_number = jrec+irec_max-irec recid = recIDs[irec] if relevances and relevances[irec]: relevance = relevances[irec] else: relevance = '' record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) req.write(websearch_templates.tmpl_record_format_htmlbrief_body( ln = ln, recid = recid, row_number = row_number, relevance = relevance, record = record, relevances_prologue = relevances_prologue, relevances_epilogue = relevances_epilogue, )) req.write(websearch_templates.tmpl_record_format_htmlbrief_footer( ln = ln)) elif format.startswith("hd"): # HTML detailed format: for irec in range(irec_max, irec_min, -1): unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])), recIDs[irec], ln=ln) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % ln tabs = [(unordered_tabs[tab_id]['label'], \ '%s/record/%s/%s%s' % (CFG_SITE_URL, recIDs[irec], tab_id, link_ln), \ tab_id == tab, unordered_tabs[tab_id]['enabled']) \ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] content = '' # load content if tab == 'usage': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) r = calculate_reading_similarity_list(recIDs[irec], "downloads") downloadsimilarity = None downloadhistory = None #if r: # downloadsimilarity = r if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS: 
downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln) r = calculate_reading_similarity_list(recIDs[irec], "pageviews") viewsimilarity = None if r: viewsimilarity = r content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec], ln, downloadsimilarity=downloadsimilarity, downloadhistory=downloadhistory, viewsimilarity=viewsimilarity) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'citations': recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln)) req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln)) # Citing citinglist = [] r = calculate_cited_by_list(recid) if r: citinglist = r req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid, ln, citinglist=citinglist)) # Self-cited selfcited = get_self_cited_by(recid) req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid, ln, selfcited=selfcited, citinglist=citinglist)) # Co-cited s = calculate_co_cited_with_list(recid) cociting = None if s: cociting = s req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid, ln, cociting=cociting)) # Citation history citationhistory = None if r: citationhistory = create_citation_history_graph_and_box(recid, ln) #debug if verbose > 3: print_warning(req, "Citation graph debug: "+str(len(citationhistory))) req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory)) req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln)) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) elif tab == 'references': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose)) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'holdings': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) req.write(format_record(recIDs[irec], 'HDHOLD', ln=ln, user_info=user_info, verbose=verbose)) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) else: # Metadata tab req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, show_short_rec_p=False)) creationdate = None modificationdate = None if record_exists(recIDs[irec]) == 1: creationdate = get_creation_date(recIDs[irec]) modificationdate = get_modification_date(recIDs[irec]) content = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) content = websearch_templates.tmpl_detailed_record_metadata( recID = recIDs[irec], ln = ln, format = format, creationdate = creationdate, modificationdate = modificationdate, content = content) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln, creationdate=creationdate, modificationdate=modificationdate, show_short_rec_p=False)) if len(tabs) > 0: # Add the mini box at bottom of the page if CFG_WEBCOMMENT_ALLOW_REVIEWS: from invenio.webcomment import get_mini_reviews reviews = get_mini_reviews(recid = recIDs[irec], ln=ln) else: reviews = '' actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose) files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose) 
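 # assemble the bottom mini panel from the reviews, actions and files pieces formatted just above: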
req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec], ln, format, files=files, reviews=reviews, actions=actions)) else: # Other formats for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose)) else: print_warning(req, _("Use different search terms.")) def print_records_prologue(req, format): """ Print the appropriate prologue for list of records in the given format. """ prologue = "" # no prologue needed for HTML or Text formats if format.startswith('xm'): prologue = websearch_templates.tmpl_xml_marc_prologue() elif format.startswith('xn'): prologue = websearch_templates.tmpl_xml_nlm_prologue() elif format.startswith('xw'): prologue = websearch_templates.tmpl_xml_refworks_prologue() elif format.startswith('xr'): prologue = websearch_templates.tmpl_xml_rss_prologue() elif format.startswith('x'): prologue = websearch_templates.tmpl_xml_default_prologue() req.write(prologue) def print_records_epilogue(req, format): """ Print the appropriate epilogue for list of records in the given format. """ epilogue = "" # no epilogue needed for HTML or Text formats if format.startswith('xm'): epilogue = websearch_templates.tmpl_xml_marc_epilogue() elif format.startswith('xn'): epilogue = websearch_templates.tmpl_xml_nlm_epilogue() elif format.startswith('xw'): epilogue = websearch_templates.tmpl_xml_refworks_epilogue() elif format.startswith('xr'): epilogue = websearch_templates.tmpl_xml_rss_epilogue() elif format.startswith('x'): epilogue = websearch_templates.tmpl_xml_default_epilogue() req.write(epilogue) +def get_record(recid): + """Directly the record object corresponding to the recid.""" + from marshal import loads, dumps + from zlib import compress, decompress + if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: + value = run_sql('SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT=\'recstruct\'', (recid, )) + if value: + try: + return loads(decompress(value[0][0])) + except: + ### In case of corruption, let's rebuild it! + pass + return create_record(print_record(recid, 'xm'))[0] + def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress, search_pattern=None, user_info=None, verbose=0): """Prints record 'recID' formatted accoding to 'format'.""" + if format == 'recstruct': + return get_record(recID) + _ = gettext_set_language(ln) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # New Python BibFormat procedure for formatting # Old procedure follows further below # We must still check some special formats, but these # should disappear when BibFormat improves. if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \ or format.lower().startswith('t') \ or format.lower().startswith('hm') \ or str(format[0:3]).isdigit() \ or ot): # Unspecified format is hd if format == '': format = 'hd' if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html': # HTML output displays a default value for deleted records. # Other format have to deal with it. 
out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) # at the end of HTML brief mode, print the "Detailed record" functionality: if format.lower().startswith('hb') and \ format.lower() != 'hb_p': out += websearch_templates.tmpl_print_record_brief_links( ln = ln, recID = recID, ) return out # Old PHP BibFormat procedure for formatting # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " \n" out += "
   <header>\n" for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD): out += "    <identifier>%s</identifier>\n" % oai_id out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID) out += "   </header>
\n" out += " \n" if format.startswith("xm") or format == "marcxml": # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res and record_exist_p == 1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out += """ \n""" out += " %d\n" % int(recID) elif format.startswith("xm"): out += """ \n""" out += " %d\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD) if oai_ids: out += "%s\n" % \ (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0]) out += "DELETED\n" else: # controlfields query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\ "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\ "ORDER BY bb.field_number, b.tag ASC" res = run_sql(query, (recID, )) for row in res: field, value = row[0], row[1] value = encode_for_xml(value) out += """ %s\n""" % \ (encode_for_xml(field[0:3]), value) # datafields i = 1 # Do not process bib00x and bibrec_bib00x, as # they are controlfields. So start at bib01x and # bibrec_bib00x (and set i = 0 at the end of # first loop) for digit1 in range(0, 10): for digit2 in range(i, 10): bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(digit1)+str(digit2)+'%')) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_" or ind1 == "": ind1 = " " if ind2 == "_" or ind2 == "": ind2 = " " # print field tag if field_number != field_number_old or field[:-1] != field_old[:-1]: if field_number_old != -999: out += """ \n""" out += """ \n""" % \ (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2)) field_number_old = field_number field_old = field # print subfield value value = encode_for_xml(value) out += """ %s\n""" % \ (encode_for_xml(field[-1:]), value) # all fields/subfields printed in this run, so close the tag: if field_number_old != -999: out += """ \n""" i = 0 # Next loop should start looking at bib%0 and bibrec_bib00x # we are at the end of printing the record: out += " \n" elif format == "xd" or format == "oai_dc": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out += """ \n""" if record_exist_p == -1: out += "" else: for f in get_fieldvalues(recID, "041__a"): out += " %s\n" % f for f in get_fieldvalues(recID, "100__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "700__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "245__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "65017a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "8564_u"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "520__a"): out += " %s\n" % encode_for_xml(f) out += " %s\n" % get_creation_date(recID) out += " \n" elif len(format) == 6 and str(format[0:3]).isdigit(): # user has asked to print some fields only if format == "001": 
out += "<%s>%s</%s>\n" % (format, recID, format) else: vals = get_fieldvalues(recID, format) for val in vals: out += "<%s>%s</%s>\n" % (format, val, format) elif format.startswith('t'): ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"]) else: out += get_fieldvalues_alephseq_like(recID, ot) elif format == "hm": if record_exist_p == -1: out += "<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"])) + "</pre>" else: out += "<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot)) + "</pre>" elif format.startswith("h") and ot: ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += "<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"]) + "</pre>" else: out += "<pre>" + get_fieldvalues_alephseq_like(recID, ot) + "</pre>
" elif format == "hd": # HTML detailed format if record_exist_p == -1: out += _("The record has been deleted.") else: # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_detailed( ln = ln, recID = recID, ) elif format.startswith("hb_") or format.startswith("hd_"): # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hx"): # BibTeX format, called on the fly: if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hs"): # for citation/download similarity navigation links: if record_exist_p == -1: out += _("The record has been deleted.") else: out += '' % websearch_templates.build_search_url(recid=recID, ln=ln) # firstly, title: titles = get_fieldvalues(recID, "245__a") if titles: for title in titles: out += "%s" % title else: # usual title not found, try conference title: titles = get_fieldvalues(recID, "111__a") if titles: for title in titles: out += "%s" % title else: # just print record ID: out += "%s %d" % (get_field_i18nname("record ID", ln), recID) out += "" # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += " - %s" % authors[0] if len(authors) > 1: out += " et al" # thirdly publication info: publinfos = get_fieldvalues(recID, "773__s") if not publinfos: publinfos = get_fieldvalues(recID, "909C4s") if not publinfos: publinfos = get_fieldvalues(recID, "037__a") if not publinfos: publinfos = get_fieldvalues(recID, "088__a") if publinfos: out += " - %s" % publinfos[0] else: # fourthly publication year (if not publication info): years = get_fieldvalues(recID, "773__y") if not years: years = get_fieldvalues(recID, "909C4y") if not years: years = get_fieldvalues(recID, "260__c") if years: out += " (%s)" % years[0] else: # HTML brief format by default if record_exist_p == -1: out += _("The record has been deleted.") else: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format)) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly: or use default format: if CFG_WEBSEARCH_CALL_BIBFORMAT: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) # at the end of HTML brief mode, print the "Detailed record" functionality: if format == 
'hp' or format.startswith("hb_") or format.startswith("hd_"): pass # do nothing for portfolio and on-the-fly formats else: out += websearch_templates.tmpl_print_record_brief_links( ln = ln, recID = recID, ) # print record closing tags, if needed: if format == "marcxml" or format == "oai_dc": out += "  </metadata>
\n" out += " \n" return out def encode_for_xml(s): "Encode special chars in string so that it would be XML-compliant." s = string.replace(s, '&', '&') s = string.replace(s, '<', '<') return s def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0): """ Calls BibFormat and returns formatted record. BibFormat will decide by itself if old or new BibFormat must be used. """ keywords = [] if search_pattern is not None: units = create_basic_search_units(None, str(search_pattern), None) keywords = [unit[1] for unit in units if unit[0] != '-'] return format_record(recID, of=format, ln=ln, search_pattern=keywords, user_info=user_info, verbose=verbose) def log_query(hostname, query_args, uid=-1): """ Log query into the query and user_query tables. Return id_query or None in case of problems. """ id_query = None if uid >= 0: # log the query only if uid is reasonable res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1) try: id_query = res[0][0] except: id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,)) if id_query: run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)", (uid, id_query, hostname, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return id_query def log_query_info(action, p, f, colls, nb_records_found_total=-1): """Write some info to the log file for later analysis.""" try: log = open(CFG_LOGDIR + "/search.log", "a") log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime())) log.write(action+"#") log.write(p+"#") log.write(f+"#") for coll in colls[:-1]: log.write("%s," % coll) log.write("%s#" % colls[-1]) log.write("%d" % nb_records_found_total) log.write("\n") log.close() except: pass return def wash_url_argument(var, new_type): """Wash list argument into 'new_type', that can be 'list', 'str', or 'int'. Useful for washing mod_python passed arguments, that are all lists of strings (URL args may be multiple), but we sometimes want only to take the first value, and sometimes to represent it as string or numerical value.""" out = [] if new_type == 'list': # return lst if type(var) is list: out = var else: out = [var] elif new_type == 'str': # return str if type(var) is list: try: out = "%s" % var[0] except: out = "" elif type(var) is str: out = var else: out = "%s" % var elif new_type == 'int': # return int if type(var) is list: try: out = string.atoi(var[0]) except: out = 0 elif type(var) is int: out = var elif type(var) is str: try: out = string.atoi(var) except: out = 0 else: out = 0 return out ### CALLABLES def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=10, sf="", so="d", sp="", rm="", of="id", ot="", as=0, p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab=""): """Perform search or browse request, without checking for authentication. Return list of recIDs found, if of=id. Otherwise create web page. The arguments are as follows: req - mod_python Request class instance. cc - current collection (e.g. "ATLAS"). The collection the user started to search/browse from. c - collection list (e.g. ["Theses", "Books"]). The collections user may have selected/deselected when starting to search from 'cc'. p - pattern to search for (e.g. "ellis and muon or kaon"). f - field to search within (e.g. 
"author"). rg - records in groups of (e.g. "10"). Defines how many hits per collection in the search results page are displayed. sf - sort field (e.g. "title"). so - sort order ("a"=ascending, "d"=descending). sp - sort pattern (e.g. "CERN-") -- in case there are more values in a sort field, this argument tells which one to prefer rm - ranking method (e.g. "jif"). Defines whether results should be ranked by some known ranking method. of - output format (e.g. "hb"). Usually starting "h" means HTML output (and "hb" for HTML brief, "hd" for HTML detailed), "x" means XML output, "t" means plain text output, "id" means no output at all but to return list of recIDs found. (Suitable for high-level API.) ot - output only these MARC tags (e.g. "100,700,909C0b"). Useful if only some fields are to be shown in the output, e.g. for library to control some fields. as - advanced search ("0" means no, "1" means yes). Whether search was called from within the advanced search interface. p1 - first pattern to search for in the advanced search interface. Much like 'p'. f1 - first field to search within in the advanced search interface. Much like 'f'. m1 - first matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op1 - first operator, to join the first and the second unit in the advanced search interface. ("a" add, "o" or, "n" not). p2 - second pattern to search for in the advanced search interface. Much like 'p'. f2 - second field to search within in the advanced search interface. Much like 'f'. m2 - second matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op2 - second operator, to join the second and the third unit in the advanced search interface. ("a" add, "o" or, "n" not). p3 - third pattern to search for in the advanced search interface. Much like 'p'. f3 - third field to search within in the advanced search interface. Much like 'f'. m3 - third matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). sc - split by collection ("0" no, "1" yes). Governs whether we want to present the results in a single huge list, or splitted by collection. jrec - jump to record (e.g. "234"). Used for navigation inside the search results. recid - display record ID (e.g. "20000"). Do not search/browse but go straight away to the Detailed record page for the given recID. recidb - display record ID bis (e.g. "20010"). If greater than 'recid', then display records from recid to recidb. Useful for example for dumping records from the database for reformatting. sysno - display old system SYS number (e.g. ""). If you migrate to CDS Invenio from another system, and store your old SYS call numbers, you can use them instead of recid if you wish so. id - the same as recid, in case recid is not set. For backwards compatibility. idb - the same as recid, in case recidb is not set. For backwards compatibility. sysnb - the same as sysno, in case sysno is not set. For backwards compatibility. action - action to do. "SEARCH" for searching, "Browse" for browsing. Default is to search. d1 - first datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-08-23 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd1' takes precedence over d1y, d1m, d1d if these are defined. 
d1y - first date's year (e.g. "1998"). Useful for search limits on creation/modification date. d1m - first date's month (e.g. "08"). Useful for search limits on creation/modification date. d1d - first date's day (e.g. "23"). Useful for search limits on creation/modification date. d2 - second datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-09-02 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd2' takes precedence over d2y, d2m, d2d if these are defined. d2y - second date's year (e.g. "1998"). Useful for search limits on creation/modification date. d2m - second date's month (e.g. "09"). Useful for search limits on creation/modification date. d2d - second date's day (e.g. "02"). Useful for search limits on creation/modification date. dt - first and second date's type (e.g. "c"). Specifies whether to search in creation dates ("c") or in modification dates ("m"). When dt is not set and d1* and d2* are set, the default is "c". verbose - verbose level (0=min, 9=max). Useful to print some internal information on the searching process in case something goes wrong. ap - alternative patterns (0=no, 1=yes). In case no exact match is found, the search engine can try alternative patterns e.g. to replace non-alphanumeric characters by a boolean query. ap defines if this is wanted. ln - language of the search interface (e.g. "en"). Useful for internationalization. ec - list of external search engines to search as well (e.g. "SPIRES HEP"). """ selected_external_collections_infos = None # wash output format: of = wash_output_format(of) # wash all arguments requiring special care try: (cc, colls_to_display, colls_to_search) = wash_colls(cc, c, sc) # which colls to search and to display? except InvenioWebSearchUnknownCollectionError, exc: colname = exc.colname if of.startswith("h"): page_start(req, of, cc, as, ln, getUid(req), websearch_templates.tmpl_collection_not_found_page_title(colname, ln)) req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln)) return page_end(req, of, ln) elif of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: return page_end(req, of, ln) p = wash_pattern(p) f = wash_field(f) p1 = wash_pattern(p1) f1 = wash_field(f1) p2 = wash_pattern(p2) f2 = wash_field(f2) p3 = wash_pattern(p3) f3 = wash_field(f3) datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d) # wash ranking method: if not is_method_valid(None, rm): rm = "" _ = gettext_set_language(ln) # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable) if sysnb != "" and sysno == "": sysno = sysnb if id > 0 and recid == -1: recid = id if idb > 0 and recidb == -1: recidb = idb # TODO deduce passed search limiting criterias (if applicable) pl, pl_in_url = "", "" # no limits by default if action != "browse" and req and req.args: # we do not want to add options while browsing or while calling via command-line fieldargs = cgi.parse_qs(req.args) for fieldcode in get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: pl += "+%s:\"%s\" " % (fieldcode, val) pl_in_url += "&%s=%s" % (urllib.quote(fieldcode), urllib.quote(val)) # deduce recid from sysno argument (if applicable): if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record: recid = get_mysql_recid_from_aleph_sysno(sysno) if recid is None: recid = 0 # use recid 0 to indicate that this sysno does not exist # deduce 
collection we are in (if applicable): if recid > 0: referer = None if req: referer = req.headers_in.get('Referer') cc = guess_collection_of_a_record(recid, referer) # deduce user id (if applicable): try: uid = getUid(req) except: uid = 0 ## 0 - start output if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist ## 1 - detailed record display title, description, keywords = \ websearch_templates.tmpl_record_page_header_content(req, recid, ln) if not req.header_only: page_start(req, of, cc, as, ln, uid, title, description, keywords, recid, tab) # Default format is hb but we are in detailed -> change 'of' if of == "hb": of = "hd" if record_exists(recid): if recidb <= recid: # sanity check recidb = recid + 1 if of == "id": return [recidx for recidx in range(recid, recidb) if record_exists(recidx)] else: print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose, tab=tab) if req and of.startswith("h"): # register detailed record page view event client_ip_address = str(req.get_remote_host(apache.REMOTE_NOLOOKUP)) register_page_view_event(recid, uid, client_ip_address) else: # record does not exist if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) elif action == "browse": ## 2 - browse needed page_start(req, of, cc, as, ln, uid, _("Browse")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) try: if as == 1 or (p1 or p2 or p3): browse_pattern(req, colls_to_search, p1, f1, rg, ln) browse_pattern(req, colls_to_search, p2, f2, rg, ln) browse_pattern(req, colls_to_search, p3, f3, rg, ln) else: browse_pattern(req, colls_to_search, p, f, rg, ln) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) elif rm and p.startswith("recid:"): ## 3-ter - similarity search needed if not req.header_only: page_start(req, of, cc, as, ln, uid, _("Search Results")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) if record_exists(p[6:]) != 1: # record does not exist if of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, "Requested record does not seem to exist.") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find similar ones to it t1 = os.times()[4] results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \ rank_records(rm, 0, get_collection_reclist(cc), string.split(p), verbose) if results_similar_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs), jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, 
m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_warning(req, results_similar_comments) print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose) elif of=="id": return results_similar_recIDs elif of.startswith("x"): print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose) else: # rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, results_similar_relevances_prologue) print_warning(req, results_similar_relevances_epilogue) print_warning(req, results_similar_comments) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif p.startswith("cocitedwith:"): #WAS EXPERIMENTAL ## 3-terter - cited by search needed page_start(req, of, cc, as, ln, uid, _("Search Results")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) recID = p[12:] if record_exists(recID) != 1: # record does not exist if of.startswith("h"): print_warning(req, "Requested record does not seem to exist.") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find co-cited ones: t1 = os.times()[4] results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID))) if results_cocited_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs), jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose) elif of=="id": return results_cocited_recIDs elif of.startswith("x"): print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose) else: # cited rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, "nothing found") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: ## 3 - common search needed page_start(req, of, cc, as, ln, uid, _("Search Results")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) t1 = os.times()[4] results_in_any_collection = HitSet() if as == 1 or (p1 or p2 or p3): ## 3A - advanced search try: results_in_any_collection = search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln) if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, 
ln) if p2: results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln) if op1 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op1 == "o": # or results_in_any_collection.union_update(results_tmp) elif op1 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(op1), "Error") if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p3: results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln) if op2 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op2 == "o": # or results_in_any_collection.union_update(results_tmp) elif op2 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(op2), "Error") except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: ## 3B - simple search try: results_in_any_collection = search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # search_cache_key = p+"@"+f+"@"+string.join(colls_to_search,",") # if search_cache.has_key(search_cache_key): # is the result in search cache? # results_final = search_cache[search_cache_key] # else: # results_final = search_pattern(req, p, f, colls_to_search) # search_cache[search_cache_key] = results_final # if len(search_cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE: # is the cache full? 
(sanity cleaning) # search_cache.clear() # search stage 4: intersection with collection universe: try: results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {}: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # search stage 5: apply search option limits and restrictions: if datetext1 != "": if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2)) try: results_final = intersect_results_with_hitset(req, results_final, search_unit_in_bibrec(datetext1, datetext2, dt), ap, aptext= _("No match within your time limits, " "discarding this condition..."), of=of) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {}: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if pl: pl = wash_pattern(pl) if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying search pattern limit %s..." % cgi.escape(pl)) try: results_final = intersect_results_with_hitset(req, results_final, search_pattern_parenthesised(req, pl, ap=0, ln=ln), ap, aptext=_("No match within your search limits, " "discarding this condition..."), of=of) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {}: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) t2 = os.times()[4] cpu_time = t2 - t1 ## search stage 6: display results: results_final_nb_total = 0 results_final_nb = {} # will hold number of records found in each collection # (in simple dict to display overview more easily) for coll in results_final.keys(): results_final_nb[coll] = len(results_final[coll]) #results_final_nb_total += results_final_nb[coll] # Now let us calculate results_final_nb_total more precisely, # in order to get the total number of "distinct" hits across # searched collections; this is useful because a record might # have been attributed to more than one primary collection; so # we have to avoid counting it multiple times. The price to # pay for this accuracy of results_final_nb_total is somewhat # increased CPU time. 
if results_final.keys() == 1: # only one collection; no need to union them results_final_for_all_selected_colls = results_final.values()[0] results_final_nb_total = results_final_nb.values()[0] else: # okay, some work ahead to union hits across collections: results_final_for_all_selected_colls = HitSet() for coll in results_final.keys(): results_final_for_all_selected_colls.union_update(results_final[coll]) results_final_nb_total = len(results_final_for_all_selected_colls) if results_final_nb_total == 0: if of.startswith('h'): print_warning(req, "No match found, please enter different search terms.") elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # yes, some hits found: good! # collection list may have changed due to not-exact-match-found policy so check it out: for coll in results_final.keys(): if coll not in colls_to_search: colls_to_search.append(coll) # print results overview: if of == "id": # we have been asked to return list of recIDs recIDs = list(results_final_for_all_selected_colls) if sf: # do we have to sort? recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank? results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_selected_colls, string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if results_final_for_all_colls_rank_records_output[0]: recIDs = results_final_for_all_colls_rank_records_output[0] return recIDs elif of.startswith("h"): if of not in ['hcs']: req.write(print_results_overview(req, colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln, ec)) selected_external_collections_infos = print_external_results_overview(req, cc, [p, p1, p2, p3], f, ec, verbose, ln) # print number of hits found for XML outputs: if of.startswith("x"): req.write("\n" % results_final_nb_total) # print records: if of in ['hcs']: # feed the current search to be summarized: summarize_records(results_final_for_all_selected_colls, 'hcs', ln, p, f, req) else: if len(colls_to_search)>1: cpu_time = -1 # we do not want to have search time printed on each collection print_records_prologue(req, of) for coll in colls_to_search: if results_final.has_key(coll) and len(results_final[coll]): if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) results_final_recIDs = list(results_final[coll]) results_final_relevances = [] results_final_relevances_prologue = "" results_final_relevances_epilogue = "" if sf: # do we have to sort? results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank? 
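## Illustrative note (not from the original source): rank_records()
## returns a 5-tuple -- (list of ranked recIDs, their relevance scores,
## a relevance prologue, a relevance epilogue, comments) -- which is why
## five names are unpacked below; example values are hypothetical:
#
#   ([70, 12, 45], [100, 67, 20], '(', ')', '')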
                        results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \
                                                     rank_records(rm, 0, results_final[coll],
                                                                  string.split(p) + string.split(p1) +
                                                                  string.split(p2) + string.split(p3), verbose)
                        if of.startswith("h"):
                            print_warning(req, results_final_comments)
                        if results_final_recIDs_ranked:
                            results_final_recIDs = results_final_recIDs_ranked
                        else:
                            # rank_records failed and returned some error message to display:
                            print_warning(req, results_final_relevances_prologue)
                            print_warning(req, results_final_relevances_epilogue)
                    print_records(req, results_final_recIDs, jrec, rg, of, ot, ln,
                                  results_final_relevances,
                                  results_final_relevances_prologue,
                                  results_final_relevances_epilogue,
                                  search_pattern=p,
                                  print_records_prologue_p=False,
                                  print_records_epilogue_p=False,
                                  verbose=verbose)
                    if of.startswith("h"):
                        req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll,
                                                    results_final_nb[coll], jrec, rg, as, ln,
                                                    p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                    sc, pl_in_url,
                                                    d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
            print_records_epilogue(req, of)
            if f == "author" and of.startswith("h"):
                req.write(create_similarly_named_authors_link_box(p, ln))

        # log query:
        try:
            id_query = log_query(req.get_remote_host(), req.args, uid)
            if of.startswith("h") and id_query:
                if not of in ['hcs']:
                    # display alert/RSS teaser for non-summary formats:
                    req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, ln=ln))
        except:
            # do not log query if req is None (used by CLI interface)
            pass
        log_query_info("ss", p, f, colls_to_search, results_final_nb_total)

    # External searches
    if of.startswith("h"):
        if not of in ['hcs']:
            perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln,
                                               selected_external_collections_infos)

    return page_end(req, of, ln)
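## Illustrative example (not from the original source): the high-level
## search API above can also be called directly from Python; with
## of="id" it returns the list of matching recIDs.  Values shown are
## hypothetical:
#
#   >>> perform_request_search(p='ellis', f='author', of='id')
#   [8, 11, 13]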

def perform_request_cache(req, action="show"):
    """Manipulates the search engine cache."""
    global search_cache
    global collection_reclist_cache
    global collection_reclist_cache_timestamp
    global field_i18nname_cache
    global field_i18nname_cache_timestamp
    global collection_i18nname_cache
    global collection_i18nname_cache_timestamp
    req.content_type = "text/html"
    req.send_http_header()
    out = ""
    out += "<h1>Search Cache</h1>"
    # clear cache if requested:
    if action == "clear":
        search_cache = {}
        collection_reclist_cache = create_collection_reclist_cache()
    # show collection reclist cache:
    out += "<h3>Collection reclist cache</h3>"
    out += "- collection table last updated: %s" % get_table_update_time('collection')
    out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache_timestamp
    out += "<br />- reclist cache contents:"
    out += "<blockquote>"
    for coll in collection_reclist_cache.keys():
        if collection_reclist_cache[coll]:
            out += "%s (%d)<br />" % (coll, len(get_collection_reclist(coll)))
    out += "</blockquote>"
    # show search cache:
    out += "<h3>Search Cache</h3>"
    out += "<blockquote>"
    if len(search_cache):
        out += """<table border="1">"""
        out += "<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % \
               ("Pattern", "Field", "Collection", "Number of Hits")
        for search_cache_key in search_cache.keys():
            p, f, c = string.split(search_cache_key, "@", 2)
            # find out about length of cached data:
            l = 0
            for coll in search_cache[search_cache_key]:
                l += len(search_cache[search_cache_key][coll])
            out += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>" % (p, f, c, l)
        out += "</table>"
    else:
        out += "<p>Search cache is empty."
    out += "</blockquote>"
    out += """<p><a href="%s/search/cache?action=clear">clear cache</a>""" % CFG_SITE_URL
    # show field i18nname cache:
    out += "<h3>Field I18N names cache</h3>"
    out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
    out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache_timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for field in field_i18nname_cache.keys():
        for ln in field_i18nname_cache[field].keys():
            out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache[field][ln])
    out += "</blockquote>"
    # show collection i18nname cache:
    out += "<h3>Collection I18N names cache</h3>"
    out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
    out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache_timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for coll in collection_i18nname_cache.keys():
        for ln in collection_i18nname_cache[coll].keys():
            out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache[coll][ln])
    out += "</blockquote>"
    req.write("<html>")
    req.write(out)
    req.write("</html>")
    return "\n"

def perform_request_log(req, date=""):
    """Display search log information for given date."""
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    req.write("<h1>Search Log</h1>")
    if date: # case A: display stats for a day
        yyyymmdd = string.atoi(date)
        req.write("<p><big><strong>Date: %d</strong></big></p>" % yyyymmdd)
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % \
                  ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
        # read file:
        p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
        lines = p.readlines()
        p.close()
        # process lines:
        i = 0
        for line in lines:
            try:
                datetime, as, p, f, c, nbhits = string.split(line, "#")
                i += 1
                req.write("<tr><td>#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                          % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
            except:
                pass # ignore eventual wrong log lines
        req.write("</table>")
    else: # case B: display summary stats per day
        yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
        yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Day", "Number of Queries"))
        for day in range(yyyymm01, yyyymmdd + 1):
            p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
            for line in p.readlines():
                req.write("""<tr><td>%s</td><td><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                          (day, CFG_SITE_URL, day, line))
            p.close()
        req.write("</table>")
    req.write("</html>")
    return "\n"
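## Illustrative example (hypothetical values): log_query_info() writes
## '#'-separated lines which the parser above splits into six fields,
## e.g.:
#
#   20080201123456#ss#ellis#author#Theses#42
#
## i.e. timestamp, action ("ss" for search), pattern, field,
## collection(s), and total number of hits.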
") req.write("") return "\n" def get_values_for_code_dict(recids, tag): """ gets values of tag for records, puts them in dictionary that contains their frequency""" valuefreqdict = {} for recid in recids: vals = get_fieldvalues(recid, tag) for v in vals: if valuefreqdict.has_key(v): valuefreqdict[v] = valuefreqdict[v]+1 else: valuefreqdict[v] = 1 return valuefreqdict def get_most_popular_values_for_code(recids, tag): """returns a sorted tuple list of the popular values for a given tag""" valuefreqdict = get_values_for_code_dict(recids, tag) tmppairs = [] for k,v in valuefreqdict.items(): tmppairs.append((v,k)) tmppairs.sort() tmppairs.reverse() #take only the keys and return them sortedvalues = [] for (v,k) in tmppairs: sortedvalues.append(k) return sortedvalues def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True): """ Analyze RECIDS and look for TAGS and return most popular values and the frequency with which they occur sorted according to descending frequency. If a value is found in EXCLUDE_VALUES, then do not count it. If COUNT_REPETITIVE_VALUES is True, then we could every occurrence of value in the tags. If False, then we could the value only once regardless of the number of times it may appear in a record. Example: >>> get_most_popular_field_values(range(11,20), '980__a') (('PREPRINT', 10), ('THESIS', 7), ...) >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a')) (('Ellis, J', 10), ('Ellis, N', 7), ...) >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J')) (('Ellis, N', 7), ...) """ def _get_most_popular_field_values_helper_sorter(val1, val2): "Compare VAL1 and VAL2 according to, firstly, frequency, then secondly, alphabetically." compared_via_frequencies = cmp(valuefreqdict[val2], valuefreqdict[val1]) if compared_via_frequencies == 0: return cmp(val1.lower(), val2.lower()) else: return compared_via_frequencies valuefreqdict = {} # sanity check: if isinstance(tags, str): tags = (tags,) # find values and their frequencies: for recid in recids: vals_in_rec = [] for tag in tags: for val in get_fieldvalues(recid, tag): vals_in_rec.append(val) if not count_repetitive_values: # do not count repetitive values dtmp = {} for val in vals_in_rec: dtmp[val] = 1 vals_in_rec = dtmp.keys() for val in vals_in_rec: if (not exclude_values) or \ (exclude_values and val not in exclude_values): if valuefreqdict.has_key(val): valuefreqdict[val] += 1 else: valuefreqdict[val] = 1 # sort by descending frequency of values: out = () vals = valuefreqdict.keys() vals.sort(_get_most_popular_field_values_helper_sorter) for val in vals: out += (val, valuefreqdict[val]), return out def profile(p="", f="", c=CFG_SITE_NAME): """Profile search time.""" import profile import pstats profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile") p = pstats.Stats("perform_request_search_profile") p.strip_dirs().sort_stats("cumulative").print_stats() return 0 ## test cases: #print wash_colls(CFG_SITE_NAME,"Library Catalogue", 0) #print wash_colls("Periodicals & Progress Reports",["Periodicals","Progress Reports"], 0) #print wash_field("wau") #print print_record(20,"tm","001,245") #print create_opft_search_units(None, "PHE-87-13","reportnumber") #print ":"+wash_pattern("* and % doo * %")+":\n" #print ":"+wash_pattern("*")+":\n" #print ":"+wash_pattern("ellis* ell* e*%")+":\n" #print run_sql("SELECT name,dbquery from collection") #print get_index_id("author") #print 
get_coll_ancestors("Theses") #print get_coll_sons("Articles & Preprints") #print get_coll_real_descendants("Articles & Preprints") #print get_collection_reclist("Theses") #print log(sys.stdin) #print search_unit_in_bibrec('2002-12-01','2002-12-12') #print type(wash_url_argument("-1",'int')) #print get_nearest_terms_in_bibxxx("ellis", "author", 5, 5) #print call_bibformat(68, "HB_FLY") #print create_collection_i18nname_cache() #print get_fieldvalues(10, "980__a") #print get_fieldvalues_alephseq_like(10,"001___") #print get_fieldvalues_alephseq_like(10,"980__a") #print get_fieldvalues_alephseq_like(10,"foo") #print get_fieldvalues_alephseq_like(10,"-1") #print get_fieldvalues_alephseq_like(10,"99") #print get_fieldvalues_alephseq_like(10,["001", "980"]) ## profiling: #profile("of the this") #print perform_request_search(p="ellis") diff --git a/modules/websearch/lib/search_engine_summarizer.py b/modules/websearch/lib/search_engine_summarizer.py index 4d2c5fe33..ed41e784c 100644 --- a/modules/websearch/lib/search_engine_summarizer.py +++ b/modules/websearch/lib/search_engine_summarizer.py @@ -1,172 +1,172 @@ # -*- coding: utf-8 -*- ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Search Engine Summarizer, producing summary formats such as citesummary. The main API is summarize_records(). """ __lastupdated__ = """$Date$""" __revision__ = "$Id$" from invenio.config import CFG_INSPIRE_SITE from invenio.bibrank_citation_searcher import get_cited_by_list import search_engine import invenio.template websearch_templates = invenio.template.load('websearch') ## CFG_CITESUMMARY_COLLECTIONS -- how do we break down cite summary ## results according to collections? if CFG_INSPIRE_SITE: CFG_CITESUMMARY_COLLECTIONS = [['All papers', 'collection:citeable'], ['Published only', 'collection:citeable collection:published']] else: CFG_CITESUMMARY_COLLECTIONS = [['All papers', ''], ['Published only', 'collection:article']] ## CFG_CITESUMMARY_FAME_THRESHOLDS -- how do we break down cite ## summary results into famous and less famous paper groups? CFG_CITESUMMARY_FAME_THRESHOLDS = [ (500, 1000000, 'Renowned papers (500+)'), (250, 499, 'Famous papers (250-499)'), (100, 249, 'Very well-known papers (100-249)'), (50, 99, 'Well-known papers (50-99)'), (10, 49, 'Known papers (10-49)'), (1, 9, 'Less known papers (1-9)'), (0, 0, 'Unknown papers (0)') ] def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None): """Write summary report for records RECIDS in the format OF in language LN. SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS, for instance p='Smith, Paul' and f='author'. They are used for links. REQ is the Apache/mod_python request object. 
""" - + import search_engine if of == 'hcs': # this is HTML cite summary # 1) hcs prologue: d_recids = {} d_total_recs = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: if not colldef: d_recids[coll] = recids else: d_recids[coll] = recids & search_engine.search_pattern(p=colldef) d_total_recs[coll] = len(d_recids[coll]) req.write(websearch_templates.tmpl_citesummary_prologue(d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern, searchfield, ln)) # 2) hcs overview: d_recid_citers = {} d_total_cites = {} d_avg_cites = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: d_total_cites[coll] = 0 d_avg_cites[coll] = 0 d_recid_citers[coll] = get_cited_by_list(d_recids[coll]) for recid, lciters in d_recid_citers[coll]: if lciters: d_total_cites[coll] += len(lciters) if d_total_cites[coll] != 0: d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll] req.write(websearch_templates.tmpl_citesummary_overview(d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln)) # 3) hcs break down by fame: for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS: d_cites = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: d_cites[coll] = 0 for recid, lciters in d_recid_citers[coll]: numcites = 0 if lciters: numcites = len(lciters) if numcites >= low and numcites <= high: d_cites[coll] += 1 req.write(websearch_templates.tmpl_citesummary_breakdown_by_fame(d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS, searchpattern, searchfield, ln)) # 4) hcs epilogue: req.write(websearch_templates.tmpl_citesummary_epilogue(ln)) return '' elif of == 'xcs': # this is XML cite summary citedbylist = get_cited_by_list(recids) return print_citation_summary_xml(citedbylist) #for citation summary, code xcs/hcs (unless changed) def print_citation_summary_xml(citedbylist): """Prints citation summary in xml.""" alldict = calculate_citations(citedbylist) avgstr = str(alldict['avgcites']) totalcites = str(alldict['totalcites']) #format avg so that it does not span 10 digits avgstr = avgstr[0:4] reciddict = alldict['reciddict'] #output formatting outp = "" for low, high, name in CFG_CITESUMMARY_FAME_THRESHOLDS: #get the name, print the value if reciddict.has_key(name): recs = reciddict[name] outp += ""+name outp += ""+str(recs)+"" outp += "\n" outp = outp + "" #req.write(outp) return outp #just to return something def calculate_citations(citedbylist): """calculates records in classes of citations defined by thresholds. 
returns a dictionary that contains total, avg, records and a dictionary of threshold names and number corresponding to it""" totalcites = 0 avgcites = 0 reciddict = {} for recid, cites in citedbylist: numcites = 0 if cites: numcites = len(cites) totalcites = totalcites + numcites #take the numbers in CFG_CITESUMMARY_FAME_THRESHOLDS for low, high, name in CFG_CITESUMMARY_FAME_THRESHOLDS: if (numcites >= low) and (numcites <= high): if reciddict.has_key(name): tmp = reciddict[name] tmp.append(recid) reciddict[name] = tmp else: reciddict[name] = [recid] if (len(citedbylist) == 0): avgcites = 0 else: avgcites = totalcites*1.0/len(citedbylist) #create a dictionary that contains all the values alldict = {} alldict['records'] = len(citedbylist) alldict['totalcites'] = totalcites alldict['avgcites'] = avgcites alldict['reciddict'] = reciddict return alldict diff --git a/modules/websubmit/lib/bibdocfile.py b/modules/websubmit/lib/bibdocfile.py index 8ccffd64b..cd5583cd2 100644 --- a/modules/websubmit/lib/bibdocfile.py +++ b/modules/websubmit/lib/bibdocfile.py @@ -1,2501 +1,2501 @@ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = "$Id$" import os import re import shutil import md5 import filecmp import time import random import socket import urllib2 import urllib import tempfile import cPickle import base64 import binascii import cgi try: import magic CFG_HAS_MAGIC = True except ImportError: CFG_HAS_MAGIC = False from datetime import datetime from mimetypes import MimeTypes from thread import get_ident try: from mod_python import apache except ImportError: pass ## Let's set a reasonable timeout for URL request (e.g. 
FFT) socket.setdefaulttimeout(40) try: set except NameError: from sets import Set as set from invenio.shellutils import escape_shell_arg from invenio.dbquery import run_sql, DatabaseError, blob_to_string from invenio.errorlib import register_exception -from invenio.bibrecord import create_record, record_get_field_instances, \ +from invenio.bibrecord import record_get_field_instances, \ field_get_subfield_values, field_get_subfield_instances, \ encode_for_xml from invenio.access_control_engine import acc_authorize_action from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, \ CFG_WEBDIR, CFG_WEBSUBMIT_FILEDIR,\ CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \ CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL, \ CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS, \ CFG_TMPDIR, CFG_PATH_MD5SUM, \ CFG_WEBSUBMIT_STORAGEDIR from invenio.bibformat import format_record import invenio.template websubmit_templates = invenio.template.load('websubmit') websearch_templates = invenio.template.load('websearch') CFG_BIBDOCFILE_MD5_THRESHOLD = 256 * 1024 CFG_BIBDOCFILE_MD5_BUFFER = 1024 * 1024 CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION = False KEEP_OLD_VALUE = 'KEEP-OLD-VALUE' _mimes = MimeTypes(strict=False) _mimes.suffix_map.update({'.tbz2' : '.tar.bz2'}) _mimes.encodings_map.update({'.bz2' : 'bzip2'}) _magic_cookies = {} def get_magic_cookies(): """Return a tuple of magic object. ... not real magic. Just see: man file(1)""" thread_id = get_ident() if thread_id not in _magic_cookies: _magic_cookies[thread_id] = { magic.MAGIC_NONE : magic.open(magic.MAGIC_NONE), magic.MAGIC_COMPRESS : magic.open(magic.MAGIC_COMPRESS), magic.MAGIC_MIME : magic.open(magic.MAGIC_MIME), magic.MAGIC_COMPRESS + magic.MAGIC_MIME : magic.open(magic.MAGIC_COMPRESS + magic.MAGIC_MIME) } for key in _magic_cookies[thread_id].keys(): _magic_cookies[thread_id][key].load() return _magic_cookies[thread_id] def _generate_extensions(): _tmp_extensions = _mimes.encodings_map.keys() + \ _mimes.suffix_map.keys() + \ _mimes.types_map[1].keys() + \ CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS _extensions = [] for ext in _tmp_extensions: if ext.startswith('.'): _extensions.append(ext) else: _extensions.append('.' + ext) _extensions.sort() _extensions.reverse() _extensions = set([ext.lower() for ext in _extensions]) return _extensions _extensions = _generate_extensions() class InvenioWebSubmitFileError(Exception): pass def file_strip_ext(afile, skip_version=False): """Strip in the best way the extension from a filename""" if skip_version: afile = afile.split(';')[0] lowfile = afile.lower() ext = '.' while ext: ext = '' for c_ext in _extensions: if lowfile.endswith(c_ext): lowfile = lowfile[0:-len(c_ext)] ext = c_ext break return afile[:len(lowfile)] def normalize_format(format): """Normalize the format.""" if format and format[0] != '.': format = '.' + format if CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION: if format not in ('.Z', '.H', '.C', '.CC'): format = format.lower() format = format.replace('.jpg', '.jpeg') return format _docname_re = re.compile(r'[^-\w.]*') def normalize_docname(docname): """Normalize the docname (only digit and alphabetic letters and underscore are allowed)""" #return _docname_re.sub('', docname) return docname def normalize_version(version): """Normalize the version.""" try: int(version) except ValueError: if version.lower().strip() == 'all': return 'all' else: return '' return str(version) def decompose_file(afile, skip_version=False): """Decompose a file into dirname, basename and extension. 
Note that if provided with a URL, the scheme in front will be part of the dirname.""" if skip_version: version = afile.split(';')[-1] try: int(version) afile = afile[:-len(version)-1] except ValueError: pass basename = os.path.basename(afile) dirname = afile[:-len(basename)-1] base = file_strip_ext(basename) extension = basename[len(base) + 1:] if extension: extension = '.' + extension return (dirname, base, extension) def decompose_file_with_version(afile): """Decompose a file into dirname, basename, extension and version. In case version does not exist it will raise ValueError. Note that if provided with a URL, the scheme in front will be part of the dirname.""" version_str = afile.split(';')[-1] version = int(version_str) afile = afile[:-len(version_str)-1] basename = os.path.basename(afile) dirname = afile[:-len(basename)-1] base = file_strip_ext(basename) extension = basename[len(base) + 1:] if extension: extension = '.' + extension return (dirname, base, extension, version) def propose_next_docname(docname): """Propose a next docname docname""" if '_' in docname: split_docname = docname.split('_') try: split_docname[-1] = str(int(split_docname[-1]) + 1) docname = '_'.join(split_docname) except ValueError: docname += '_1' else: docname += '_1' return docname class BibRecDocs: """this class represents all the files attached to one record""" def __init__(self, recid, deleted_too=False): self.id = recid self.deleted_too = deleted_too self.bibdocs = [] self.build_bibdoc_list() def __repr__(self): if self.deleted_too: return 'BibRecDocs(%s, True)' % self.id else: return 'BibRecDocs(%s)' % self.id def __str__(self): out = '%i::::total bibdocs attached=%i\n' % (self.id, len(self.bibdocs)) out += '%i::::total size latest version=%s\n' % (self.id, nice_size(self.get_total_size_latest_version())) out += '%i::::total size all files=%s\n' % (self.id, nice_size(self.get_total_size())) for bibdoc in self.bibdocs: out += str(bibdoc) return out def empty_p(self): """Return True if the bibrec is empty, i.e. 
it has no bibdocs connected.""" return len(self.bibdocs) == 0 def deleted_p(self): """Return True if the bibrec has been deleted.""" from invenio.search_engine import record_exists return record_exists(self.id) == -1 def get_xml_8564(self): """Return a snippet of XML representing the 8564 corresponding to the current state""" + from invenio.search_engine import get_record out = '' - xml = format_record(self.id, of='xm') - record = create_record(xml)[0] + record = get_record(self.id) fields = record_get_field_instances(record, '856', '4', ' ') for field in fields: url = field_get_subfield_values(field, 'u') if not bibdocfile_url_p(url): out += '\t\n' for subfield, value in field_get_subfield_instances(field): out += '\t\t%s\n' % (subfield, encode_for_xml(value)) out += '\t\n' for afile in self.list_latest_files(): out += '\t\n' url = afile.get_url() description = afile.get_description() comment = afile.get_comment() if url: out += '\t\t%s\n' % encode_for_xml(url) if description: out += '\t\t%s\n' % encode_for_xml(description) if comment: out += '\t\t%s\n' % encode_for_xml(comment) out += '\t\n' for bibdoc in self.bibdocs: icon = bibdoc.get_icon() if icon: icon = icon.list_all_files() if icon: out += '\t\n' out += '\t\t%s\n' % encode_for_xml(icon[0].get_url()) out += '\t\ticon\n' out += '\t\n' return out def get_total_size_latest_version(self): """Return the total size used on disk of all the files belonging to this record and corresponding to the latest version.""" size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size_latest_version() return size def get_total_size(self): """Return the total size used on disk of all the files belonging to this record of any version.""" size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size() return size def build_bibdoc_list(self): """This function must be called everytime a bibdoc connected to this recid is added, removed or modified. """ self.bibdocs = [] if self.deleted_too: res = run_sql("""SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s ORDER BY docname ASC""", (self.id,)) else: res = run_sql("""SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s AND status<>'DELETED' ORDER BY docname ASC""", (self.id,)) for row in res: cur_doc = BibDoc(docid=row[0], recid=self.id, doctype=row[1]) self.bibdocs.append(cur_doc) def list_bibdocs(self, doctype=''): """Returns the list all bibdocs object belonging to a recid. If doctype is set, it returns just the bibdocs of that doctype. """ if not doctype: return self.bibdocs else: return [bibdoc for bibdoc in self.bibdocs if doctype == bibdoc.doctype] def get_bibdoc_names(self, doctype=''): """Returns the names of the files associated with the bibdoc of a paritcular doctype""" return [bibdoc.docname for bibdoc in self.list_bibdocs(doctype)] def check_file_exists(self, path): """Returns 1 if the recid has a file identical to the one stored in path.""" size = os.path.getsize(path) # Let's consider all the latest files files = self.list_latest_files() # Let's consider all the latest files with same size potential = [afile for afile in files if afile.get_size() == size] if potential: checksum = calculate_md5(path) # Let's consider all the latest files with the same size and the # same checksum potential = [afile for afile in potential if afile.get_checksum() == checksum] if potential: potential = [afile for afile in potential if filecmp.cmp(afile.get_full_path(), path)] if potential: return True else: # Gosh! 
How unlucky, same size, same checksum but not same # content! pass return False def propose_unique_docname(self, docname): """Propose a unique docname.""" docname = normalize_docname(docname) goodname = docname i = 1 while goodname in self.get_bibdoc_names(): i += 1 goodname = "%s_%s" % (docname, i) return goodname def merge_bibdocs(self, docname1, docname2): """This method merge docname2 into docname1. Given all the formats of the latest version of docname2 the files are added as new formats into docname1. Docname2 is marked as deleted. This method fails if at least one format in docname2 already exists in docname1. (In this case the two bibdocs are preserved) Comments and descriptions are also copied and if docname2 has an icon and docname1 has not, the icon is imported. If docname2 has a restriction(status) and docname1 has not the restriction is imported.""" bibdoc1 = self.get_bibdoc(docname1) bibdoc2 = self.get_bibdoc(docname2) ## Check for possibility for bibdocfile in bibdoc2.list_latest_files(): format = bibdocfile.get_format() if bibdoc1.format_already_exists_p(format): raise InvenioWebSubmitFileError('Format %s already exists in bibdoc %s of record %s. It\'s impossible to merge bibdoc %s into it.' % (format, docname1, self.id, docname2)) ## Importing Icon if needed. icon1 = bibdoc1.get_icon() icon2 = bibdoc2.get_icon() if icon2 is not None and icon1 is None: icon = icon2.list_latest_files()[0] bibdoc1.add_icon(icon.get_full_path(), format=icon.get_format()) ## Importing restriction if needed. restriction1 = bibdoc1.get_status() restriction2 = bibdoc2.get_status() if restriction2 and not restriction1: bibdoc1.set_status(restriction2) ## Importing formats for bibdocfile in bibdoc2.list_latest_files(): format = bibdocfile.get_format() comment = bibdocfile.get_comment() description = bibdocfile.get_description() bibdoc1.add_file_new_format(bibdocfile.get_full_path(), description=description, comment=comment, format=format) ## Finally deleting old bibdoc2 bibdoc2.delete() self.build_bibdoc_list() def get_docid(self, docname): """Returns the docid corresponding to the given docname, if the docname is valid. """ for bibdoc in self.bibdocs: if bibdoc.docname == docname: return bibdoc.id raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \ "docname '%s'" % (self.id, docname) def get_docname(self, docid): """Returns the docname corresponding to the given docid, if the docid is valid. """ for bibdoc in self.bibdocs: if bibdoc.id == docid: return bibdoc.docname raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \ "docid '%s'" % (self.id, docid) def has_docname_p(self, docname): """Return True if a bibdoc with a particular docname belong to this record.""" for bibdoc in self.bibdocs: if bibdoc.docname == docname: return True return False def get_bibdoc(self, docname): """Returns the bibdoc with a particular docname associated with this recid""" for bibdoc in self.bibdocs: if bibdoc.docname == docname: return bibdoc raise InvenioWebSubmitFileError, "Recid '%s' is not connected with " \ " docname '%s'" % (self.id, docname) def delete_bibdoc(self, docname): """Deletes a docname associated with the recid.""" for bibdoc in self.bibdocs: if bibdoc.docname == docname: bibdoc.delete() self.build_bibdoc_list() def add_bibdoc(self, doctype="Main", docname='file', never_fail=False): """Creates a new bibdoc associated with the recid, with a file called docname and a particular doctype. It returns the bibdoc object which was just created. 
If never_fail is True then the system will always be able to create a bibdoc. """ try: docname = normalize_docname(docname) if never_fail: docname = self.propose_unique_docname(docname) if docname in self.get_bibdoc_names(): raise InvenioWebSubmitFileError, "%s has already a bibdoc with docname %s" % (self.id, docname) else: bibdoc = BibDoc(recid=self.id, doctype=doctype, docname=docname) self.build_bibdoc_list() return bibdoc except Exception, e: register_exception() raise InvenioWebSubmitFileError(str(e)) def add_new_file(self, fullpath, doctype="Main", docname=None, never_fail=False, description=None, comment=None, format=None): """Adds a new file with the following policy: if the docname is not set it is retrieved from the name of the file. If bibdoc with the given docname doesn't exist, it is created and the file is added to it. It it exist but it doesn't contain the format that is being added, the new format is added. If the format already exists then if never_fail is True a new bibdoc is created with a similar name but with a progressive number as a suffix and the file is added to it. The elaborated bibdoc is returned. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] docname = normalize_docname(docname) try: bibdoc = self.get_bibdoc(docname) except InvenioWebSubmitFileError: # bibdoc doesn't already exists! bibdoc = self.add_bibdoc(doctype, docname, False) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format) else: try: bibdoc.add_file_new_format(fullpath, description=description, comment=comment, format=format) except InvenioWebSubmitFileError, e: # Format already exist! if never_fail: bibdoc = self.add_bibdoc(doctype, docname, True) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format) else: raise e return bibdoc def add_new_version(self, fullpath, docname=None, description=None, comment=None, format=None, hide_previous_versions=False): """Adds a new fullpath file to an already existent docid making the previous files associated with the same bibdocids obsolete. It returns the bibdoc object. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, hide_previous_versions=hide_previous_versions) return bibdoc def add_new_format(self, fullpath, docname=None, description=None, comment=None, format=None): """Adds a new format for a fullpath file to an already existent docid along side already there files. It returns the bibdoc object. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_format(fullpath, description=description, comment=comment, format=format) return bibdoc def list_latest_files(self, doctype=''): """Returns a list which is made up by all the latest docfile of every bibdoc (of a particular doctype). 
""" docfiles = [] for bibdoc in self.list_bibdocs(doctype): docfiles += bibdoc.list_latest_files() return docfiles def display(self, docname="", version="", doctype="", ln=CFG_SITE_LANG, verbose=0, display_hidden=True): """Returns a formatted panel with information and links about a given docid of a particular version (or any), of a particular doctype (or any) """ t = "" if docname: try: bibdocs = [self.get_bibdoc(docname)] except InvenioWebSubmitFileError: bibdocs = self.list_bibdocs(doctype) else: bibdocs = self.list_bibdocs(doctype) if bibdocs: types = list_types_from_array(bibdocs) fulltypes = [] for mytype in types: fulltype = { 'name' : mytype, 'content' : [], } for bibdoc in bibdocs: if mytype == bibdoc.get_type(): fulltype['content'].append(bibdoc.display(version, ln=ln, display_hidden=display_hidden)) fulltypes.append(fulltype) if verbose >= 9: verbose_files = str(self) else: verbose_files = '' t = websubmit_templates.tmpl_bibrecdoc_filelist( ln=ln, types = fulltypes, verbose_files=verbose_files ) return t def fix(self, docname): """Algorithm that transform an a broken/old bibdoc into a coherent one: i.e. the corresponding folder will have files named after the bibdoc name. Proper .recid, .type, .md5 files will be created/updated. In case of more than one file with the same format revision a new bibdoc will be created in order to put does files. Returns the list of newly created bibdocs if any. """ bibdoc = self.get_bibdoc(docname) versions = {} res = [] new_bibdocs = [] # List of files with the same version/format of # existing file which need new bibdoc. counter = 0 zero_version_bug = False if os.path.exists(bibdoc.basedir): for filename in os.listdir(bibdoc.basedir): if filename[0] != '.' and ';' in filename: name, version = filename.split(';') try: version = int(version) except ValueError: # Strange name register_exception() raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file revision. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir) if version == 0: zero_version_bug = True format = name[len(file_strip_ext(name)):] format = normalize_format(format) if not versions.has_key(version): versions[version] = {} new_name = 'FIXING-%s-%s' % (str(counter), name) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e) if versions[version].has_key(format): new_bibdocs.append((new_name, version)) else: versions[version][format] = new_name counter += 1 elif filename[0] != '.': # Strange name register_exception() raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file revision. Please, manually fix this file either by renaming or by deleting it." 
% (filename, bibdoc.basedir) else: # we create the corresponding storage directory old_umask = os.umask(022) os.makedirs(bibdoc.basedir) # and save the father record id if it exists try: if self.id != "": recid_fd = open("%s/.recid" % bibdoc.basedir, "w") recid_fd.write(str(self.id)) recid_fd.close() if bibdoc.doctype != "": type_fd = open("%s/.type" % bibdoc.basedir, "w") type_fd.write(str(bibdoc.doctype)) type_fd.close() except Exception, e: register_exception() raise InvenioWebSubmitFileError, e os.umask(old_umask) if not versions: bibdoc.delete() else: for version, formats in versions.iteritems(): if zero_version_bug: version += 1 for format, filename in formats.iteritems(): destination = '%s%s;%i' % (docname, format, version) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e) try: recid_fd = open("%s/.recid" % bibdoc.basedir, "w") recid_fd.write(str(self.id)) recid_fd.close() type_fd = open("%s/.type" % bibdoc.basedir, "w") type_fd.write(str(bibdoc.doctype)) type_fd.close() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e) self.build_bibdoc_list() res = [] for (filename, version) in new_bibdocs: if zero_version_bug: version += 1 new_bibdoc = self.add_bibdoc(doctype=bibdoc.doctype, docname=docname, never_fail=True) new_bibdoc.add_file_new_format('%s/%s' % (bibdoc.basedir, filename), version) res.append(new_bibdoc) try: os.remove('%s/%s' % (bibdoc.basedir, filename)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e) Md5Folder(bibdoc.basedir).update(only_new=False) bibdoc._build_file_list() self.build_bibdoc_list() for bibdoc in self.bibdocs: if not run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (bibdoc.id,)): ## Import from MARC only if the bibdoc has never had ## its more_info initialized. try: bibdoc.import_descriptions_and_comments_from_marc() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in importing description and comment from %s for record %s: %s" % (repr(bibdoc), self.id, e) return res def check_format(self, docname): """In case CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS is altered or the Python version changes, it might happen that a docname contains files which are no longer named docname + .format ; version, simply because the .format is now recognized (and it was not before, so it was contained in the docname). This algorithm verifies whether a fix is necessary. Returns True if the format is correct, False if a fix is needed.""" bibdoc = self.get_bibdoc(docname) correct_docname = decompose_file(docname)[1] if docname != correct_docname: return False for filename in os.listdir(bibdoc.basedir): if not filename.startswith('.'): try: dummy, dummy, format, version = decompose_file_with_version(filename) except: raise InvenioWebSubmitFileError('Incorrect filename "%s" for docname %s for recid %i' % (filename, docname, self.id)) if '%s%s;%i' % (correct_docname, format, version) != filename: return False return True def check_duplicate_docnames(self): """Check whether the record is connected with at least two bibdocs sharing the same docname. Returns True if everything is fine.
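A hypothetical maintenance sketch (the recid is made up):
    bibrecdocs = BibRecDocs(123)
    if not bibrecdocs.check_duplicate_docnames():
        bibrecdocs.fix_duplicate_docnames()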
""" docnames = set() for docname in self.get_bibdoc_names(): if docname in docnames: return False else: docnames.add(docname) return True def uniformize_bibdoc(self, docname): """This algorithm correct wrong file name belonging to a bibdoc.""" bibdoc = self.get_bibdoc(docname) for filename in os.listdir(bibdoc.basedir): if not filename.startswith('.'): try: dummy, dummy, format, version = decompose_file_with_version(filename) except ValueError: register_exception(alert_admin=True, prefix= "Strange file '%s' is stored in %s" % (filename, bibdoc.basedir)) else: os.rename(os.path.join(bibdoc.basedir, filename), os.path.join(bibdoc.basedir, '%s%s;%i' % (docname, format, version))) Md5Folder(bibdoc.basedir).update() bibdoc.touch() bibdoc._build_file_list('rename') def fix_format(self, docname, skip_check=False): """ Fixing this situation require different steps, because docname might already exists. This algorithm try to fix this situation. In case a merging is needed the algorithm return False if the merging is not possible. """ if not skip_check: if self.check_format(docname): return True bibdoc = self.get_bibdoc(docname) correct_docname = decompose_file(docname)[1] need_merge = False if correct_docname != docname: need_merge = self.has_docname_p(correct_docname) if need_merge: proposed_docname = self.propose_unique_docname(correct_docname) run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (proposed_docname, bibdoc.id)) self.build_bibdoc_list() self.uniformize_bibdoc(proposed_docname) try: self.merge_bibdocs(docname, proposed_docname) except InvenioWebSubmitFileError: return False else: run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (correct_docname, bibdoc.id)) self.build_bibdoc_list() self.uniformize_bibdoc(correct_docname) else: self.uniformize_bibdoc(docname) return True def fix_duplicate_docnames(self, skip_check=False): """Algotirthm to fix duplicate docnames. If a record is connected with at least two bibdoc having the same docname, the algorithm will try to merge them. """ if not skip_check: if self.check_duplicate_docnames(): return docnames = set() for bibdoc in self.list_bibdocs(): docname = bibdoc.docname if docname in docnames: new_docname = self.propose_unique_docname(bibdoc.docname) bibdoc.change_name(new_docname) self.merge_bibdocs(docname, new_docname) docnames.add(docname) class BibDoc: """this class represents one file attached to a record there is a one to one mapping between an instance of this class and an entry in the bibdoc db table""" def __init__ (self, docid="", recid="", docname="file", doctype="Main"): """Constructor of a bibdoc. At least the docid or the recid/docname pair is needed.""" # docid is known, the document already exists docname = normalize_docname(docname) self.docfiles = [] self.md5s = None self.related_files = [] if docid != "": if recid == "": recid = None self.doctype = "" res = run_sql("select id_bibrec,type from bibrec_bibdoc " "where id_bibdoc=%s", (docid,)) if len(res) > 0: recid = res[0][0] self.doctype = res[0][1] else: res = run_sql("select id_bibdoc1 from bibdoc_bibdoc " "where id_bibdoc2=%s", (docid,)) if len(res) > 0 : main_bibdoc = res[0][0] res = run_sql("select id_bibrec,type from bibrec_bibdoc " "where id_bibdoc=%s", (main_bibdoc,)) if len(res) > 0: recid = res[0][0] self.doctype = res[0][1] else: res = run_sql("select type from bibrec_bibdoc " "where id_bibrec=%s and id_bibdoc=%s", (recid, docid,)) if len(res) > 0: self.doctype = res[0][0] else: #this bibdoc isn't associated with the corresponding bibrec. 
def __init__ (self, docid="", recid="", docname="file", doctype="Main"): """Constructor of a bibdoc. At least the docid or the recid/docname pair is needed.""" # docid is known, the document already exists docname = normalize_docname(docname) self.docfiles = [] self.md5s = None self.related_files = [] if docid != "": if recid == "": recid = None self.doctype = "" res = run_sql("select id_bibrec,type from bibrec_bibdoc " "where id_bibdoc=%s", (docid,)) if len(res) > 0: recid = res[0][0] self.doctype = res[0][1] else: res = run_sql("select id_bibdoc1 from bibdoc_bibdoc " "where id_bibdoc2=%s", (docid,)) if len(res) > 0 : main_bibdoc = res[0][0] res = run_sql("select id_bibrec,type from bibrec_bibdoc " "where id_bibdoc=%s", (main_bibdoc,)) if len(res) > 0: recid = res[0][0] self.doctype = res[0][1] else: res = run_sql("select type from bibrec_bibdoc " "where id_bibrec=%s and id_bibdoc=%s", (recid, docid,)) if len(res) > 0: self.doctype = res[0][0] else: #this bibdoc isn't associated with the corresponding bibrec. raise InvenioWebSubmitFileError, "Docid %s is not associated with the recid %s" % (docid, recid) # gather the other information res = run_sql("select id,status,docname,creation_date," "modification_date,more_info from bibdoc where id=%s", (docid,)) if len(res) > 0: self.cd = res[0][3] self.md = res[0][4] self.recid = recid self.docname = res[0][2] self.id = docid self.status = res[0][1] self.more_info = BibDocMoreInfo(docid, blob_to_string(res[0][5])) self.basedir = _make_base_dir(self.id) else: # this bibdoc doesn't exist raise InvenioWebSubmitFileError, "The docid %s does not exist." % docid # else it is a new document else: if docname == "" or doctype == "": raise InvenioWebSubmitFileError, "Argument missing for creating a new bibdoc" else: self.recid = recid self.doctype = doctype self.docname = docname self.status = '' if recid: res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (recid, docname)) if res: raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (docname, recid) self.id = run_sql("INSERT INTO bibdoc (status,docname,creation_date,modification_date) " "values(%s,%s,NOW(),NOW())", (self.status, docname)) if self.id is not None: # we link the document to the record if a recid was # specified self.more_info = BibDocMoreInfo(self.id) res = run_sql("SELECT creation_date, modification_date FROM bibdoc WHERE id=%s", (self.id,)) self.cd = res[0][0] self.md = res[0][1] else: raise InvenioWebSubmitFileError, "New docid cannot be created" try: self.basedir = _make_base_dir(self.id) # we create the corresponding storage directory if not os.path.exists(self.basedir): old_umask = os.umask(022) os.makedirs(self.basedir) # and save the father record id if it exists try: if self.recid != "": recid_fd = open("%s/.recid" % self.basedir, "w") recid_fd.write(str(self.recid)) recid_fd.close() if self.doctype != "": type_fd = open("%s/.type" % self.basedir, "w") type_fd.write(str(self.doctype)) type_fd.close() except Exception, e: register_exception() raise InvenioWebSubmitFileError, e os.umask(old_umask) if self.recid != "": run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type) VALUES (%s,%s,%s)", (recid, self.id, self.doctype,)) except Exception, e: run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, )) run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, )) register_exception() raise InvenioWebSubmitFileError, e # build list of attached files self._build_file_list('init') # link with related_files self._build_related_file_list() def __repr__(self): return 'BibDoc(%s, %s, %s, %s)' % (repr(self.id), repr(self.recid), repr(self.docname), repr(self.doctype)) def __str__(self): out = '%s:%i:::docname=%s\n' % (self.recid or '', self.id, self.docname) out += '%s:%i:::doctype=%s\n' % (self.recid or '', self.id, self.doctype) out += '%s:%i:::status=%s\n' % (self.recid or '', self.id, self.status) out += '%s:%i:::basedir=%s\n' % (self.recid or '', self.id, self.basedir) out += '%s:%i:::creation date=%s\n' % (self.recid or '', self.id, self.cd) out += '%s:%i:::modification date=%s\n' % (self.recid or '', self.id, self.md) out += '%s:%i:::total file attached=%s\n' % (self.recid or '', self.id, len(self.docfiles)) out += '%s:%i:::total size latest version=%s\n' % (self.recid or '', self.id, nice_size(self.get_total_size_latest_version())) out += '%s:%i:::total size all files=%s\n' % (self.recid or '', self.id, nice_size(self.get_total_size())) for docfile in self.docfiles: out += str(docfile)
icon = self.get_icon() if icon: out += str(self.get_icon()) return out def format_already_exists_p(self, format): """Return True if the given format already exists among the latest files.""" format = normalize_format(format) for afile in self.list_latest_files(): if format == afile.get_format(): return True return False def get_status(self): """Retrieve the status.""" return self.status def touch(self): """Update the modification time of the bibdoc.""" run_sql('UPDATE bibdoc SET modification_date=NOW() WHERE id=%s', (self.id, )) #if self.recid: #run_sql('UPDATE bibrec SET modification_date=NOW() WHERE id=%s', (self.recid, )) def set_status(self, new_status): """Set a new status.""" if new_status != KEEP_OLD_VALUE: if new_status == 'DELETED': raise InvenioWebSubmitFileError('DELETED is a reserved word and cannot be used for setting the status') run_sql('UPDATE bibdoc SET status=%s WHERE id=%s', (new_status, self.id)) self.status = new_status self.touch() self._build_file_list() self._build_related_file_list() def add_file_new_version(self, filename, description=None, comment=None, format=None, hide_previous_versions=False): """Add a new version of a file.""" try: latestVersion = self.get_latest_version() if latestVersion == 0: myversion = 1 else: myversion = latestVersion + 1 if os.path.exists(filename): if not os.path.getsize(filename) > 0: raise InvenioWebSubmitFileError, "%s seems to be empty" % filename if format is None: format = decompose_file(filename)[2] destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, myversion) try: shutil.copyfile(filename, destination) os.chmod(destination, 0644) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) self.more_info.set_description(description, format, myversion) self.more_info.set_comment(comment, format, myversion) for afile in self.list_all_files(): format = afile.get_format() version = afile.get_version() if version < myversion: self.more_info.set_hidden(hide_previous_versions, format, version) else: raise InvenioWebSubmitFileError, "'%s' does not exist!" % filename finally: self.touch() Md5Folder(self.basedir).update() self._build_file_list()
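# A hypothetical usage sketch (the path is made up): a later call with a new
# file creates the next version and can hide the previous ones:
#     bibdoc.add_file_new_version('/tmp/sample_v2.pdf', hide_previous_versions=True)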
def purge(self): """Physically remove all the previous versions of the given bibdoc.""" version = self.get_latest_version() if version > 1: for afile in self.docfiles: if afile.get_version() < version: self.more_info.unset_comment(afile.get_format(), afile.get_version()) self.more_info.unset_description(afile.get_format(), afile.get_version()) self.more_info.unset_hidden(afile.get_format(), afile.get_version()) try: os.remove(afile.get_full_path()) except Exception, e: register_exception() Md5Folder(self.basedir).update() self.touch() self._build_file_list() def expunge(self): """Physically remove all traces of a given bibdoc. Note that you should not use this object any more afterwards, or unpredictable things will happen.""" del self.md5s del self.more_info os.system('rm -rf %s' % escape_shell_arg(self.basedir)) run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, )) run_sql('DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s OR id_bibdoc2=%s', (self.id, self.id)) run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, )) run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, doctimestamp) VALUES("EXPUNGE", %s, %s, NOW())', (self.id, self.docname)) del self.docfiles del self.id del self.cd del self.md del self.basedir del self.recid del self.doctype del self.docname def revert(self, version): """Revert to a given version by copying its different formats to a new version.""" try: version = int(version) new_version = self.get_latest_version() + 1 for docfile in self.list_version_files(version): destination = "%s/%s%s;%i" % (self.basedir, self.docname, docfile.get_format(), new_version) if os.path.exists(destination): raise InvenioWebSubmitFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, docfile.get_format()) try: shutil.copyfile(docfile.get_full_path(), destination) os.chmod(destination, 0644) self.more_info.set_comment(self.more_info.get_comment(docfile.get_format(), version), docfile.get_format(), new_version) self.more_info.set_description(self.more_info.get_description(docfile.get_format(), version), docfile.get_format(), new_version) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (docfile.get_full_path(), destination, e) finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list() def import_descriptions_and_comments_from_marc(self, record=None): """Import descriptions & comments from the corresponding MARC.
If record is passed it is directly used, otherwise it is retrieved from the database.""" ## Let's get the record + from invenio.search_engine import get_record if record is None: - xml = format_record(self.id, of='xm') - record = create_record(xml)[0] + record = get_record(self.id) fields = record_get_field_instances(record, '856', '4', ' ') global_comment = None global_description = None local_comment = {} local_description = {} for field in fields: url = field_get_subfield_values(field, 'u') if url: ## Given a url url = url[0] if url == '%s/record/%s/files/' % (CFG_SITE_URL, self.recid): ## If it is a traditional /record/1/files/ one ## We have global description/comment for all the formats description = field_get_subfield_values(field, 'y') if description: global_description = description[0] comment = field_get_subfield_values(field, 'z') if comment: global_comment = comment[0] elif bibdocfile_url_p(url): ## Otherwise we have description/comment per format dummy, docname, format = decompose_bibdocfile_url(url) if docname == self.docname: description = field_get_subfield_values(field, 'y') if description: local_description[format] = description[0] comment = field_get_subfield_values(field, 'z') if comment: local_comment[format] = comment[0] ## Let's update the tables version = self.get_latest_version() for docfile in self.list_latest_files(): format = docfile.get_format() if format in local_comment: self.set_comment(local_comment[format], format, version) else: self.set_comment(global_comment, format, version) if format in local_description: self.set_description(local_description[format], format, version) else: self.set_description(global_description, format, version) self._build_file_list('init') def add_file_new_format(self, filename, version=None, description=None, comment=None, format=None): """Add a new format of a file to the bibdoc.""" try: if version is None: version = self.get_latest_version() if version == 0: version = 1 if os.path.exists(filename): if not os.path.getsize(filename) > 0: raise InvenioWebSubmitFileError, "%s seems to be empty" % filename if format is None: format = decompose_file(filename)[2] destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, version) if os.path.exists(destination): raise InvenioWebSubmitFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, format) try: shutil.copyfile(filename, destination) os.chmod(destination, 0644) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) self.more_info.set_comment(comment, format, version) self.more_info.set_description(description, format, version) else: raise InvenioWebSubmitFileError, "'%s' does not exist!" % filename finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list() def get_icon(self): """Returns the bibdoc corresponding to an icon of the given bibdoc.""" if self.related_files.has_key('Icon'): return self.related_files['Icon'][0] else: return None def add_icon(self, filename, basename=None, format=None): """Links an icon with the bibdoc object.
Return the icon bibdoc.""" #first check if an icon already exists existing_icon = self.get_icon() if existing_icon is not None: existing_icon.delete() #then add the new one if basename is None: basename = 'icon-%s' % self.docname if format is None: format = decompose_file(filename)[2] newicon = BibDoc(doctype='Icon', docname=basename) newicon.add_file_new_version(filename, format=format) try: try: old_umask = os.umask(022) recid_fd = open("%s/.docid" % newicon.get_base_dir(), "w") recid_fd.write(str(self.id)) recid_fd.close() type_fd = open("%s/.type" % newicon.get_base_dir(), "w") type_fd.write(str(self.doctype)) type_fd.close() os.umask(old_umask) run_sql("INSERT INTO bibdoc_bibdoc (id_bibdoc1, id_bibdoc2, type) VALUES (%s,%s,'Icon')", (self.id, newicon.get_id(),)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while writing .docid and .type for folder '%s': '%s'" % (newicon.get_base_dir(), e) finally: Md5Folder(newicon.basedir).update() self.touch() self._build_related_file_list() return newicon def delete_icon(self): """Removes the current icon if it exists.""" existing_icon = self.get_icon() if existing_icon is not None: existing_icon.delete() self.touch() self._build_related_file_list() def display(self, version="", ln=CFG_SITE_LANG, display_hidden=True): """Returns a formatted representation of the files linked with the bibdoc. """ t = "" if version == "all": docfiles = self.list_all_files(list_hidden=display_hidden) elif version != "": version = int(version) docfiles = self.list_version_files(version, list_hidden=display_hidden) else: docfiles = self.list_latest_files() existing_icon = self.get_icon() if existing_icon is not None: existing_icon = existing_icon.list_all_files()[0] imageurl = "%s/record/%s/files/%s" % \ (CFG_SITE_URL, self.recid, urllib.quote(existing_icon.get_full_name())) else: imageurl = "%s/img/smallfiles.gif" % CFG_SITE_URL versions = [] for version in list_versions_from_array(docfiles): currversion = { 'version' : version, 'previous' : 0, 'content' : [] } if version == self.get_latest_version() and version != 1: currversion['previous'] = 1 for docfile in docfiles: if docfile.get_version() == version: currversion['content'].append(docfile.display(ln = ln)) versions.append(currversion) t = websubmit_templates.tmpl_bibdoc_filelist( ln = ln, versions = versions, imageurl = imageurl, docname = self.docname, recid = self.recid ) return t def change_name(self, newname): """Rename the bibdoc.
The new name must not already be in use by the linked bibrec.""" try: newname = normalize_docname(newname) res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (self.recid, newname)) if res: raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid) try: for f in os.listdir(self.basedir): if not f.startswith('.'): try: (dummy, base, extension, version) = decompose_file_with_version(f) except ValueError: register_exception(alert_admin=True, prefix="Strange file '%s' is stored in %s" % (f, self.basedir)) else: shutil.move(os.path.join(self.basedir, f), os.path.join(self.basedir, '%s%s;%i' % (newname, extension, version))) except Exception, e: register_exception() raise InvenioWebSubmitFileError("Error in renaming the bibdoc %s to %s for recid %s: %s" % (self.docname, newname, self.recid, e)) run_sql("update bibdoc set docname=%s where id=%s", (newname, self.id,)) self.docname = newname finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list('rename') self._build_related_file_list() def set_comment(self, comment, format, version=None): """Update the comment of a format/version.""" if version is None: version = self.get_latest_version() self.more_info.set_comment(comment, format, version) self.touch() self._build_file_list('init') def set_description(self, description, format, version=None): """Update the description of a format/version.""" if version is None: version = self.get_latest_version() self.more_info.set_description(description, format, version) self.touch() self._build_file_list('init') def set_hidden(self, hidden, format, version=None): """Update the hidden flag for format/version.""" if version is None: version = self.get_latest_version() self.more_info.set_hidden(hidden, format, version) self.touch() self._build_file_list('init') def get_comment(self, format, version=None): """Get a comment for a given format/version.""" if version is None: version = self.get_latest_version() return self.more_info.get_comment(format, version) def get_description(self, format, version=None): """Get a description for a given format/version.""" if version is None: version = self.get_latest_version() return self.more_info.get_description(format, version) def hidden_p(self, format, version=None): """Is the format/version hidden?""" if version is None: version = self.get_latest_version() return self.more_info.hidden_p(format, version) def get_docname(self): """retrieve bibdoc name""" return self.docname def get_base_dir(self): """retrieve bibdoc base directory, e.g. /soft/cdsweb/var/data/files/123""" return self.basedir def get_type(self): """retrieve bibdoc doctype""" return self.doctype def get_recid(self): """retrieve bibdoc recid""" return self.recid def get_id(self): """retrieve bibdoc id""" return self.id def get_file(self, format, version=""): """Return the BibDocFile with the given format (the extension) and version.
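A hypothetical usage sketch (the format and version are made up):
    docfile = bibdoc.get_file('.pdf', version=2)
    print docfile.get_full_path()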
""" if version == "": docfiles = self.list_latest_files() else: version = int(version) docfiles = self.list_version_files(version) format = normalize_format(format) for docfile in docfiles: if (docfile.get_format()==format or not format): return docfile raise InvenioWebSubmitFileError, "No file called '%s' of format '%s', version '%s'" % (self.docname, format, version) def list_versions(self): """Returns the list of existing version numbers for a given bibdoc.""" versions = [] for docfile in self.docfiles: if not docfile.get_version() in versions: versions.append(docfile.get_version()) return versions def delete(self): """delete the current bibdoc instance.""" try: today = datetime.today() self.change_name('DELETED-%s%s-%s' % (today.strftime('%Y%m%d%H%M%S'), today.microsecond, self.docname)) run_sql("UPDATE bibdoc SET status='DELETED' WHERE id=%s", (self.id,)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "It's impossible to delete bibdoc %s: %s" % (self.id, e) def deleted_p(self): """Return True if the bibdoc has been deleted.""" return self.status == 'DELETED' def empty_p(self): """Return True if the bibdoc is empty, i.e. it has no bibdocfile connected.""" return len(self.docfiles) == 0 def undelete(self, previous_status=''): """undelete a deleted file (only if it was actually deleted). The previous status, i.e. the restriction key can be provided. Otherwise the bibdoc will pe public.""" bibrecdocs = BibRecDocs(self.recid) try: run_sql("UPDATE bibdoc SET status=%s WHERE id=%s AND status='DELETED'", (self.id, previous_status)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "It's impossible to undelete bibdoc %s: %s" % (self.id, e) if self.docname.startswith('DELETED-'): try: # Let's remove DELETED-20080214144322- in front of the docname original_name = '-'.join(self.docname.split('-')[2:]) original_name = bibrecdocs.propose_unique_docname(original_name) self.change_name(original_name) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "It's impossible to restore the previous docname %s. %s kept as docname because: %s" % (original_name, self.docname, e) else: raise InvenioWebSubmitFileError, "Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % self.docname def delete_file(self, format, version): """Delete on the filesystem the particular format version. Note, this operation is not reversible!""" try: afile = self.get_file(format, version) except InvenioWebSubmitFileError: return try: os.remove(afile.get_full_path()) except OSError: pass self.touch() self._build_file_list() def get_history(self): """Return a string with a line for each row in the history for the given docid.""" ret = [] hst = run_sql("""SELECT action, docname, docformat, docversion, docsize, docchecksum, doctimestamp FROM hstDOCUMENT WHERE id_bibdoc=%s ORDER BY doctimestamp ASC""", (self.id, )) for row in hst: ret.append("%s %s '%s', format: '%s', version: %i, size: %s, checksum: '%s'" % (row[6].strftime('%Y-%m-%d %H:%M:%S'), row[0], row[1], row[2], row[3], nice_size(row[4]), row[5])) return ret def _build_file_list(self, context=''): """Lists all files attached to the bibdoc. This function should be called everytime the bibdoc is modified. 
As a side effect it logs everything that has happened to the bibdocfiles in the log facility, according to the context: "init": means that the function has been called for the first time by a constructor, hence no logging is performed; "": the default, means to log every deleted file as deleted and every added file as added; "rename": means that every apparently deleted file is logged as RENAMEDFROM and every new file as RENAMEDTO. """ def log_action(action, docid, docname, format, version, size, checksum, timestamp=''): """Log an action into the bibdoclog table.""" try: if timestamp: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)', (action, docid, docname, format, version, size, checksum, timestamp)) else: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, NOW())', (action, docid, docname, format, version, size, checksum)) except DatabaseError: register_exception() def make_removed_added_bibdocfiles(previous_file_list): """Internal function for building the log of changed files.""" # Let's rebuild the previous situation old_files = {} for bibdocfile in previous_file_list: old_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's rebuild the new situation new_files = {} for bibdocfile in self.docfiles: new_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's subtract from the added files all the files that are present # in the old list, and let's add to the deleted files those that are # not present among the added files. added_files = dict(new_files) deleted_files = {} for key, value in old_files.iteritems(): if added_files.has_key(key): del added_files[key] else: deleted_files[key] = value return (added_files, deleted_files)
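# A tiny worked example (hypothetical values): with
#     old_files = {('a', '.pdf', 1): (10, 'x', 'm1')}
#     new_files = {('a', '.pdf', 1): (10, 'x', 'm1'), ('a', '.pdf', 2): (12, 'y', 'm2')}
# added_files ends up as {('a', '.pdf', 2): (12, 'y', 'm2')} and deleted_files as {}.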
if context != 'init': previous_file_list = list(self.docfiles) self.docfiles = [] if os.path.exists(self.basedir): self.md5s = Md5Folder(self.basedir) files = os.listdir(self.basedir) files.sort() for afile in files: if not afile.startswith('.'): try: filepath = os.path.join(self.basedir, afile) fileversion = int(re.sub(".*;", "", afile)) fullname = afile.replace(";%s" % fileversion, "") checksum = self.md5s.get_checksum(afile) (dirname, basename, format) = decompose_file(fullname) comment = self.more_info.get_comment(format, fileversion) description = self.more_info.get_description(format, fileversion) hidden = self.more_info.hidden_p(format, fileversion) # we can append file: self.docfiles.append(BibDocFile(filepath, self.doctype, fileversion, basename, format, self.recid, self.id, self.status, checksum, description, comment, hidden)) except Exception, e: register_exception() if context == 'init': return else: added_files, deleted_files = make_removed_added_bibdocfiles(previous_file_list) deletedstr = "DELETED" addedstr = "ADDED" if context == 'rename': deletedstr = "RENAMEDFROM" addedstr = "RENAMEDTO" for (docname, format, version), (size, checksum, md) in added_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(addedstr, self.id, docname, format, version, size, checksum, md) for (docname, format, version), (size, checksum, md) in deleted_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(deletedstr, self.id, docname, format, version, size, checksum, md) def _build_related_file_list(self): """Lists all the files related to the bibdoc (e.g. its icon). This function should be called every time the bibdoc is modified. """ self.related_files = {} res = run_sql("SELECT ln.id_bibdoc2,ln.type,bibdoc.status FROM " "bibdoc_bibdoc AS ln,bibdoc WHERE id=ln.id_bibdoc2 AND " "ln.id_bibdoc1=%s", (self.id,)) for row in res: docid = row[0] doctype = row[1] if row[2] != 'DELETED': if not self.related_files.has_key(doctype): self.related_files[doctype] = [] cur_doc = BibDoc(docid=docid) self.related_files[doctype].append(cur_doc) def get_total_size_latest_version(self): """Return the total size used on disk by all the files belonging to this bibdoc and corresponding to the latest version.""" ret = 0 for bibdocfile in self.list_latest_files(): ret += bibdocfile.get_size() return ret def get_total_size(self): """Return the total size used on disk by all the files belonging to this bibdoc.""" ret = 0 for bibdocfile in self.list_all_files(): ret += bibdocfile.get_size() return ret def list_all_files(self, list_hidden=True): """Returns all the docfiles linked with the given bibdoc.""" if list_hidden: return self.docfiles else: return [afile for afile in self.docfiles if not afile.hidden_p()] def list_latest_files(self): """Returns all the docfiles within the last version.""" return self.list_version_files(self.get_latest_version()) def list_version_files(self, version, list_hidden=True): """Return all the docfiles of a particular version.""" version = int(version) return [docfile for docfile in self.docfiles if docfile.get_version() == version and (list_hidden or not docfile.hidden_p())] def get_latest_version(self): """ Returns the latest existing version number for the given bibdoc. If no file is associated with this bibdoc, returns 0.
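A hypothetical sketch (the filenames are made up): with sample.pdf;1 and
sample.pdf;2 on disk, list_versions() returns [1, 2] and
get_latest_version() returns 2.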
""" version = 0 for bibdocfile in self.docfiles: if bibdocfile.get_version() > version: version = bibdocfile.get_version() return version def get_file_number(self): """Return the total number of files.""" return len(self.docfiles) def register_download(self, ip_address, version, format, userid=0): """Register the information about a download of a particular file.""" format = normalize_format(format) if format[:1] == '.': format = format[1:] format = format.upper() return run_sql("INSERT INTO rnkDOWNLOADS " "(id_bibrec,id_bibdoc,file_version,file_format," "id_user,client_host,download_time) VALUES " "(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())", (self.recid, self.id, version, format, userid, ip_address,)) class BibDocFile: """This class represents a physical file in the CDS Invenio filesystem. It should never be instantiated directly""" def __init__(self, fullpath, doctype, version, name, format, recid, docid, status, checksum, description=None, comment=None, hidden=False): self.fullpath = fullpath self.doctype = doctype self.docid = docid self.recid = recid self.version = version self.status = status self.checksum = checksum self.description = description self.comment = comment self.hidden = hidden self.size = os.path.getsize(fullpath) self.md = datetime.fromtimestamp(os.path.getmtime(fullpath)) try: self.cd = datetime.fromtimestamp(os.path.getctime(fullpath)) except OSError: self.cd = self.md self.name = name self.format = normalize_format(format) self.dir = os.path.dirname(fullpath) self.url = '%s/record/%s/files/%s%s' % (CFG_SITE_URL, self.recid, urllib.quote(self.name), urllib.quote(self.format)) self.fullurl = '%s?version=%s' % (self.url, self.version) self.etag = '"%i%s%i"' % (self.docid, self.format, self.version) if format == "": self.mime = "application/octet-stream" self.encoding = "" self.fullname = name else: self.fullname = "%s%s" % (name, self.format) (self.mime, self.encoding) = _mimes.guess_type(self.fullname) if self.mime is None: self.mime = "application/octet-stream" self.magic = None def __repr__(self): return ('BibDocFile(%s, %s, %i, %s, %s, %i, %i, %s, %s, %s, %s, %s)' % (repr(self.fullpath), repr(self.doctype), self.version, repr(self.name), repr(self.format), self.recid, self.docid, repr(self.status), repr(self.checksum), repr(self.description), repr(self.comment), repr(self.hidden))) def __str__(self): out = '%s:%s:%s:%s:fullpath=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullpath) out += '%s:%s:%s:%s:fullname=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullname) out += '%s:%s:%s:%s:name=%s\n' % (self.recid, self.docid, self.version, self.format, self.name) out += '%s:%s:%s:%s:status=%s\n' % (self.recid, self.docid, self.version, self.format, self.status) out += '%s:%s:%s:%s:checksum=%s\n' % (self.recid, self.docid, self.version, self.format, self.checksum) out += '%s:%s:%s:%s:size=%s\n' % (self.recid, self.docid, self.version, self.format, nice_size(self.size)) out += '%s:%s:%s:%s:creation time=%s\n' % (self.recid, self.docid, self.version, self.format, self.cd) out += '%s:%s:%s:%s:modification time=%s\n' % (self.recid, self.docid, self.version, self.format, self.md) out += '%s:%s:%s:%s:magic=%s\n' % (self.recid, self.docid, self.version, self.format, self.get_magic()) out += '%s:%s:%s:%s:mime=%s\n' % (self.recid, self.docid, self.version, self.format, self.mime) out += '%s:%s:%s:%s:encoding=%s\n' % (self.recid, self.docid, self.version, self.format, self.encoding) out += '%s:%s:%s:%s:url=%s\n' % (self.recid, self.docid, 
self.version, self.format, self.url) out += '%s:%s:%s:%s:fullurl=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullurl) out += '%s:%s:%s:%s:description=%s\n' % (self.recid, self.docid, self.version, self.format, self.description) out += '%s:%s:%s:%s:comment=%s\n' % (self.recid, self.docid, self.version, self.format, self.comment) out += '%s:%s:%s:%s:hidden=%s\n' % (self.recid, self.docid, self.version, self.format, self.hidden) out += '%s:%s:%s:%s:etag=%s\n' % (self.recid, self.docid, self.version, self.format, self.etag) return out def display(self, ln = CFG_SITE_LANG): """Returns a formatted representation of this docfile.""" return websubmit_templates.tmpl_bibdocfile_filelist( ln = ln, recid = self.recid, version = self.version, name = self.name, format = self.format, size = self.size, ) def is_restricted(self, req): """Returns restriction state. (see acc_authorize_action return values)""" if self.status not in ('', 'DELETED'): return acc_authorize_action(req, 'viewrestrdoc', status=self.status) elif self.status == 'DELETED': return (1, 'File has been deleted') else: return (0, '') def hidden_p(self): return self.hidden def get_url(self): return self.url def get_type(self): return self.doctype def get_path(self): return self.fullpath def get_bibdocid(self): return self.docid def get_name(self): return self.name def get_full_name(self): return self.fullname def get_full_path(self): return self.fullpath def get_format(self): return self.format def get_size(self): return self.size def get_version(self): return self.version def get_checksum(self): return self.checksum def get_description(self): return self.description def get_comment(self): return self.comment def get_content(self): """Returns the binary content of the file.""" content_fd = open(self.fullpath, 'rb') content = content_fd.read() content_fd.close() return content def get_recid(self): """Returns the recid connected with the bibdoc of this file.""" return self.recid def get_status(self): """Returns the status of the file, i.e. either '', 'DELETED' or a restriction keyword.""" return self.status def get_magic(self): """Return all the possible guesses from the magic library about the content of the file.""" if self.magic is None and CFG_HAS_MAGIC: magic_cookies = get_magic_cookies() magic_result = [] for key in magic_cookies.keys(): magic_result.append(magic_cookies[key].file(self.fullpath)) self.magic = tuple(magic_result) return self.magic def check(self): """Return True if the checksum corresponds to the file.""" return calculate_md5(self.fullpath) == self.checksum def stream(self, req): """Stream the file.""" if self.status: (auth_code, auth_message) = acc_authorize_action(req, 'viewrestrdoc', status=self.status) else: auth_code = 0 if auth_code == 0: if os.path.exists(self.fullpath): if random.random() < 0.25 and calculate_md5(self.fullpath) != self.checksum: raise InvenioWebSubmitFileError, "File %s, version %i, for record %s is corrupted!" % (self.fullname, self.version, self.recid) stream_file(req, self.fullpath, self.fullname, self.mime, self.encoding, self.etag, self.checksum, self.fullurl) raise apache.SERVER_RETURN, apache.DONE else: req.status = apache.HTTP_NOT_FOUND raise InvenioWebSubmitFileError, "%s does not exist!" % self.fullpath else: raise InvenioWebSubmitFileError, "You are not authorized to download %s: %s" % (self.fullname, auth_message)
def stream_file(req, fullpath, fullname=None, mime=None, encoding=None, etag=None, md5=None, location=None): """This is a generic function to stream a file to the user. If fullname, mime, encoding, and location are not provided they will be guessed based on req and fullpath. md5 should be passed as a hexadecimal string. """ def normal_streaming(size): req.set_content_length(size) req.send_http_header() if not req.header_only: req.sendfile(fullpath) return "" def single_range(size, the_range): req.set_content_length(the_range[1]) req.headers_out['Content-Range'] = 'bytes %d-%d/%d' % (the_range[0], the_range[0] + the_range[1] - 1, size) req.status = apache.HTTP_PARTIAL_CONTENT req.send_http_header() if not req.header_only: req.sendfile(fullpath, the_range[0], the_range[1]) return "" def multiple_ranges(size, ranges, mime): req.status = apache.HTTP_PARTIAL_CONTENT boundary = '%s%04d' % (time.strftime('THIS_STRING_SEPARATES_%Y%m%d%H%M%S'), random.randint(0, 9999)) req.content_type = 'multipart/byteranges; boundary=%s' % boundary content_length = 0 for arange in ranges: content_length += len('--%s\r\n' % boundary) content_length += len('Content-Type: %s\r\n' % mime) content_length += len('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size)) content_length += len('\r\n') content_length += arange[1] content_length += len('\r\n') content_length += len('--%s--\r\n' % boundary) req.set_content_length(content_length) req.send_http_header() if not req.header_only: for arange in ranges: req.write('--%s\r\n' % boundary, 0) req.write('Content-Type: %s\r\n' % mime, 0) req.write('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size), 0) req.write('\r\n', 0) req.sendfile(fullpath, arange[0], arange[1]) req.write('\r\n', 0) req.write('--%s--\r\n' % boundary) req.flush() return "" def parse_date(date): """According to RFC 2616 a date can come in three formats (in order of preference): Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format Moreover IE is adding some trailing information after a ';'. Wrong dates should be simply ignored. This function returns the time in seconds since the epoch GMT or None in case of errors.""" if not date: return None try: date = date.split(';')[0].strip() # Because of IE ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(time.strptime(date, '%a, %d %b %Y %X %Z')) except: try: ## Sunday, 06-Nov-94 08:49:37 GMT return time.mktime(time.strptime(date, '%A, %d-%b-%y %H:%M:%S %Z')) except: try: ## Sun Nov 6 08:49:37 1994 (ANSI C's asctime() format) return time.mktime(time.strptime(date)) except: return None
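# A hypothetical sanity check (the header values are made up):
#     parse_date('Sun, 06 Nov 1994 08:49:37 GMT')  # -> seconds since the epoch
#     parse_ranges('bytes=20-30,-80')              # -> [[20, 30], [None, 80]]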
def parse_ranges(ranges): """According to RFC 2616 a (multiple) range request comes in the form: bytes=20-30,40-60,70-,-80 with the meaning: from byte 20 to 30 inclusive (11 bytes), from byte 40 to 60 inclusive (21 bytes), from byte 70 to (size - 1) inclusive (size - 70 bytes), from byte size - 80 to (size - 1) inclusive (80 bytes). This function will return the list of ranges in the form: [[first_byte, last_byte], ...] If first_byte or last_byte aren't specified they'll be set to None. If the list is not well formatted it will return None. """ try: if ranges.startswith('bytes') and '=' in ranges: ranges = ranges.split('=')[1].strip() else: return None ret = [] for arange in ranges.split(','): arange = arange.strip() if arange.startswith('-'): ret.append([None, int(arange[1:])]) elif arange.endswith('-'): ret.append([int(arange[:-1]), None]) else: ret.append(map(int, arange.split('-'))) return ret except: return None def parse_tags(tags): """Return a list of tags starting from a comma separated list.""" return [tag.strip() for tag in tags.split(',')] def fix_ranges(ranges, size): """Complementary to parse_ranges, it will transform all the ranges into (first_byte, length) pairs, adjusting all the values based on the actual size provided. """ ret = [] for arange in ranges: if (arange[0] is None and arange[1] > 0) or arange[0] < size: if arange[0] is None: arange[0] = size - arange[1] elif arange[1] is None: arange[1] = size - arange[0] else: arange[1] = arange[1] - arange[0] + 1 arange[0] = max(0, arange[0]) arange[1] = min(size - arange[0], arange[1]) if arange[1] > 0: ret.append(arange) return ret def get_normalized_headers(headers): """Strip and lowercase all the keys of the headers dictionary, plus strip, lowercase and transform the values of known headers into their parsed form.""" ret = { 'if-match' : None, 'unless-modified-since' : None, 'if-modified-since' : None, 'range' : None, 'if-range' : None, 'if-none-match' : None, } for key, value in headers.iteritems(): key = key.strip().lower() value = value.strip() if key in ('unless-modified-since', 'if-modified-since'): value = parse_date(value) elif key == 'range': value = parse_ranges(value) elif key == 'if-range': value = parse_date(value) or parse_tags(value) elif key in ('if-match', 'if-none-match'): value = parse_tags(value) if value: ret[key] = value return ret headers = get_normalized_headers(req.headers_in) if headers['if-match']: if etag is not None and etag not in headers['if-match']: raise apache.SERVER_RETURN, apache.HTTP_PRECONDITION_FAILED if os.path.exists(fullpath): mtime = os.path.getmtime(fullpath) if fullname is None: fullname = os.path.basename(fullpath) if mime is None: format = decompose_file(fullpath)[2] (mime, encoding) = _mimes.guess_type(fullpath) if mime is None: mime = "application/octet-stream" if location is None: location = req.uri req.content_type = mime req.encoding = encoding req.filename = fullname req.headers_out["Last-Modified"] = time.strftime('%a, %d %b %Y %X GMT', time.gmtime(mtime)) req.headers_out["Accept-Ranges"] = "bytes" req.headers_out["Content-Location"] = location if etag is not None: req.headers_out["ETag"] = etag if md5 is not None: req.headers_out["Content-MD5"] = base64.encodestring(binascii.unhexlify(md5.upper()))[:-1] req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"') size = os.path.getsize(fullpath) if not size: try: raise Exception, '%s exists but is empty' % fullpath except Exception: register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND if headers['if-modified-since'] and headers['if-modified-since'] >= mtime: raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED if headers['if-none-match']: if etag is not None and etag in headers['if-none-match']: raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED if headers['unless-modified-since'] and headers['unless-modified-since'] < mtime: return normal_streaming(size) if headers['range']:
try: if headers['if-range']: if etag is None or etag not in headers['if-range']: return normal_streaming(size) ranges = fix_ranges(headers['range'], size) except: return normal_streaming(size) if len(ranges) > 1: return multiple_ranges(size, ranges, mime) elif ranges: return single_range(size, ranges[0]) else: raise apache.SERVER_RETURN, apache.HTTP_RANGE_NOT_SATISFIABLE else: return normal_streaming(size) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND def stream_restricted_icon(req): """Return the content of the "Restricted Icon" file.""" stream_file(req, '%s/img/restricted.gif' % CFG_WEBDIR) raise apache.SERVER_RETURN, apache.DONE def list_types_from_array(bibdocs): """Retrieves the list of types from the given bibdoc list.""" types = [] for bibdoc in bibdocs: if not bibdoc.get_type() in types: types.append(bibdoc.get_type()) return types def list_versions_from_array(docfiles): """Retrieve the list of existing versions from the given docfiles list.""" versions = [] for docfile in docfiles: if not docfile.get_version() in versions: versions.append(docfile.get_version()) return versions def order_files_with_version(docfile1, docfile2): """Order docfile objects according to their version (latest first).""" version1 = docfile1.get_version() version2 = docfile2.get_version() return cmp(version2, version1) def _make_base_dir(docid): """Given a docid it returns the complete path that should host its files.""" group = "g" + str(int(int(docid) / CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT)) return os.path.join(CFG_WEBSUBMIT_FILEDIR, group, str(docid)) class Md5Folder: """Manage all the MD5 checksums of a folder.""" def __init__(self, folder): """Initialize the class by loading the MD5 checksums of the given folder.""" self.folder = folder try: self.load() except InvenioWebSubmitFileError: self.md5s = {} self.update() def update(self, only_new = True): """Update the .md5 file with the current files.
If only_new is specified, then only files without an already computed checksum are processed.""" if not only_new: self.md5s = {} if os.path.exists(self.folder): for filename in os.listdir(self.folder): if filename not in self.md5s and not filename.startswith('.'): self.md5s[filename] = calculate_md5(os.path.join(self.folder, filename)) self.store() def store(self): """Store the current md5 dictionary into .md5""" try: old_umask = os.umask(022) md5file = open(os.path.join(self.folder, ".md5"), "w") for key, value in self.md5s.items(): md5file.write('%s *%s\n' % (value, key)) md5file.close() os.umask(old_umask) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while storing .md5 for folder '%s': '%s'" % (self.folder, e) def load(self): """Load .md5 into the md5 dictionary""" self.md5s = {} try: md5file = open(os.path.join(self.folder, ".md5"), "r") for row in md5file: md5hash = row[:32] filename = row[34:].strip() self.md5s[filename] = md5hash md5file.close() except IOError: self.update() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while loading .md5 for folder '%s': '%s'" % (self.folder, e) def check(self, filename = ''): """Check that the specified file, or all the files for which a hash exists, are coherent with the stored hashes.""" if filename and filename in self.md5s: try: return self.md5s[filename] == calculate_md5(os.path.join(self.folder, filename)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) else: for filename, md5hash in self.md5s.items(): try: if calculate_md5(os.path.join(self.folder, filename)) != md5hash: return False except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) return True def get_checksum(self, filename): """Return the checksum of a physical file.""" md5hash = self.md5s.get(filename, None) if md5hash is None: self.update() # Now it should not fail! md5hash = self.md5s[filename] return md5hash def calculate_md5_external(filename): """Calculate the md5 of a physical file through the md5sum command-line tool. This is suitable for files larger than 256Kb.""" try: md5_result = os.popen(CFG_PATH_MD5SUM + ' -b %s' % escape_shell_arg(filename)) ret = md5_result.read()[:32] md5_result.close() if len(ret) != 32: # Error in running md5sum. Let's fallback to internal # algorithm. return calculate_md5(filename, force_internal=True) else: return ret except Exception, e: raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e)
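# A hypothetical usage sketch (the path is made up): calculate_md5 below picks
# the internal implementation or md5sum depending on CFG_BIBDOCFILE_MD5_THRESHOLD:
#     checksum = calculate_md5('/tmp/sample.pdf')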
def calculate_md5(filename, force_internal=False): """Calculate the md5 of a physical file. This is suitable for files smaller than 256Kb.""" if not CFG_PATH_MD5SUM or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD: try: to_be_read = open(filename, "rb") computed_md5 = md5.new() while True: buf = to_be_read.read(CFG_BIBDOCFILE_MD5_BUFFER) if buf: computed_md5.update(buf) else: break to_be_read.close() return computed_md5.hexdigest() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) else: return calculate_md5_external(filename) def bibdocfile_url_to_bibrecdocs(url): """Given a URL in the form CFG_SITE_[SECURE_]URL/record/xxx/files/... it returns a BibRecDocs object for the corresponding recid.""" recid = decompose_bibdocfile_url(url)[0] return BibRecDocs(recid) def bibdocfile_url_to_bibdoc(url): """Given a URL in the form CFG_SITE_[SECURE_]URL/record/xxx/files/... it returns a BibDoc object for the corresponding recid/docname.""" docname = decompose_bibdocfile_url(url)[1] return bibdocfile_url_to_bibrecdocs(url).get_bibdoc(docname) def bibdocfile_url_to_bibdocfile(url): """Given a URL in the form CFG_SITE_[SECURE_]URL/record/xxx/files/... it returns a BibDocFile object for the corresponding recid/docname/format.""" dummy, dummy, format = decompose_bibdocfile_url(url) return bibdocfile_url_to_bibdoc(url).get_file(format) def bibdocfile_url_to_fullpath(url): """Given a URL in the form CFG_SITE_[SECURE_]URL/record/xxx/files/... it returns the fullpath for the corresponding recid/docname/format.""" return bibdocfile_url_to_bibdocfile(url).get_full_path() def bibdocfile_url_p(url): """Return True when the url is a potentially valid URL pointing to a fulltext owned by the system.""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): return True if not (url.startswith('%s/record/' % CFG_SITE_URL) or url.startswith('%s/record/' % CFG_SITE_SECURE_URL)): return False splitted_url = url.split('/files/') return len(splitted_url) == 2 and splitted_url[0] != '' and splitted_url[1] != '' def decompose_bibdocfile_url(url): """Given a bibdocfile_url return a triple (recid, docname, format).""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): return decompose_bibdocfile_very_old_url(url) if url.startswith('%s/record/' % CFG_SITE_URL): recid_file = url[len('%s/record/' % CFG_SITE_URL):] elif url.startswith('%s/record/' % CFG_SITE_SECURE_URL): recid_file = url[len('%s/record/' % CFG_SITE_SECURE_URL):] else: raise InvenioWebSubmitFileError, "URL %s doesn't correspond to a valid record inside the system." % url recid_file = recid_file.replace('/files/', '/') recid, docname, format = decompose_file(urllib.unquote(recid_file)) return (int(recid), docname, format) re_bibdocfile_old_url = re.compile(r'/record/(\d*)/files/') def decompose_bibdocfile_old_url(url): """Given a bibdocfile old URL (e.g. CFG_SITE_URL/record/123/files) it returns the recid.""" g = re_bibdocfile_old_url.search(url) if g: return int(g.group(1)) raise InvenioWebSubmitFileError('%s is not a valid old bibdocfile url' % url)
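# A hypothetical decomposition sketch (the recid, docname and format are made up):
#     decompose_bibdocfile_url(CFG_SITE_URL + '/record/123/files/sample.pdf')
#     # -> (123, 'sample', '.pdf')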
URL""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): params = urllib.splitquery(url)[1] if params: try: params = cgi.parse_qs(params) if 'docid' in params: docid = int(params['docid'][0]) bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() elif 'recid' in params: recid = int(params['recid'][0]) if 'name' in params: docname = params['name'][0] else: docname = '' else: raise InvenioWebSubmitFileError('%s has not enough params to correspond to a bibdocfile.' % url) format = normalize_format(params.get('format', [''])[0]) return (recid, docname, format) except Exception, e: raise InvenioWebSubmitFileError('Problem with %s: %s' % (url, e)) else: raise InvenioWebSubmitFileError('%s has no params to correspond to a bibdocfile.' % url) else: raise InvenioWebSubmitFileError('%s is not a valid very old bibdocfile url' % url) def nice_size(size): """Return a nicely printed size in kilo.""" unit = 'B' if size > 1024: size /= 1024.0 unit = 'KB' if size > 1024: size /= 1024.0 unit = 'MB' if size > 1024: size /= 1024.0 unit = 'GB' return '%s %s' % (websearch_templates.tmpl_nice_number(size, max_ndigits_after_dot=2), unit) def get_docname_from_url(url): """Return a potential docname given a url""" path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] filename = os.path.split(path)[-1] return file_strip_ext(filename) def get_format_from_url(url): """Return a potential format given a url""" path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] filename = os.path.split(path)[-1] return filename[len(file_strip_ext(filename)):] def clean_url(url): """Given a local url e.g. a local path it render it a realpath.""" protocol = urllib2.urlparse.urlsplit(url)[0] if protocol in ('', 'file'): path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] return os.path.realpath(path) else: return url def check_valid_url(url): """Check for validity of a url or a file.""" try: protocol = urllib2.urlparse.urlsplit(url)[0] if protocol in ('', 'file'): path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] if os.path.realpath(path) != path: raise StandardError, "%s is not a normalized path (would be %s)." % (path, os.path.normpath(path)) for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_WEBSUBMIT_STORAGEDIR]: if path.startswith(allowed_path): dummy_fd = open(path) dummy_fd.close() return raise StandardError, "%s is not in one of the allowed paths." % path else: urllib2.urlopen(url) except Exception, e: raise StandardError, "%s is not a correct url: %s" % (url, e) def safe_mkstemp(suffix): """Create a temporary filename that don't have any '.' inside a part from the suffix.""" tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, dir=CFG_TMPDIR) if '.' not in suffix: # Just in case format is empty return tmpfd, tmppath while '.' 
def safe_mkstemp(suffix):
    """Create a temporary filename that does not contain any '.' apart
    from the one in the suffix."""
    tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, dir=CFG_TMPDIR)
    if '.' not in suffix: # Just in case format is empty
        return tmpfd, tmppath
    while '.' in os.path.basename(tmppath)[:-len(suffix)]:
        os.close(tmpfd)
        os.remove(tmppath)
        tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, dir=CFG_TMPDIR)
    return (tmpfd, tmppath)

def download_url(url, format, user=None, password=None, sleep=2):
    """Download a url (if it corresponds to a remote file) and return a
    local url to it."""
    class my_fancy_url_opener(urllib.FancyURLopener):
        def __init__(self, user, password):
            urllib.FancyURLopener.__init__(self)
            self.fancy_user = user
            self.fancy_password = password

        def prompt_user_passwd(self, host, realm):
            return (self.fancy_user, self.fancy_password)

    format = normalize_format(format)
    protocol = urllib2.urlparse.urlsplit(url)[0]
    tmpfd, tmppath = safe_mkstemp(format)
    try:
        try:
            if protocol in ('', 'file'):
                path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
                if os.path.realpath(path) != path:
                    raise StandardError, "%s is not a normalized path (would be %s)." % (path, os.path.normpath(path))
                for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_WEBSUBMIT_STORAGEDIR]:
                    if path.startswith(allowed_path):
                        shutil.copy(path, tmppath)
                        if os.path.getsize(tmppath) > 0:
                            return tmppath
                        else:
                            raise StandardError, "%s seems to be empty" % url
                raise StandardError, "%s is not in one of the allowed paths." % path
            else:
                if user is not None:
                    urlopener = my_fancy_url_opener(user, password)
                    urlopener.retrieve(url, tmppath)
                else:
                    urllib.urlretrieve(url, tmppath)
                #cmd_exit_code, cmd_out, cmd_err = run_shell_command(CFG_PATH_WGET + ' %s -O %s -t 2 -T 40' % \
                    #(escape_shell_arg(url), escape_shell_arg(tmppath)))
                #if cmd_exit_code:
                    #raise StandardError, "It's impossible to download %s: %s" % (url, cmd_err)
                if os.path.getsize(tmppath) > 0:
                    return tmppath
                else:
                    raise StandardError, "%s seems to be empty" % url
        except:
            os.remove(tmppath)
            raise
    finally:
        os.close(tmpfd)
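# A hedged usage sketch for download_url(); the URL is a placeholder and
# the returned path lives under CFG_TMPDIR:
#
#     >>> tmppath = download_url('http://example.org/sample.pdf', '.pdf')
#     >>> # tmppath now points to a local copy, e.g. CFG_TMPDIR/tmpXYZ.pdf,
#     >>> # which the caller is responsible for removing.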
class BibDocMoreInfo:
    """Class to wrap the serialized bibdoc more_info. At the moment it
    stores descriptions, comments and hidden flags for each BibDoc."""
    def __init__(self, docid, more_info=None):
        try:
            assert(type(docid) in (long, int) and docid > 0)
            self.docid = docid
            try:
                if more_info is None:
                    res = run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (docid, ))
                    if res and res[0][0]:
                        self.more_info = cPickle.loads(blob_to_string(res[0][0]))
                    else:
                        self.more_info = {}
                else:
                    self.more_info = cPickle.loads(more_info)
            except:
                self.more_info = {}
            if 'descriptions' not in self.more_info:
                self.more_info['descriptions'] = {}
            if 'comments' not in self.more_info:
                self.more_info['comments'] = {}
            if 'hidden' not in self.more_info:
                self.more_info['hidden'] = {}
        except:
            register_exception()
            raise

    def flush(self):
        """Reserialize the more_info to the DB."""
        run_sql('UPDATE bibdoc SET more_info=%s WHERE id=%s', (cPickle.dumps(self.more_info), self.docid))

    def get_comment(self, format, version):
        """Return the comment corresponding to the given docid/format/version."""
        try:
            assert(type(version) is int)
            format = normalize_format(format)
            return self.more_info['comments'].get(version, {}).get(format)
        except:
            register_exception()
            raise

    def get_description(self, format, version):
        """Return the description corresponding to the given docid/format/version."""
        try:
            assert(type(version) is int)
            format = normalize_format(format)
            return self.more_info['descriptions'].get(version, {}).get(format)
        except:
            register_exception()
            raise

    def hidden_p(self, format, version):
        """Return True when the given format/version is hidden."""
        try:
            assert(type(version) is int)
            format = normalize_format(format)
            return self.more_info['hidden'].get(version, {}).get(format, False)
        except:
            register_exception()
            raise

    def set_comment(self, comment, format, version):
        """Store a comment corresponding to the given docid/format/version."""
        try:
            assert(type(version) is int and version > 0)
            format = normalize_format(format)
            if comment == KEEP_OLD_VALUE:
                comment = self.get_comment(format, version) or self.get_comment(format, version - 1)
            if not comment:
                self.unset_comment(format, version)
                self.flush()
                return
            if not version in self.more_info['comments']:
                self.more_info['comments'][version] = {}
            self.more_info['comments'][version][format] = comment
            self.flush()
        except:
            register_exception()
            raise

    def set_description(self, description, format, version):
        """Store a description corresponding to the given docid/format/version."""
        try:
            assert(type(version) is int and version > 0)
            format = normalize_format(format)
            if description == KEEP_OLD_VALUE:
                description = self.get_description(format, version) or self.get_description(format, version - 1)
            if not description:
                self.unset_description(format, version)
                self.flush()
                return
            if not version in self.more_info['descriptions']:
                self.more_info['descriptions'][version] = {}
            self.more_info['descriptions'][version][format] = description
            self.flush()
        except:
            register_exception()
            raise

    def set_hidden(self, hidden, format, version):
        """Store whether the docid/format/version is hidden."""
        try:
            assert(type(version) is int and version > 0)
            format = normalize_format(format)
            if not hidden:
                self.unset_hidden(format, version)
                self.flush()
                return
            if not version in self.more_info['hidden']:
                self.more_info['hidden'][version] = {}
            self.more_info['hidden'][version][format] = hidden
            self.flush()
        except:
            register_exception()
            raise
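    # A hedged usage sketch, assuming a bibdoc with id 123 exists; the
    # values are illustrative only:
    #
    #     >>> more_info = BibDocMoreInfo(123)
    #     >>> more_info.set_comment('a comment', '.pdf', 1)
    #     >>> more_info.get_comment('.pdf', 1)
    #     'a comment'
    #
    # Note that set_comment()/set_description() accept the KEEP_OLD_VALUE
    # sentinel, in which case the value is inherited from the same or the
    # previous version.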
    def unset_comment(self, format, version):
        """Remove a comment."""
        try:
            assert(type(version) is int and version > 0)
            del self.more_info['comments'][version][format]
            self.flush()
        except KeyError:
            pass
        except:
            register_exception()
            raise

    def unset_description(self, format, version):
        """Remove a description."""
        try:
            assert(type(version) is int and version > 0)
            del self.more_info['descriptions'][version][format]
            self.flush()
        except KeyError:
            pass
        except:
            register_exception()
            raise

    def unset_hidden(self, format, version):
        """Remove the hidden flag."""
        try:
            assert(type(version) is int and version > 0)
            del self.more_info['hidden'][version][format]
            self.flush()
        except KeyError:
            pass
        except:
            register_exception()
            raise

    def serialize(self):
        """Return the serialized version of the more_info."""
        return cPickle.dumps(self.more_info)

def readfile(filename):
    """Try to read a file.  Return '' in case of any error.
    This function is useful for quick implementation of websubmit
    functions.
    """
    try:
        fd = open(filename)
        content = fd.read()
        fd.close()
        return content
    except:
        return ''
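# A minimal hedged sketch of readfile() in a websubmit function context;
# the path is hypothetical:
#
#     >>> title = readfile('/opt/cds-invenio/var/tmp/title')
#     >>> # title is '' if the file is missing or unreadable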