diff --git a/Makefile.am b/Makefile.am index 121fb75bc..005f28b0a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,559 +1,561 @@ ## This file is part of Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. confignicedir = $(sysconfdir)/build confignice_SCRIPTS=config.nice SUBDIRS = po config modules EXTRA_DIST = UNINSTALL THANKS RELEASE-NOTES configure-tests.py config.nice.in \ config.rpath # current MathJax version and packages # See also modules/miscutil/lib/htmlutils.py (get_mathjax_header) MJV = 1.1 MATHJAX = https://github.com/mathjax/MathJax/zipball/v$(MJV) # current CKeditor version CKV = 3.6.1 CKEDITOR = ckeditor_$(CKV).zip # current MediaElement.js version MEV = master MEDIAELEMENT = http://github.com/johndyer/mediaelement/zipball/$(MEV) # git-version-get stuff: BUILT_SOURCES = $(top_srcdir)/.version $(top_srcdir)/.version: echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: echo $(VERSION) > $(distdir)/.tarball-version check-custom-templates: $(PYTHON) $(top_srcdir)/modules/webstyle/lib/template.py --check-custom-templates $(top_srcdir) kwalitee-check: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --stats $(top_srcdir) kwalitee-check-errors-only: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-errors $(top_srcdir) kwalitee-check-variables: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-variables $(top_srcdir) kwalitee-check-indentation: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-indentation $(top_srcdir) kwalitee-check-sql-queries: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-sql $(top_srcdir) etags: \rm -f $(top_srcdir)/TAGS (cd $(top_srcdir) && find $(top_srcdir) -name "*.py" -print | xargs etags) install-data-local: for d in / /cache /log /tmp /tmp-shared /data /run ; do \ mkdir -p $(localstatedir)$$d ; \ done @echo "************************************************************" @echo "** Invenio software has been successfully installed! **" @echo "** **" @echo "** You may proceed to customizing your installation now. **" @echo "************************************************************" install-mathjax-plugin: @echo "***********************************************************" @echo "** Installing MathJax plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-mathjax-plugin mkdir /tmp/invenio-mathjax-plugin mkdir -p ${prefix}/var/www/MathJax (cd /tmp/invenio-mathjax-plugin && \ wget '$(MATHJAX)' -O mathjax.zip --no-check-certificate && \ unzip -q mathjax.zip && cd mathjax-MathJax-* && cp -ur * \ ${prefix}/var/www/MathJax) rm -fr /tmp/invenio-mathjax-plugin @echo "************************************************************" @echo "** The MathJax plugin was successfully installed. 
**" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS in invenio.conf. **" @echo "************************************************************" uninstall-mathjax-plugin: @rm -rvf ${prefix}/var/www/MathJax @echo "***********************************************************" @echo "** The MathJax plugin was successfully uninstalled. **" @echo "***********************************************************" install-jscalendar-plugin: @echo "***********************************************************" @echo "** Installing jsCalendar plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-jscalendar-plugin mkdir /tmp/invenio-jscalendar-plugin (cd /tmp/invenio-jscalendar-plugin && \ wget 'http://www.dynarch.com/static/jscalendar-1.0.zip' && \ unzip -u jscalendar-1.0.zip && \ mkdir -p ${prefix}/var/www/jsCalendar && \ cp jscalendar-1.0/img.gif ${prefix}/var/www/jsCalendar/jsCalendar.gif && \ cp jscalendar-1.0/calendar.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-setup.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/lang/calendar-en.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-blue.css ${prefix}/var/www/jsCalendar/) rm -fr /tmp/invenio-jscalendar-plugin @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully installed. **" @echo "***********************************************************" uninstall-jscalendar-plugin: @rm -rvf ${prefix}/var/www/jsCalendar @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully uninstalled. **" @echo "***********************************************************" install-jquery-plugins: install-jquery-plugins @echo "***********************************************************" @echo "** Installing various jQuery plugins, please wait... 
**" @echo "***********************************************************" mkdir -p ${prefix}/var/www/js mkdir -p $(prefix)/var/www/css (cd ${prefix}/var/www/js && \ wget http://jqueryjs.googlecode.com/files/jquery-1.3.1.min.js && \ mv jquery-1.3.1.min.js jquery.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/jquery-1.4.4.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.effects.core.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.effects.highlight.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.widget.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.tabs.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/ui/minified/ui.slider.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/ui/minified/ui.sortable.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.button.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.dialog.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.mouse.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.draggable.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.position.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.resizable.min.js && \ wget http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.11/jquery-ui.min.js && \ wget http://www.appelsiini.net/download/jquery.jeditable.mini.js && \ wget http://github.com/malsup/form/raw/master/jquery.form.js --no-check-certificate && \ wget http://jquery-multifile-plugin.googlecode.com/svn/trunk/jquery.MultiFile.pack.js && \ wget http://autobahn.tablesorter.com/jquery.tablesorter.zip && \ wget http://www.uploadify.com/wp-content/uploads/uploadify-v2.1.4.zip -O uploadify.zip && \ wget http://www.datatables.net/download/build/jquery.dataTables.min.js && \ wget http://keith-wood.name/zip/jquery.bookmark.package-1.4.0.zip && \ unzip jquery.tablesorter.zip && \ rm jquery.tablesorter.zip && \ rm -rf uploadify && \ unzip -u uploadify.zip -d uploadify && \ wget http://flot.googlecode.com/files/flot-0.6.zip && \ wget http://trentrichardson.com/examples/timepicker/js/jquery-ui-timepicker-addon.js && \ unzip -u flot-0.6.zip && \ mv flot/jquery.flot.selection.min.js flot/jquery.flot.min.js flot/excanvas.min.js ./ && \ rm flot-0.6.zip && rm -r flot && \ mv uploadify/swfobject.js ./ && \ mv uploadify/cancel.png uploadify/uploadify.css uploadify/uploadify.allglyphs.swf uploadify/uploadify.fla uploadify/uploadify.swf ../img/ && \ mv uploadify/jquery.uploadify.v2.1.4.min.js ./jquery.uploadify.min.js && \ rm uploadify.zip && rm -r uploadify && \ wget --no-check-certificate https://github.com/douglascrockford/JSON-js/raw/master/json2.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/ui/minified/ui.datepicker.min.js && \ wget -O jquery.hotkeys.min.js http://js-hotkeys.googlecode.com/files/jquery.hotkeys-0.7.8-packed.js && \ wget http://jquery.bassistance.de/treeview/jquery.treeview.zip && \ unzip jquery.treeview.zip -d jquery-treeview && \ rm jquery.treeview.zip && \ wget http://invenio-software.org/download/jquery/v1.5/js/jquery.ajaxPager.js && \ wget http://jqueryui.com/download/jquery-ui-1.7.3.custom.zip && \ unzip jquery-ui-1.7.3.custom.zip development-bundle/ui/ui.core.js js/jquery-ui-1.7.3.custom.min.js && \ mv development-bundle/ui/ui.core.js 
ui.core.js && \ mv js/jquery-ui-1.7.3.custom.min.js jquery-ui-1.7.3.custom.min.js && \ rm -rf development-bundle && \ rm jquery-ui-1.7.3.custom.zip && \ unzip jquery.bookmark.package-1.4.0.zip && \ rm -f jquery.bookmark.ext.* bookmarks-big.png bookmarkBasic.html jquery.bookmark.js jquery.bookmark.pack.js && \ mv bookmarks.png ../img/ && \ mv jquery.bookmark.css ../css/ && \ rm -f jquery.bookmark.package-1.4.0.zip && \ mkdir -p ${prefix}/var/www/img && \ cd ${prefix}/var/www/img && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/base/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/smoothness/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/redmond/ && \ wget --no-check-certificate -O datatables_jquery-ui.css https://github.com/DataTables/DataTables/raw/master/media/css/demo_table_jui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/redmond/jquery-ui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/demos/images/calendar.gif && \ wget -r -np -nH --cut-dirs=5 -A "png" http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/redmond/images/) @echo "***********************************************************" @echo "** The jQuery plugins were successfully installed. **" @echo "***********************************************************" uninstall-jquery-plugins: (cd ${prefix}/var/www/js && \ rm -f jquery.min.js && \ rm -f jquery.effects.core.min.js && \ rm -f jquery.effects.highlight.min.js && \ rm -f jquery.MultiFile.pack.js && \ rm -f jquery.jeditable.mini.js && \ rm -f jquery.flot.selection.min.js && \ rm -f jquery.flot.min.js && \ rm -f excanvas.min.js && \ rm -f jquery-ui-timepicker-addon.min.js && \ rm -f jquery.tablesorter.js && \ rm -f jquery.tablesorter.pager.js && \ rm -f ui.datepicker.min.js && \ rm -f json2.js && \ rm -f jquery.uploadify.min.js && \ rm -f ui.slider.min.js && \ rm -f ui.sortable.min.js && \ rm -f jquery.ui.tabs.min.js && \ rm -rf tablesorter && \ rm -f jquery.hotkeys.min.js && \ rm -rf jquery-treeview && \ rm -f jquery.ajaxPager.js && \ rm -f jquery.form.js && \ rm -f jquery.dataTables.min.js && \ rm -f ui.core.js && \ rm -f jquery.bookmark.min.js && \ rm -f jquery-ui.min.js) (cd ${prefix}/var/www/img && \ rm -f cancel.png uploadify.css uploadify.swf uploadify.allglyphs.swf uploadify.fla && \ rm -f datatables_jquery-ui.css && \ rm -f bookmarks.png) && \ (cd ${prefix}/var/www/css && \ rm -f jquery.bookmark.css) @echo "***********************************************************" @echo "** The jQuery plugins were successfully uninstalled. **" @echo "***********************************************************" install-ckeditor-plugin: @echo "***********************************************************" @echo "** Installing CKeditor plugin, please wait... **" @echo "***********************************************************" rm -rf ${prefix}/lib/python/invenio/ckeditor/ rm -rf /tmp/invenio-ckeditor-plugin mkdir /tmp/invenio-ckeditor-plugin (cd /tmp/invenio-ckeditor-plugin && \ wget 'http://download.cksource.com/CKEditor/CKEditor/CKEditor%20$(CKV)/$(CKEDITOR)' && \ unzip -u -d ${prefix}/var/www $(CKEDITOR)) && \ find ${prefix}/var/www/ckeditor/ -depth -name '_*' -exec rm -rf {} \; && \ find ${prefix}/var/www/ckeditor/ckeditor* -maxdepth 0 !
-name "ckeditor.js" -exec rm -r {} \; && \ rm -fr /tmp/invenio-ckeditor-plugin @echo "* Installing Invenio-specific CKeditor config..." (cd $(top_srcdir)/modules/webstyle/etc && make install) @echo "***********************************************************" @echo "** The CKeditor plugin was successfully installed. **" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR in invenio.conf. **" @echo "***********************************************************" uninstall-ckeditor-plugin: @rm -rvf ${prefix}/var/www/ckeditor @rm -rvf ${prefix}/lib/python/invenio/ckeditor @echo "***********************************************************" @echo "** The CKeditor plugin was successfully uninstalled. **" @echo "***********************************************************" install-pdfa-helper-files: @echo "***********************************************************" @echo "** Installing PDF/A helper files, please wait... **" @echo "***********************************************************" wget 'http://invenio-software.org/download/invenio-demo-site-files/ISOCoatedsb.icc' -O ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully installed. **" @echo "***********************************************************" install-mediaelement: @echo "***********************************************************" @echo "** MediaElement.js, please wait... **" @echo "***********************************************************" rm -rf /tmp/mediaelement mkdir /tmp/mediaelement wget 'http://github.com/johndyer/mediaelement/zipball/master' -O '/tmp/mediaelement/mediaelement.zip' --no-check-certificate unzip -u -d '/tmp/mediaelement' '/tmp/mediaelement/mediaelement.zip' rm -rf ${prefix}/var/www/mediaelement mkdir ${prefix}/var/www/mediaelement mv /tmp/mediaelement/johndyer-mediaelement-*/build/* ${prefix}/var/www/mediaelement rm -rf /tmp/mediaelement @echo "***********************************************************" @echo "** MediaElement.js was successfully installed. **" @echo "***********************************************************" uninstall-pdfa-helper-files: rm -f ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully uninstalled. 
**" @echo "***********************************************************" update-v0.3.0-tables update-v0.3.1-tables: echo "ALTER TABLE idxINDEXNAME CHANGE id_idxINDEX id_idxINDEX mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHODNAME CHANGE id_rnkMETHOD id_rnkMETHOD mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE collectionname CHANGE id_collection id_collection mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE formatname CHANGE id_format id_format mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE fieldname CHANGE id_field id_field mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runbibrank','run BibRank','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgbibrank','configure BibRank','','no');" | ${prefix}/bin/dbexec update-v0.3.2-tables: echo "ALTER TABLE sbmCOLLECTION_sbmDOCTYPE CHANGE id_son id_son char(10) NOT NULL default '0';" | ${prefix}/bin/dbexec update-v0.3.3-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE flxLINKTYPEPARAMS CHANGE pname pname varchar(78) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHOD DROP star_category_ranges;" | ${prefix}/bin/dbexec echo "DROP TABLE rnkSET;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE arguments arguments LONGTEXT;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE status status varchar(50);" | ${prefix}/bin/dbexec update-v0.5.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE session ADD INDEX uid (uid);" | ${prefix}/bin/dbexec echo "UPDATE idxINDEXNAME SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE rnkMETHODNAME SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE collectionname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE collection_portalbox SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE formatname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE fieldname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE idxINDEXNAME SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE rnkMETHODNAME SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE collectionname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE collection_portalbox SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE formatname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE fieldname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec update-v0.7.1-tables: echo "DROP TABLE oaiHARVEST;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgbibharvest','configure BibHarvest','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runoaiharvest','run BibHarvest oaiharvest','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgwebcomment','configure WebComment','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runoaiarchive','run BibHarvest 
oaiarchive','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runbibedit','run BibEdit','','no');" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD nickname varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD last_login datetime NOT NULL default '0000-00-00 00:00:00';" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD INDEX nickname (nickname);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmFIELD CHANGE subname subname varchar(13) default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE user_query_basket CHANGE alert_name alert_name varchar(30) NOT NULL default '';" | ${prefix}/bin/dbexec echo "TRUNCATE TABLE session;" | ${prefix}/bin/dbexec @echo "**********************************************************" @echo "** Do not forget to run the basket migration now: **" @echo "** @PYTHON@ modules/webbasket/lib/webbasket_migration_kit.py " @echo "** Please see the RELEASE-NOTES for details. **" @echo "**********************************************************" @echo "INSERT INTO oaiARCHIVE (id, setName, setSpec, setDescription, setDefinition, setRecList) SELECT id, setName, setSpec, CONCAT_WS('', setDescription), setDefinition, setRecList FROM oaiSET;" update-v0.90.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE format ADD COLUMN (description varchar(255) default '');" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN (content_type varchar(255) default '');" | ${prefix}/bin/dbexec update-v0.90.1-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE schTASK ADD INDEX status (status);" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD INDEX runtime (runtime);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD COLUMN score TINYINT UNSIGNED NOT NULL DEFAULT 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD PRIMARY KEY (doctype, sname);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD KEY doctype (doctype);" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST ADD COLUMN setspecs TEXT NOT NULL DEFAULT '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE setDescription setDescription text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p1 p1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f1 f1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m1 m1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p2 p2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f2 f2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m2 m2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p3 p3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f3 f3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m3 m3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=0 WHERE status='';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=1 WHERE status='deleted';" | ${prefix}/bin/dbexec echo "ALTER TABLE fmtKNOWLEDGEBASES add COLUMN kbtype char default NULL;" | ${prefix}/bin/dbexec update-v0.92.0-tables: echo "UPDATE bibdoc SET status=0 WHERE status='';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=1 WHERE status='deleted';" | 
${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE arguments arguments mediumblob;" | ${prefix}/bin/dbexec echo "UPDATE user SET note=1 WHERE nickname='admin' AND note IS NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup CHANGE name name varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup ADD login_method varchar(255) NOT NULL default 'INTERNAL';" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup ADD UNIQUE KEY login_method_name (login_method(70), name);" | ${prefix}/bin/dbexec echo "ALTER TABLE user CHANGE settings settings blob default NULL;" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Recid', 'This function gets the recid for a document with a given report-number (as stored in the global variable rn).');" | ${prefix}/bin/dbexec update-v0.92.1-tables: echo "DROP TABLE rnkCITATIONDATA;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "UPDATE bibdoc SET status='DELETED' WHERE status='1';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status='' WHERE status='0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibrec ADD KEY creation_date (creation_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibrec ADD KEY modification_date (modification_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY creation_date (creation_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY modification_date (modification_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY docname (docname);" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST CHANGE postprocess postprocess varchar(20) NOT NULL default 'h';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST ADD COLUMN bibfilterprogram varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE idxINDEXNAME CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE idxINDEX ADD COLUMN stemming_language VARCHAR(10) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHODNAME CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkDOWNLOADS CHANGE id_bibdoc id_bibdoc mediumint(9) unsigned default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkDOWNLOADS CHANGE file_format file_format varchar(10) NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE collectionname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection_portalbox CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN visibility TINYINT NOT NULL default 1;" | ${prefix}/bin/dbexec echo "ALTER TABLE formatname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE fieldname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE accROLE ADD COLUMN firerole_def_ser blob NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE accROLE ADD COLUMN firerole_def_src text NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE user_accROLE ADD COLUMN expiration datetime NOT NULL default '9999-12-31 23:59:59';" | ${prefix}/bin/dbexec echo "ALTER TABLE user DROP INDEX id, ADD PRIMARY KEY id (id);" | ${prefix}/bin/dbexec echo -e 'from invenio.dbquery import run_sql;\ map(lambda index_id: run_sql("ALTER TABLE idxPHRASE%02dF CHANGE term term TEXT NULL DEFAULT NULL, DROP INDEX term, ADD INDEX term (term (50))" % index_id[0]), run_sql("select id from idxINDEX"))' | $(PYTHON) echo "INSERT INTO rnkCITATIONDATA VALUES (1,'citationdict','','');" | 
${prefix}/bin/dbexec echo "INSERT INTO rnkCITATIONDATA VALUES (2,'reversedict','','');" | ${prefix}/bin/dbexec echo "INSERT INTO rnkCITATIONDATA VALUES (3,'selfcitdict','','');" | ${prefix}/bin/dbexec update-v0.99.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE bibdoc ADD COLUMN more_info mediumblob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD COLUMN priority tinyint(4) NOT NULL default 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD KEY priority (priority);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA DROP PRIMARY KEY;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD PRIMARY KEY (id);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA CHANGE id id mediumint(8) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD UNIQUE KEY object_name (object_name);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE value value text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmAPPROVAL ADD note text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstDOCUMENT CHANGE docsize docsize bigint(15) unsigned NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtACTIONHISTORY CHANGE client_host client_host int(10) unsigned default NULL;" | ${prefix}/bin/dbexec update-v0.99.1-tables: @echo "Nothing to do; table structure did not change between v0.99.1 and v0.99.2." update-v0.99.2-tables: @echo "Nothing to do; table structure did not change between v0.99.2 and v0.99.3." update-v0.99.3-tables: # from v0.99.3 to v1.0.0-rc0 echo "RENAME TABLE oaiARCHIVE TO oaiREPOSITORY;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO knwKB (id,name,description,kbtype) SELECT id,name,description,'' FROM fmtKNOWLEDGEBASES;" | ${prefix}/bin/dbexec echo "INSERT INTO knwKBRVAL (id,m_key,m_value,id_knwKB) SELECT id,m_key,m_value,id_fmtKNOWLEDGEBASES FROM fmtKNOWLEDGEBASEMAPPINGS;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE name name varchar(40) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE docname docname varchar(250) COLLATE utf8_bin NOT NULL default 'file';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE status status text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD COLUMN text_extraction_date datetime NOT NULL default '0000-00-00';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection DROP COLUMN restricted;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bib85x DROP INDEX kv, ADD INDEX kv (value(100));" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/HEP.rdf' WHERE name='HEP' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/NASA-subjects.rdf' WHERE name='NASA-subjects' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='runoairepository', description='run oairepositoryupdater task' WHERE name='runoaiarchive';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='cfgoaiharvest', description='configure OAI Harvest' WHERE name='cfgbibharvest';" | ${prefix}/bin/dbexec echo "ALTER TABLE accARGUMENT CHANGE 
value value varchar(255);" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='doctype,act,categ' WHERE name='submit';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('categ','*');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='submit' and accARGUMENT.keyword='categ' and accARGUMENT.value='*';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='name,with_editor_rights' WHERE name='cfgwebjournal';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('with_editor_rights','yes');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='cfgwebjournal' and accARGUMENT.keyword='with_editor_rights' and accARGUMENT.value='yes';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC CHANGE id id int(15) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD external_id int(15) NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD collection_id int(15) unsigned NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD original_url text;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD status char(2) NOT NULL default 'ok';" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY status (status);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Photos_to_Storage','Attach/edit the pictures uploaded with the \"create_photos_manager_interface()\" function');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Photos',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a photos upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Photos_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. 
More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\n\r\nfrom invenio.websubmit_functions.ParamFile import ParamFromFile\r\nfrom invenio.websubmit_functions.Move_Photos_to_Storage import read_param_file, create_photos_manager_interface, get_session_id\r\n\r\n# Retrieve session id\r\ntry:\r\n # User info is defined only in MBI/MPI actions...\r\n session_id = get_session_id(None, uid, user_info) \r\nexcept:\r\n session_id = get_session_id(req, uid, {})\r\n\r\n# Retrieve context\r\nindir = curdir.split(\'/\')[-3]\r\ndoctype = curdir.split(\'/\')[-2]\r\naccess = curdir.split(\'/\')[-1]\r\n\r\n# Get the record ID, if any\r\nsysno = ParamFromFile(\"%s/%s\" % (curdir,\'SN\')).strip()\r\n\r\n\"\"\"\r\nModify below the configuration of the photos manager interface.\r\nNote: \'can_reorder_photos\' parameter is not yet fully taken into consideration\r\n\r\nDocumentation of the function is available by running:\r\necho -e \'from invenio.websubmit_functions.Move_Photos_to_Storage import create_photos_manager_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext += create_photos_manager_interface(sysno, session_id, uid,\r\n doctype, indir, curdir, access,\r\n can_delete_photos=True,\r\n can_reorder_photos=True,\r\n can_upload_photos=True,\r\n editor_width=700,\r\n editor_height=400,\r\n initial_slider_value=100,\r\n max_slider_value=200,\r\n min_slider_value=80)','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nfrom invenio.websubmit_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.websubmit_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','forceFileRevision');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Upload_Files_Interface','Display generic interface to add/revise/delete files. 
To be used before function \"Move_Uploaded_Files_to_Storage\"');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Uploaded_Files_to_Storage','Attach files uploaded with \"Create_Upload_Files_Interface\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','elementNameToDoctype');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','keepPreviousVersionDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Revised_Files_to_Storage','Revise files initially uploaded with \"Move_Files_to_Storage\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','minsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','doctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDeleteDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canReviseDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDescribeDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canCommentDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canKeepDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canAddFormatDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRestrictDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canNameNewFiles');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','keepDefault');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','showLinks');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','fileLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','filenameLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','descriptionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','commentLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','startDoc');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','endDoc');" | 
${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','defaultFilenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxFilesDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','nblength');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_nb_length');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Get_Recid','record_search_pattern');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_FCKeditor_Files_to_Storage','Transfer files attached to the record with the FCKeditor');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_FCKeditor_Files_to_Storage','input_fields');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_doctypes');" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD round_name varchar(255) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD restriction varchar(50) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD KEY in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo -e 'from invenio.webcommentadminlib import migrate_comments_populate_threads_index;\ migrate_comments_populate_threads_index()' | $(PYTHON) echo -e 'from invenio.access_control_firerole import repair_role_definitions;\ repair_role_definitions()' | $(PYTHON) update-v1.0.0-rc0-tables: # from v1.0.0-rc0 to next release ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Set_Embargo','Set an embargo on all the documents of a given record.');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_Embargo','date_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES 
('Set_Embargo','date_format');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_role');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_flag');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconformat');" | ${prefix}/bin/dbexec echo "INSERT INTO format (name, code, description, content_type, visibility) VALUES ('Podcast', 'xp', 'Sample format suitable for multimedia feeds, such as podcasts', 'application/rss+xml', 0);" | ${prefix}/bin/dbexec echo "ALTER TABLE accMAILCOOKIE ADD INDEX expiration (expiration);" | ${prefix}/bin/dbexec echo "UPDATE sbmFUNDESC SET function='Move_CKEditor_Files_to_Storage' WHERE function='Move_FCKeditor_Files_to_Storage';" | ${prefix}/bin/dbexec echo "UPDATE sbmALLFUNCDESCR SET function='Move_CKEditor_Files_to_Storage', description='Transfer files attached to the record with the CKEditor' WHERE function='Move_FCKeditor_Files_to_Storage';" | ${prefix}/bin/dbexec echo "UPDATE sbmFUNCTIONS SET function='Move_CKEditor_Files_to_Storage' WHERE function='Move_FCKeditor_Files_to_Storage';" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE proc proc varchar(255) NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE session CHANGE session_object session_object longblob;" | ${prefix}/bin/dbexec + echo "ALTER TABLE oaiREPOSITORY CHANGE setSpec setSpec varchar(255) NOT NULL default 'GLOBAL_SET';" | ${prefix}/bin/dbexec + echo "UPDATE oaiREPOSITORY SET setSpec='GLOBAL_SET' WHERE setSpec='';" | ${prefix}/bin/dbexec CLEANFILES = *~ *.pyc *.tmp diff --git a/THANKS b/THANKS index 9efbb02d5..2f7205c2a 100644 --- a/THANKS +++ b/THANKS @@ -1,170 +1,173 @@ Invenio THANKS ============== Several people outside the Invenio Development Team contributed to the project: - Thierry Thomas Patches for compiling old CDSware 0.3.x sources on FreeBSD. - Guido Pelzer Contributions to the German translation. German stopword list. - Valerio Gracco Contributions to the Italian translation. - Tullio Basaglia Contributions to the Italian translation. - Flavio C. Coelho Contributions to the Portuguese translation. - Lyuba Vasilevskaya Contributions to the Russian translation. - Maria Gomez Marti Contributions to the Spanish translation. - Magaly Bascones Dominguez Contributions to the Spanish translation. - Urban Andersson Contributions to the Swedish translation. - Eric Grand Contributions to the French translation. - Theodoropoulos Theodoros Contributions to the Greek translation, Greek stopword list, XML RefWorks output format. - Vasyl Ostrovskyi Contributions to the Ukrainian translation. - Ferran Jorba Contributions to the Catalan and Spanish translations. Cleanup of the old PHP-based BibFormat Admin Guide. Several minor patches. - Beatriu Piera Translation of the Search Guide into Catalan and Spanish. - Anonymous contributor (name withheld by request) Contributions to the Japanese translation. - Anonymous contributor (name withheld by request) Contributions to the Spanish translation. - Alen Vodopijevec Contributions to the Croatian translation. - Jasna Marković Contributions to the Croatian translation. - Kam-ming Ku Contributions to the Chinese translations (zh_CN, zh_TW). - Benedikt Koeppel Contributions to the German translation. - Toru Tsuboyama Contributions to the Japanese translation. - Mike Marino Several minor patches and suggestions. - Zbigniew Szklarz Contributions to the Polish translation. 
- Iaroslav Gaponenko Contributions to the Russian translation. - Yana Osborne Contributions to the Russian translation. - Zbigniew Leonowicz Contributions to the Polish translation. - Makiko Matsumoto and Takao Ishigaki Contributions to the Japanese translation. - Eva Papp Contributions to the Hungarian translation. - Nino Jejelava Contributions to the Georgian translation. - Cristian Bacchi Improvements to the browse interface. - Genis Musulmanbekov Contributions to the Russian translation. - Andrey Tremba Contributions to the Russian translation. The URL handler was inspired by the Quixote Web Framework which is ``Copyright (c) 2004 Corporation for National Research Initiatives; All Rights Reserved''. The session handler was adapted from the mod_python session implementation. Javascript Quicktags scripts from Alex King are used to provide additional capabilities to the editing of BibFormat templates through the web admin interface. The indexer engine uses the Martin Porter Stemming Algorithm and its free Python implementation by Vivake Gupta. The CSS style for the rounded-corners box used in detailed record pages was adapted from Francky Lleyneman's liquidcorners CSS. The NASA_Subjects.rdf file has been retrieved from the American National Aeronautics and Space Administration (NASA), which kindly provides it for free re-use. The tiger test picture used in automated demo picture submission was converted from Ghostscript's 'tiger.eps'. Some icon images were taken from (i) the Silk icon set, (ii) the Function icon set, (iii) the activity indicator icon, and (iv) the Open Icon Library. The unoconv.py script has been adapted from UNOCONV by Dag Wieers. PDFA_def.ps has been adapted from the GPL distribution of GhostScript. The ISOCoatedsb.icc ICC profile has been retrieved from the European Color Initiative. The PEP8 conformance checking script (pep8.py) was written by Johann C. Rocholl. The pep8.py version included with Invenio was downloaded on 2009-06-14. The git-version-gen script was taken from gnulib 20100704+stable-1. The LaTeX-to-Unicode translation table was compiled from contributions by FX and Lea Wiemann. The scientificchar plugin for the CKEditor was adapted from the specialchar plugin by Frederico Knabben. +The oai2.xsl.v1.0 OAI to HTML XSLT Style Sheet was taken from EPrints. + + - end of file - diff --git a/config/invenio.conf b/config/invenio.conf index 4b1b1f87b..b2b4dc8dd 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,1525 +1,1600 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of an Invenio installation, as coming out of the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs, but this is not advised. ## ## If you would like to customize some of these parameters, you should ## rather create a file named 'invenio-local.conf' in the same ## directory where 'invenio.conf' lives and you should write there ## only the customizations that you want to be different from the ## vanilla defaults. ## ## Here is a realistic, minimalist, yet production-ready example of ## what you would typically put there: ## ## $ cat /opt/invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_NAME = John Doe's Document Server ## CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com ## CFG_DATABASE_HOST = localhost ## CFG_DATABASE_NAME = invenio ## CFG_DATABASE_USER = invenio ## CFG_DATABASE_PASS = my123p$ss ## ## You should override at least the parameters mentioned above and the ## parameters mentioned in the `Part 1: Essential parameters' below in ## order to define some very essential runtime parameters such as the ## name of your document server (CFG_SITE_NAME and ## CFG_SITE_NAME_INTL_*), the visible URL of your document server ## (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email address of the ## local Invenio administrator, comment moderator, and alert engine ## (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_ADMIN_EMAIL, etc), and last but ## not least your database credentials (CFG_DATABASE_*). ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have specified in ## your local file. This cascading of configuration parameters will ## ease your future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local Invenio administrator. ## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_PORT = 3306 CFG_DATABASE_NAME = invenio CFG_DATABASE_USER = invenio CFG_DATABASE_PASS = my123p$ss ## CFG_SITE_URL - specify the URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not leave a ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify the secure URL under which your ## installation's secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not leave a ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. 
CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your Invenio installation. CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.) CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Институт Фиктивных Наук Атлантиды CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 CFG_SITE_NAME_INTL_hu = Kitalált Tudományok Atlantiszi Intézete CFG_SITE_NAME_INTL_af = Atlantis Instituut van Fiktiewe Wetenskap CFG_SITE_NAME_INTL_gl = Instituto Atlantis de Ciencia Fictive CFG_SITE_NAME_INTL_ro = Institutul Atlantis al Ştiinţelor Fictive CFG_SITE_NAME_INTL_rw = Atlantis Ishuri Rikuru Ry'ubuhanga CFG_SITE_NAME_INTL_ka = ატლანტიდის ფიქტიური მეცნიერების ინსტიტუტი CFG_SITE_NAME_INTL_lt = Fiktyvių Mokslų Institutas Atlantis CFG_SITE_NAME_INTL_ar = معهد أطلنطيس للعلوم الافتراضية ## CFG_SITE_LANG -- the default language of the interface: CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. Currently supported languages ## include Afrikaans, Arabic, Bulgarian, Catalan, Czech, German, Georgian, ## Greek, English, Spanish, French, Croatian, Hungarian, Galician, ## Italian, Japanese, Kinyarwanda, Lithuanian, Norwegian, Polish, ## Portuguese, Romanian, Russian, Slovak, Swedish, Ukrainian, Chinese ## (China), Chinese (Taiwan), so that the eventual maximum you can ## currently select is ## "af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW". CFG_SITE_LANGS = af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = info@invenio-software.org ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and login with ## this address when using Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = info@invenio-software.org ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES -- list of email addresses to ## which an email should be sent in case of emergency (e.g. bibsched ## queue has been stopped because of an error). 
The configuration ## dictionary allows for different recipients based on weekday and ## time-of-day. Example: ## ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = { ## 'Sunday 22:00-06:00': '0041761111111@email2sms.foo.com', ## '06:00-18:00': 'team-in-europe@foo.com,0041762222222@email2sms.foo.com', ## '18:00-06:00': 'team-in-usa@foo.com', ## '*': 'john.doe.phone@foo.com'} ## ## If you want the emergency email notifications to always go to the ## same address, just use the wildcard line in the above example. CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {} ## CFG_SITE_ADMIN_EMAIL_EXCEPTIONS -- set this to 0 if you do not want ## to receive any captured exception via email to CFG_SITE_ADMIN_EMAIL ## address. Captured exceptions will still be available in ## var/log/invenio.err file. Set this to 1 if you want to receive ## some of the captured exceptions (this depends on the actual place ## where the exception is captured). Set this to 2 if you want to ## receive all captured exceptions. CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1 ## CFG_SITE_RECORD -- what is the URI part representing detailed ## record pages? We recommend leaving the default value `record' ## unchanged. CFG_SITE_RECORD = record ## CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER -- set this to ## the number of seconds after which to reset the exception notification ## counter. A given repetitive exception is notified via email with a ## logarithmic strategy: the first time it is seen it is sent via email, ## then the second time, then the fourth, then the eighth and so forth. ## If the number of seconds elapsed since the last time it was notified ## is greater than CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER ## then the internal counter is reset so that exception notifications do ## not become rarer and rarer. CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400 ## CFG_CERN_SITE -- do we want to enable CERN-specific code? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_INSPIRE_SITE = 0 ## CFG_ADS_SITE -- do we want to enable ADS-specific code? ## Put "1" for "yes" and "0" for "no". CFG_ADS_SITE = 0 ## CFG_OPENAIRE_SITE -- do we want to enable OpenAIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_OPENAIRE_SITE = 0 ## CFG_DEVEL_SITE -- is this a development site? If it is, you might ## prefer that it does not do certain things. For example, you might ## not want WebSubmit to send certain emails or trigger certain ## processes on a development site. ## Put "1" for "yes" (this is a development site) or "0" for "no" ## (this isn't a development site.) CFG_DEVEL_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important ones are ## the 'template skin' you would like to use and the obfuscation mode ## for your email addresses. Please refer to the WebStyle Admin Guide ## for more explanation. The other variables are listed here mostly ## for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE. How do we "protect" ## email addresses from undesired automated email harvesters? This ## setting will not affect 'support' and 'admin' emails. ## NOTE: there is no ultimate solution to protect against email ## harvesting. 
## CFG_CERN_SITE -- do we want to enable CERN-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_CERN_SITE = 0

## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_INSPIRE_SITE = 0

## CFG_ADS_SITE -- do we want to enable ADS-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_ADS_SITE = 0

## CFG_OPENAIRE_SITE -- do we want to enable OpenAIRE-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_OPENAIRE_SITE = 0

## CFG_DEVEL_SITE -- is this a development site? If it is, you might
## prefer that it does not do certain things. For example, you might
## not want WebSubmit to send certain emails or trigger certain
## processes on a development site.
## Put "1" for "yes" (this is a development site) or "0" for "no"
## (this isn't a development site).
CFG_DEVEL_SITE = 0

################################
## Part 2: Web page style     ##
################################

## The variables affecting the page style. The most important one is
## the 'template skin' you would like to use and the obfuscation mode
## for your email addresses. Please refer to the WebStyle Admin Guide
## for more explanation. The other variables are listed here mostly
## for backwards compatibility purposes only.

## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to
## use?
CFG_WEBSTYLE_TEMPLATE_SKIN = default

## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE -- how do we "protect"
## email addresses from undesired automated email harvesters? This
## setting will not affect 'support' and 'admin' emails.
## NOTE: there is no ultimate solution to protect against email
## harvesting. All modes have drawbacks and can more or less be
## circumvented. Choose your preferred mode ([t] means "transparent"
## for the user):
##     -1: hide all emails.
## [t]  0: no protection, email returned as is.
##         foo@example.com => foo@example.com
##      1: basic email munging: replaces @ by [at] and . by [dot]
##         foo@example.com => foo [at] example [dot] com
## [t]  2: transparent name mangling: characters are replaced by
##         equivalent HTML entities.
##         foo@example.com => &#102;&#111;&#111;&#64;&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109;
## [t]  3: JavaScript insertion. Requires JavaScript enabled on the
##         client side.
##      4: replaces @ and . characters by gif equivalents.
##         foo@example.com => foo [at] example [dot] com
CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2

## CFG_WEBSTYLE_INSPECT_TEMPLATES -- do we want to debug all template
## functions so that they would return HTML results wrapped in
## comments indicating which part of the HTML page was created by which
## template function? Useful only for debugging Pythonic HTML
## templates. See WebStyle Admin Guide for more information.
CFG_WEBSTYLE_INSPECT_TEMPLATES = 0

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML
## left top box:
CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP =

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global
## HTML left bottom box:
CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM =

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global
## HTML right top box:
CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP =

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global
## HTML right bottom box:
CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM =

## CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST -- when certain HTTP status
## codes are raised to the WSGI handler, the corresponding exceptions
## and error messages can be sent to the system administrator for
## inspection. This is useful to detect and correct errors. The
## variable represents a comma-separated list of HTTP statuses that
## should alert the admin. Wildcards are possible. If the status is
## followed by an "r", it means that a referer is required to exist
## (useful to distinguish broken known links from URL typos when 404
## errors are raised).
CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = 404r,400,5*,41*
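## For illustration, with the default list above: a 404 accompanied by
## a referer, a 400, any 5xx status (e.g. 500 or 503) and any 41x
## status (e.g. 410) would be reported to the admin, whereas a 404
## without a referer (a typed-in URL) or a plain 403 would not.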
## CFG_WEBSTYLE_HTTP_USE_COMPRESSION -- whether to enable deflate
## compression of your HTTP/HTTPS connections. This will affect the Apache
## configuration snippets created by inveniocfg --create-apache-conf and
## the OAI-PMH Identify response.
CFG_WEBSTYLE_HTTP_USE_COMPRESSION = 0

##################################
## Part 3: WebSearch parameters ##
##################################

## This section contains some configuration parameters for the WebSearch
## module. Please note that WebSearch is mostly configured on
## run-time via its WebSearch Admin web interface. The parameters
## below are the ones that you probably do not want to modify very
## often during the runtime. (Note that you may modify them
## afterwards too, though.)

## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries do we want to
## cache in memory per one Apache httpd process? This cache is used
## mainly for "next/previous page" functionality, but it also caches
## "popular" user queries if more than one user happens to search for
## the same thing. Note that large numbers may lead to great memory
## consumption. We recommend a value not greater than 100.
CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 0

## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older
## system, you may want to map field codes of your old system (such as
## 'ti') to Invenio/MySQL ones ("title"). Use Python dictionary syntax
## for the translation table, e.g. {'wau':'author', 'wti':'title'}.
## Usually you don't want to do that, and you would use the empty dict {}.
CFG_WEBSEARCH_FIELDS_CONVERT = {}

## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the
## search pattern window in the light search interface, in
## characters.
CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60

## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search
## pattern window in the simple search interface, in characters.
CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40

## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the
## search pattern window in the advanced search interface, in
## characters.
CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30

## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still
## want to sort? For higher numbers we print only a warning and won't
## perform any sorting other than the default 'latest records first', as
## sorting would be very time consuming then. We recommend a value of
## not more than a couple of thousands.
CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000

## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but
## it was not preformatted in the "HTML brief" format, do we want to
## call BibFormat on the fly? Put "1" for "yes" and "0" for "no".
## Note that "1" will display the record exactly as if it were fully
## preformatted, but it may be slow due to on-the-fly processing; "0"
## will display a default format very fast, but it may not have all
## the fields as in the fully preformatted HTML brief format. Note
## also that this option is active only for old (PHP) formats; the new
## (Python) formats are called on the fly by default anyway, since
## they are much faster. When unsure, please set "0" here.
CFG_WEBSEARCH_CALL_BIBFORMAT = 0

## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs
## visible rather than MySQL's record IDs? You may use this if you
## migrate from a different e-doc system, and you store your old
## system numbers into 970__a. Put "1" for "yes" and "0" for
## "no". Usually you don't want to do that, though.
CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0

## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- put "1" if you want the
## "Latest Additions" in the web collection pages to show
## internationalized records. Useful only if your brief BibFormat
## templates contain internationalized strings. Otherwise put "0" in
## order not to slow down the creation of latest additions by WebColl.
CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0

## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display
## under 'Latest Additions' in the web collection pages.
CFG_WEBSEARCH_INSTANT_BROWSE = 10

## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to
## display in the RSS feed.
CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25

## CFG_WEBSEARCH_RSS_I18N_COLLECTIONS -- comma-separated list of
## collections that feature an internationalized RSS feed on their
## main search interface page created by webcoll. Other collections
## will have an RSS feed using CFG_SITE_LANG.
CFG_WEBSEARCH_RSS_I18N_COLLECTIONS =

## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long
## a feed cache is valid.
CFG_WEBSEARCH_RSS_TTL = 360

## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of requests kept
## in cache. If the cache is filled, subsequent requests are not cached.
CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000
## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names
## to print explicitly; for more, print "et al". Note that this is
## used in the default formatting that is seldom used, as usually
## BibFormat defines all the formats. The value below is only used
## when BibFormat fails, for example.
CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3

## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether or not to show
## collection grandsons in Narrow Search boxes (sons are shown by
## default, grandsons are configurable here). Use 0 for no and 1 for
## yes.
CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1

## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we
## create help links for Ellis, Nick or Ellis, Nicholas and friends
## when Ellis, N was searched for? Useful if you have one author
## stored in the database under several name formats, namely surname
## comma firstname and surname comma initial cataloging policy. Use 0
## for no and 1 for yes.
CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1

## CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS -- MathJax is a JavaScript
## library that renders (La)TeX mathematical formulas in the client
## browser. This parameter must contain a comma-separated list of
## output formats for which to apply the MathJax rendering, for example
## "hb,hd". If the list is empty, MathJax is disabled.
CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS =
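## For example, to render formulas in the HTML brief and HTML detailed
## output formats, you would put (illustrative):
## CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS = hb,hd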
## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching
## external collections (e.g. SPIRES, CiteSeer, etc), how many seconds
## do we wait for a reply before abandoning?
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5

## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many
## results do we fetch?
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10

## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search
## results by collection or not? Use 0 for no, 1 for yes.
CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1

## CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS -- the default number of
## records to display per page in the search results pages.
CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS = 10

## CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS -- in order to limit denial of
## service attacks, the total number of records per group displayed as a
## result of a search query will be limited to this number. Only superuser
## queries will not be affected by this limit.
CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200

## CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL -- logged in users
## might have rights to access some restricted collections. This variable
## tweaks the kind of support the system will automatically provide to the
## user with respect to searching into these restricted collections.
## Set this to 0 in order to have the user explicitly activate restricted
## collections in order to search into them. Set this to 1 in order to
## propose to the user the list of restricted collections to which he/she has
## rights (note: this is not yet implemented). Set this to 2 in order to
## silently add all the restricted collections to which the user has rights
## to any query.
## Note: the system will discover which restricted collections a user has
## rights to at login time. The time complexity of this procedure is
## proportional to the number of restricted collections. E.g. for a system
## with ~50 restricted collections, you might expect ~1s of delay in the
## login time, when this variable is set to a value higher than 0.
CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL = 0

## CFG_WEBSEARCH_SHOW_COMMENT_COUNT -- do we want to show the 'N comments'
## links on the search engine pages? (Useful only when you have allowed
## commenting.)
CFG_WEBSEARCH_SHOW_COMMENT_COUNT = 1

## CFG_WEBSEARCH_SHOW_REVIEW_COUNT -- do we want to show the 'N reviews'
## links on the search engine pages? (Useful only when you have allowed
## reviewing.)
CFG_WEBSEARCH_SHOW_REVIEW_COUNT = 1

## CFG_WEBSEARCH_FULLTEXT_SNIPPETS -- how many full-text snippets to
## display for full-text searches?
CFG_WEBSEARCH_FULLTEXT_SNIPPETS = 4

## CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS -- how many context words
## to display around the pattern in the snippet?
CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS = 4

## CFG_WEBSEARCH_WILDCARD_LIMIT -- some of the queries, wildcard
## queries in particular (ex: cern*, a*), but also regular expressions
## (ex: [a-z]+), may take a long time to respond due to the high
## number of hits. You can limit the number of terms matched by a
## wildcard by setting this variable. A negative value or zero means
## that none of the queries will be limited (which may be wanted, but is
## also prone to denial-of-service kind of attacks).
CFG_WEBSEARCH_WILDCARD_LIMIT = 50000

## CFG_WEBSEARCH_SYNONYM_KBRS -- defines which knowledge bases are to
## be used for which index in order to provide runtime synonym lookup
## of user-supplied terms, and what massaging function should be used
## upon the search pattern before performing the KB lookup. (Can be one
## of `exact', `leading_to_comma', `leading_to_number'.)
CFG_WEBSEARCH_SYNONYM_KBRS = {
    'journal': ['SEARCH-SYNONYM-JOURNAL', 'leading_to_number'],
    }

## CFG_SOLR_URL -- optionally, you may use Solr to serve full-text
## queries. If so, please specify the URL of your Solr instance.
## (Example: http://localhost:8080/solr)
CFG_SOLR_URL =

## CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT -- specify the limit up to which
## the previous/next/back hit links are to be displayed on detailed record pages.
## In order to speed up list manipulations, if a search returns lots of hits,
## more than this limit, then do not lose time calculating next/previous/back
## hits at all, but display the page directly without them.
## Note also that Invenio installations that do not like
## to have the next/previous hit link functionality would be able to set this
## variable to zero and not see anything.
CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT = 1000

## CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY -- when a record belongs to more than one
## restricted collection, if the viewrestrcoll policy is set to "ALL" (default)
## then the user must be authorized to all the restricted collections in
## order to be granted access to the specific record. If the policy is set to
## "ANY", then the user needs to be authorized to only one of the collections
## in order to be granted access to the specific record.
CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY = ALL
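## For illustration: if a record belongs to the two restricted
## collections 'Theses' and 'Reports' (hypothetical names), then with
## the "ALL" policy a user must be authorized to view both collections
## in order to access the record, while with "ANY" being authorized
## for either one of them suffices.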
#######################################
## Part 4: BibHarvest OAI parameters ##
#######################################

## This part defines parameters for the Invenio OAI gateway.
## Useful if you are running Invenio as OAI data provider.

## CFG_OAI_ID_FIELD -- OAI identifier MARC field:
CFG_OAI_ID_FIELD = 909COo

## CFG_OAI_SET_FIELD -- OAI set MARC field:
CFG_OAI_SET_FIELD = 909COp

+## CFG_OAI_PREVIOUS_SET_FIELD -- previous OAI set MARC field:
+CFG_OAI_PREVIOUS_SET_FIELD = 909COq
+
## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy
-## (no/transient/persistent).
-CFG_OAI_DELETED_POLICY = no
+## (no/transient/persistent):
+CFG_OAI_DELETED_POLICY = persistent

## CFG_OAI_ID_PREFIX -- OAI identifier prefix:
CFG_OAI_ID_PREFIX = atlantis.cern.ch

## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier:
-CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:CERN-TH-4036
+CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:123

## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb:
CFG_OAI_IDENTIFY_DESCRIPTION = <description>
-<oai-identifier xmlns="http://www.openarchives.org/OAI/2.0/oai-identifier">
- <scheme>oai</scheme>
- <repositoryIdentifier>atlantis.cern.ch</repositoryIdentifier>
- <delimiter>:</delimiter>
- <sampleIdentifier>oai:atlantis.cern.ch:CERN-TH-4036</sampleIdentifier>
-</oai-identifier>
<eprints xmlns="http://www.openarchives.org/OAI/1.1/eprints">
 <content><URL>http://atlantis.cern.ch/</URL></content>
 <metadataPolicy><text>Free and unlimited use by anybody with obligation to refer to original record</text></metadataPolicy>
 <dataPolicy><text>Full content, i.e. preprints may not be harvested by robots</text></dataPolicy>
 <submissionPolicy><text>Submission restricted. Submitted documents are subject of approval by OAI repository admins.</text></submissionPolicy>
</eprints>
</description>

## CFG_OAI_LOAD -- OAI number of records in a response:
-CFG_OAI_LOAD = 1000
+CFG_OAI_LOAD = 500

## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time:
CFG_OAI_EXPIRE = 90000

## CFG_OAI_SLEEP -- service unavailable between two consecutive
## requests for CFG_OAI_SLEEP seconds:
-CFG_OAI_SLEEP = 10
+CFG_OAI_SLEEP = 2
+
+## CFG_OAI_METADATA_FORMATS -- mapping between accepted metadataPrefixes and
+## the corresponding output format to use, its schema and its metadataNamespace.
+CFG_OAI_METADATA_FORMATS = {
+    'marcxml': ('XOAIMARC', 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', 'http://www.loc.gov/MARC21/slim'),
+    'oai_dc': ('XOAIDC', 'http://www.openarchives.org/OAI/1.1/dc.xsd', 'http://purl.org/dc/elements/1.1/'),
+    }
+
+## CFG_OAI_FRIENDS -- list of OAI baseURL of friend repositories. See:
+## http://www.openarchives.org/OAI/2.0/guidelines-friends.htm
+CFG_OAI_FRIENDS = http://cdsweb.cern.ch/oai2d,http://openaire.cern.ch/oai2d,http://export.arxiv.org/oai2
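+## For illustration, a harvester would then query this gateway with
+## standard OAI-PMH requests such as (hypothetical base URL):
+## http://atlantis.cern.ch/oai2d?verb=ListRecords&metadataPrefix=oai_dc
+## where metadataPrefix must be one of the keys defined in
+## CFG_OAI_METADATA_FORMATS above.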
+
+## The following subfields are a complement to
+## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. If CFG_OAI_PROVENANCE_BASEURL_SUBFIELD is
+## set for a record, then the corresponding field is considered as having
+## been harvested via OAI-PMH.
+
+## CFG_OAI_PROVENANCE_BASEURL_SUBFIELD -- baseURL of the originDescription
+## of a record
+CFG_OAI_PROVENANCE_BASEURL_SUBFIELD = u
+
+## CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD -- datestamp of the originDescription
+## of a record
+CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD = d
+
+## CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD -- metadataNamespace of the
+## originDescription of a record
+CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD = m
+
+## CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD -- originDescription of the
+## originDescription of a record
+CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD = d
+
+## CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD -- harvestDate of the
+## originDescription of a record
+CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD = h
+
+## CFG_OAI_PROVENANCE_ALTERED_SUBFIELD -- altered flag of the
+## originDescription of a record
+CFG_OAI_PROVENANCE_ALTERED_SUBFIELD = t
+
+## NOTE: the following parameters are experimental
+## -----------------------------------------------------------------------------
+## CFG_OAI_RIGHTS_FIELD -- MARC field dedicated to storing Copyright information
+CFG_OAI_RIGHTS_FIELD = 542__
+
+## CFG_OAI_RIGHTS_HOLDER_SUBFIELD -- MARC subfield dedicated to storing the
+## Copyright holder information
+CFG_OAI_RIGHTS_HOLDER_SUBFIELD = d
+
+## CFG_OAI_RIGHTS_DATE_SUBFIELD -- MARC subfield dedicated to storing the
+## Copyright date information
+CFG_OAI_RIGHTS_DATE_SUBFIELD = g
+
+## CFG_OAI_RIGHTS_URI_SUBFIELD -- MARC subfield dedicated to storing the URI
+## (URL or URN, more detailed statement about copyright status) information
+CFG_OAI_RIGHTS_URI_SUBFIELD = u
+
+## CFG_OAI_RIGHTS_CONTACT_SUBFIELD -- MARC subfield dedicated to storing the
+## Copyright holder contact information
+CFG_OAI_RIGHTS_CONTACT_SUBFIELD = e
+
+## CFG_OAI_RIGHTS_STATEMENT_SUBFIELD -- MARC subfield dedicated to storing the
+## Copyright statement as presented on the resource
+CFG_OAI_RIGHTS_STATEMENT_SUBFIELD = f
+
+## CFG_OAI_LICENSE_FIELD -- MARC field dedicated to storing terms governing
+## use and reproduction (license)
+CFG_OAI_LICENSE_FIELD = 540__
+
+## CFG_OAI_LICENSE_TERMS_SUBFIELD -- MARC subfield dedicated to storing the
+## Terms governing use and reproduction, e.g. CC License
+CFG_OAI_LICENSE_TERMS_SUBFIELD = a
+
+## CFG_OAI_LICENSE_PUBLISHER_SUBFIELD -- MARC subfield dedicated to storing the
+## person or institution imposing the license (author, publisher)
+CFG_OAI_LICENSE_PUBLISHER_SUBFIELD = b
+
+## CFG_OAI_LICENSE_URI_SUBFIELD -- MARC subfield dedicated to storing the URI
+## of the license
+CFG_OAI_LICENSE_URI_SUBFIELD = u
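+## For illustration, with the (experimental) defaults above a license
+## could be stored in a MARC field instance such as (hypothetical
+## values):
+## 540__ $a CC-BY-3.0 $b Atlantis Press $u http://creativecommons.org/licenses/by/3.0/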
+##------------------------------------------------------------------------------
+
##################################
## Part 5: WebSubmit parameters ##
##################################

## This section contains some configuration parameters for the WebSubmit
## module. Please note that WebSubmit is mostly configured on
## run-time via its WebSubmit Admin web interface. The parameters
## below are the ones that you probably do not want to modify during
## the runtime.

## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext
## documents are stored under "/opt/invenio/var/data/files/gX/Y"
## directories where X is 0,1,... and Y stands for the bibdoc ID. Thus
## documents Y are grouped into directories X and this variable
## indicates the maximum number of documents Y stored in each
## directory X. This limit is imposed solely for filesystem
## performance reasons in order not to have too many subdirectories in
## a given directory.
CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000
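## For illustration, assuming grouping is done by integer division of
## the bibdoc ID by this limit: with the default value of 5000, bibdoc
## 123 would live under .../data/files/g0/123 and bibdoc 12345 under
## .../data/files/g2/12345 (12345 div 5000 = 2).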
## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated
## list of document extensions not listed in the Python standard mimetype
## library that should be recognized by Invenio.
CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm

## CFG_BIBDOCFILE_USE_XSENDFILE -- if your web server supports the
## XSendfile header, you may want to enable this feature in order for
## Invenio to tell the web server to stream files for download (after
## proper authorization checks) by the web server's means. This helps to
## liberate Invenio worker processes from being busy with sending big
## files to clients. The web server will take care of that. Note:
## this feature is still somewhat experimental. Note: when enabled
## (set to 1), you then have to also regenerate the Apache vhost conf
## snippets (inveniocfg --update-config-py --create-apache-conf).
CFG_BIBDOCFILE_USE_XSENDFILE = 0

## CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY -- a number between 0 and
## 1 that indicates the probability with which the MD5 checksum will be
## verified when streaming bibdocfile-managed files. (0.1 will cause
## the check to be performed once for every 10 downloads)
CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY = 0.1

## CFG_OPENOFFICE_SERVER_HOST -- the host where an OpenOffice Server is
## listening. If localhost, an OpenOffice server will be started
## automatically if it is not already running.
## Note: if you set this to an empty value, this will disable the usage of
## OpenOffice for converting documents.
## If you set this to something different than localhost, you'll have to take
## care to have an OpenOffice server running on the corresponding host and
## to install the same OpenOffice release both on the client and on the server
## side.
## In order to launch an OpenOffice server on a remote machine, just start
## the usual 'soffice' executable in this way:
## $> soffice -headless -nologo -nodefault -norestore -nofirststartwizard \
## .. -accept=socket,host=HOST,port=PORT;urp;StarOffice.ComponentContext
CFG_OPENOFFICE_SERVER_HOST = localhost

## CFG_OPENOFFICE_SERVER_PORT -- the port where an OpenOffice Server is
## listening.
CFG_OPENOFFICE_SERVER_PORT = 2002

## CFG_OPENOFFICE_USER -- the user that will be used to launch the OpenOffice
## client. It is recommended to set this to a user who does not own files,
## e.g. 'nobody'. You should also authorize your Apache server user to be
## able to become this user, e.g. by adding to your /etc/sudoers the following
## line:
## "apache ALL=(nobody) NOPASSWD: ALL"
## provided that apache is the username corresponding to the Apache user.
## On some machines this might be apache2 or www-data.
CFG_OPENOFFICE_USER = nobody

#################################
## Part 6: BibIndex parameters ##
#################################

## This section contains some configuration parameters for the BibIndex
## module. Please note that BibIndex is mostly configured on run-time
## via its BibIndex Admin web interface. The parameters below are the
## ones that you probably do not want to modify very often during the
## runtime.

## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do
## you want to index locally stored files only, or also external URLs?
## Use "0" to say "no" and "1" to say "yes".
CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 1

## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove
## stopwords? Use "0" to say "no" and "1" to say "yes".
CFG_BIBINDEX_REMOVE_STOPWORDS = 0

## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as
## alphanumeric separators of word-blocks inside words. You probably
## don't want to change this.
CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~

## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation
## between word-blocks inside words. You probably don't want to
## change this.
CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\"

## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup
## before indexing? Use 1 if you have HTML markup inside metadata
## (e.g. in abstracts), use 0 otherwise.
CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0

## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LaTeX markup
## before indexing? Use 1 if you have LaTeX markup inside metadata
## (e.g. in abstracts), use 0 otherwise.
CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0

## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to
## the index. Terms shorter than this length will be discarded.
## Useful to keep the database clean; however, you can safely leave
## this value at 0 for up to 1,000,000 documents.
CFG_BIBINDEX_MIN_WORD_LENGTH = 0

## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD --
## access credentials to access restricted URLs, interesting only if
## you are fulltext-indexing files located on a remote server that is
## only available via username/password. But it's probably better to
## handle this case via IP or some convention; the current scheme is
## mostly there for demo only.
CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser
CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass

## CFG_INTBITSET_ENABLE_SANITY_CHECKS --
## Enable sanity checks for integers passed to the intbitset data
## structures. It is good to enable this during debugging
## and to disable it for speed improvements.
CFG_INTBITSET_ENABLE_SANITY_CHECKS = False

## CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES -- regular expression that matches
## docnames for which OCR is desired (set this to .* in order to enable
## OCR in general, set this to empty in order to disable it.)
CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES = scan-.*

## CFG_BIBINDEX_SPLASH_PAGES -- key-value mapping where the key corresponds
## to a regular expression that matches the URLs of the splash pages of
## a given service and the value is a regular expression of the set of URLs
## referenced via <a> tags in the HTML content of the splash pages that are
## referring to documents that need to be indexed.
## NOTE: for backward compatibility reasons you can set this to a simple
## regular expression that will directly be used as the unique key of the
## map, with corresponding value set to ".*" (in order to match any URL)
CFG_BIBINDEX_SPLASH_PAGES = {
    "http://documents\.cern\.ch/setlink\?.*": ".*",
    "http://ilcagenda\.linearcollider\.org/subContributionDisplay\.py\?.*|http://ilcagenda\.linearcollider\.org/contributionDisplay\.py\?.*": "http://ilcagenda\.linearcollider\.org/getFile\.py/access\?.*|http://ilcagenda\.linearcollider\.org/materialDisplay\.py\?.*",
    }
## CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES -- do we want
## the author word index to exclude first names to keep only last
## names? If set to True, then for the author `Bernard, Denis', only
## `Bernard' will be indexed in the word index, not `Denis'. Note
## that if you change this variable, you have to re-index the author
## index via `bibindex -w author -R'.
CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES = False

## CFG_BIBINDEX_SYNONYM_KBRS -- defines which knowledge bases are to
## be used for which index in order to provide index-time synonym
## lookup, and what massaging function should be used upon the search
## pattern before performing the KB lookup. (Can be one of `exact',
## `leading_to_comma', `leading_to_number'.)
CFG_BIBINDEX_SYNONYM_KBRS = {
    'global': ['INDEX-SYNONYM-TITLE', 'exact'],
    'title': ['INDEX-SYNONYM-TITLE', 'exact'],
    }

#######################################
## Part 7: Access control parameters ##
#######################################

## This section contains some configuration parameters for the access
## control system. Please note that WebAccess is mostly configured on
## run-time via its WebAccess Admin web interface. The parameters
## below are the ones that you probably do not want to modify very
## often during the runtime. (If you do want to modify them during
## runtime, for example to deny access temporarily because of backups,
## you can edit access_control_config.py directly, no need to get back
## here and no need to redo the make process.)

## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is.
## Use 0 for normal operation of the site, 1 for read-only site (all
## write operations temporarily closed), 2 for site fully closed,
## 3 for also disabling any database connection.
## Useful for site maintenance.
CFG_ACCESS_CONTROL_LEVEL_SITE = 0

## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy. Use
## 0 to allow guest users, 1 not to allow them (all users must login).
CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0

## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and
## activation policy. When 0, users can register and accounts are
## automatically activated. When 1, users can register but admin must
## activate the accounts. When 2, users cannot register nor update
## their email address, only admin can register accounts. When 3,
## users cannot register nor update email address nor password, only
## admin can register accounts. When 4, the same as 3 applies, plus
## users cannot change their login method. When 5, the same as 4
## applies, plus info about how to get an account is hidden from the
## login page.
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0

## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account
## registration to certain email addresses? If wanted, give domain
## name below, e.g. "cern.ch". If not wanted, leave it empty.
CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN =

## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a
## notification email to the administrator when a new account is
## created? Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a
## notification email to the user when a new account is created, in order
## to verify the validity of the provided email address? Use
## 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a
## notification email to the user when a new account is activated?
## Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0
## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a
## notification email to the user when an account is deleted or an
## account demand is rejected? Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0

## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials
## are stored. Must be an absolute pathname. If the value does not
## start with a slash, it is considered to be the filename of a file
## located under the prefix/var/tmp directory. This is useful for the
## demo site testing purposes. For the production site, if you plan
## to restrict access to some collections based on the Apache user
## authentication mechanism, you should put here an absolute path to
## your Apache password file.
CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords

## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are
## defined. See the documentation of the preceding config variable.
CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups
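## For illustration, such a password file can be produced with Apache's
## htpasswd utility, e.g. (hypothetical user name):
##   $ htpasswd -c demo-site-apache-user-passwords jekyll
## run from within the prefix/var/tmp directory in the demo setup.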
###################################
## Part 8: WebSession parameters ##
###################################

## This section contains some configuration parameters for tweaking
## session handling.

## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a session
## and the corresponding cookie is considered expired.
CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2

## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which a session
## and the corresponding cookie is considered expired, when the user has
## requested to permanently stay logged in.
CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365

## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when a user requests
## a password reset, for how many days is the URL valid?
CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3

## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account
## activation email was sent, for how many days is the URL valid?
CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3

## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- when a
## user does not confirm his email address and does not complete the
## registration, after how many days will the account expire?
CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10

## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the session
## system allocates the same uid=0 to all guest users regardless of where they
## come from. When set to 0, a unique uid is allocated to each guest.
CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0

################################
## Part 9: BibRank parameters ##
################################

## This section contains some configuration parameters for the ranking
## system.

## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading
## similarity stats? ('People who viewed this page also viewed')
CFG_BIBRANK_SHOW_READING_STATS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download
## similarity stats? ('People who downloaded this document also
## downloaded')
CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show the download
## history graph? (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we
## want to show a graph representing the distribution of client IPs
## downloading a given document?
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0

## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited
## by' links? (Useful only when you have citations in the metadata.)
CFG_BIBRANK_SHOW_CITATION_LINKS = 1

## CFG_BIBRANK_SHOW_CITATION_STATS -- do we want to show citation
## stats? ('Cited by M records', 'Co-cited with N records')
CFG_BIBRANK_SHOW_CITATION_STATS = 1

## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show the citation
## history graph? (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1

####################################
## Part 10: WebComment parameters ##
####################################

## This section contains some configuration parameters for the
## commenting and reviewing facilities.

## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users to write
## public comments on records?
CFG_WEBCOMMENT_ALLOW_COMMENTS = 1

## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users to write
## public reviews of records?
CFG_WEBCOMMENT_ALLOW_REVIEWS = 1

## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short
## reviews, that is just the attribution of stars without submitting
## detailed review text?
CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0

## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users
## report a comment to be abusive, how many reports have to be filed
## before the site admin is alerted?
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5

## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do
## we display in the detailed record page upon welcome?
CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1

## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do
## we display in the detailed record page upon welcome?
CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1

## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site
## admin after every comment?
CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1

## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many
## elapsed seconds do we consider enough when checking for possible
## multiple comment submissions by a user?
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20

## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many
## elapsed seconds do we consider enough when checking for possible
## multiple review submissions by a user?
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20

## CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG
## JavaScript-based editor when the user edits comments?
CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False

## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL -- the email address from which the
## alert emails will appear to be sent:
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = info@invenio-software.org

## CFG_WEBCOMMENT_DEFAULT_MODERATOR -- if no rules are
## specified to indicate who is the comment moderator of
## a collection, this person will be used as default.
CFG_WEBCOMMENT_DEFAULT_MODERATOR = info@invenio-software.org

## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS -- do we want to allow the use
## of the MathJax plugin to render LaTeX input in comments?
CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS = 1

## CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION -- allow a comment author to
## delete his own comment?
CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION = 1
# CFG_WEBCOMMENT_EMAIL_REPLIES_TO -- which fields of the record define
# email addresses that should be notified of newly submitted comments,
# and for which collection. Use collection names as keys, and lists of
# tags as values.
CFG_WEBCOMMENT_EMAIL_REPLIES_TO = {
    'Articles': ['506__d', '506__m'],
    }

# CFG_WEBCOMMENT_RESTRICTION_DATAFIELD -- which field of the record
# defines the restriction (must be linked to WebAccess
# 'viewrestrcomment') to apply to newly submitted comments, and for
# which collection. Use collection names as keys, and one tag as value.
CFG_WEBCOMMENT_RESTRICTION_DATAFIELD = {
    'Articles': '5061_a',
    'Pictures': '5061_a',
    'Theses': '5061_a',
    }

# CFG_WEBCOMMENT_ROUND_DATAFIELD -- which field of the record defines
# the current round of comments for which collection. Use collection
# names as keys, and one tag as value.
CFG_WEBCOMMENT_ROUND_DATAFIELD = {
    'Articles': '562__c',
    'Pictures': '562__c',
    }

# CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE -- max file size per attached
# file, in bytes. Choose 0 if you don't want to limit the size.
CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE = 5242880

# CFG_WEBCOMMENT_MAX_ATTACHED_FILES -- maximum number of files that can
# be attached per comment. Choose 0 if you don't want to limit the
# number of files. File uploads can be restricted with action
# "attachcommentfile".
CFG_WEBCOMMENT_MAX_ATTACHED_FILES = 5

# CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH -- how many levels of
# indentation discussions can reach. This can be used to ensure that
# discussions will not go into deep levels of nesting if users don't
# understand the difference between "reply to comment" and "add
# comment". When the depth is reached, any "reply to comment" is
# conceptually converted to a "reply to thread" (i.e. a reply to this
# parent's comment). Use -1 for no limit, 0 for unthreaded (flat)
# discussions.
CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH = 1

##################################
## Part 11: BibSched parameters ##
##################################

## This section contains some configuration parameters for the
## bibliographic task scheduler.

## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh the
## bibsched monitor? (in seconds)
CFG_BIBSCHED_REFRESHTIME = 5

## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task
## logs?
CFG_BIBSCHED_LOG_PAGER = /bin/more

## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to perform the
## garbage collection of the BibSched queue (i.e. removing/moving tasks
## to the archive).
CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30

## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTasks that can be safely
## removed from the BibSched queue once they are DONE.
CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc

## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be safely
## archived out of the BibSched queue once they are DONE.
CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oaiarchive

## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of BibTasks
## that can run concurrently.
## NOTE: concurrent tasks are still considered as an experimental
## feature. Please keep this value set to 1 on production environments.
CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1
## CFG_BIBSCHED_PROCESS_USER -- bibsched and bibtask processes must
## usually run under the same identity as the Apache web server
## process in order to share proper file read/write privileges. If
## you want to force some other bibsched/bibtask user, e.g. because
## you are using a local `invenio' user that belongs to your
## `www-data' Apache user group and so shares writing rights with your
## Apache web server process in this way, then please set its username
## identity here. Otherwise we shall check whether your
## bibsched/bibtask processes are run under the same identity as your
## Apache web server process (in which case you can leave the default
## empty value here).
CFG_BIBSCHED_PROCESS_USER =

## CFG_BIBSCHED_NODE_TASKS -- specific nodes may be configured to
## run only specific tasks; if you want this, then this variable is a
## dictionary of the form {'hostname1': ['task1', 'task2']}. The
## default is that any node can run any task.
CFG_BIBSCHED_NODE_TASKS = {}
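## For example, to dedicate one node to indexing and formatting only,
## you would put (hypothetical hostname):
## CFG_BIBSCHED_NODE_TASKS = {'node1.example.org': ['bibindex', 'bibreformat']}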
###################################
## Part 12: WebBasket parameters ##
###################################

## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit for
## the maximum number of displayed baskets.
CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20

## CFG_WEBBASKET_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG
## JavaScript-based editor when the user edits comments in WebBasket?
CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False

##################################
## Part 13: WebAlert parameters ##
##################################

## This section contains some configuration parameters for the
## automatic email notification alert system.

## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the
## alert emails will appear to be sent:
CFG_WEBALERT_ALERT_ENGINE_EMAIL = info@invenio-software.org

## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records
## at most do we send in an outgoing alert email?
CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20

## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of
## chars per line in an outgoing alert email?
CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72

## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert
## emails fails, how many times do we retry?
CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3

## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending
## alert emails fails, what is the sleeptime between tries? (in
## seconds)
CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300

####################################
## Part 14: WebMessage parameters ##
####################################

## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large web messages do we
## allow?
CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000

## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages do we allow
## in a regular user's inbox?
CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30

## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- how many days before
## we delete orphaned messages?
CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60

##################################
## Part 15: MiscUtil parameters ##
##################################

## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool
## in the DB engine of Invenio. It is okay to enable this flag
## even if you have not installed SQLAlchemy. Note that Invenio will
## lose some performance if this option is enabled.
CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False

## CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT -- how many queries can we run
## inside run_sql_many() in one SQL statement? The limit value
## depends on MySQL's max_allowed_packet configuration.
CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT = 10000

## CFG_MISCUTIL_SMTP_HOST -- which server to use as outgoing mail server to
## send outgoing emails generated by the system, for example concerning
## submissions or email notification alerts.
CFG_MISCUTIL_SMTP_HOST = localhost

## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server
## defined in the previous step.
CFG_MISCUTIL_SMTP_PORT = 25

## CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT -- the default number of seconds after
## which a process launched through shellutils.run_process_with_timeout will
## be killed. This is useful to catch runaway processes.
CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT = 300

## CFG_MATHJAX_HOSTING -- if you plan to use MathJax to display TeX
## formulas on HTML web pages, you can specify whether you wish to use
## 'local' hosting or 'cdn' hosting of MathJax libraries. (If set to
## 'local', you have to run 'make install-mathjax-plugin' as described
## in the INSTALL guide.) If set to 'local', users will use your site
## to download MathJax sources. If set to 'cdn', users will use
## centralized MathJax CDN servers instead. Please note that using
## CDN is suitable only for small institutions or for MathJax
## sponsors; see the MathJax website for more details. (Also, please
## note that if you plan to use MathJax on your site, you have to
## adapt the CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and
## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS configuration variables
## elsewhere in this file.)
CFG_MATHJAX_HOSTING = local

#################################
## Part 16: BibEdit parameters ##
#################################

## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is
## locked to prevent other users from editing it at the same time.
## How many seconds of inactivity before the locked record is again free
## for other people to edit?
CFG_BIBEDIT_TIMEOUT = 3600

## CFG_BIBEDIT_LOCKLEVEL -- when a user tries to edit a record for which there
## is a pending bibupload task in the queue, this shouldn't be permitted.
## The lock level determines how thoroughly the queue should be investigated
## to determine if this is the case.
## Level 0 - always permits editing, doesn't look at the queue
##           (unsafe, use only if you know what you are doing)
## Level 1 - permits editing if there are no queued bibedit tasks for this record
##           (safe with respect to bibedit, but not for other bibupload maintenance jobs)
## Level 2 - permits editing if there are no queued bibupload tasks of any sort
##           (safe, but may lock more than necessary if many cataloguers are around)
## Level 3 - permits editing if no queued bibupload task concerns the given record
##           (safe, most precise locking, but slow,
##           checks for 001/EXTERNAL_SYSNO_TAG/EXTERNAL_OAIID_TAG)
## The recommended level is 3 (default) or 2 (if you use maintenance jobs often).
CFG_BIBEDIT_LOCKLEVEL = 3

## CFG_BIBEDIT_PROTECTED_FIELDS -- a comma-separated list of fields that BibEdit
## will not allow to be added, edited or deleted. Wildcards are not supported,
## but conceptually a wildcard is added at the end of every field specification.
## Examples:
## 500A   - protect all MARC fields with tag 500 and first indicator A
## 5      - protect all MARC fields in the 500-series
## 909C_a - protect subfield a in tag 909 with first indicator C and empty
##          second indicator
## Note that 001 is protected by default, but if protection of other
## identifiers or automated fields is a requirement, they should be added to
## this list.
CFG_BIBEDIT_PROTECTED_FIELDS =
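## For example, to protect the whole 500-series as well as subfield a
## of 909C_, you would put (illustrative):
## CFG_BIBEDIT_PROTECTED_FIELDS = 5,909C_a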
## CFG_BIBEDIT_QUEUE_CHECK_METHOD -- how do we want to check for
## possible queue locking situations to prevent cataloguers from
## editing a record that may be waiting in the queue? Use 'bibrecord'
## for exact checking (always works, but may be slow), use 'regexp'
## for regular expression based checking (very fast, but may be
## inaccurate). When unsure, use 'bibrecord'.
CFG_BIBEDIT_QUEUE_CHECK_METHOD = bibrecord

## CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE -- a dictionary
## containing which collections will be extended with a given template
## while being displayed in the BibEdit UI.
CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE = { 'Poetry' : 'poem'}

## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING -- maximum number of records
## that can be modified instantly using the multi-record editor. Above
## this limit, modifications will only be executed in limited hours.
CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING = 2000

## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING -- maximum number of records
## that can be sent for modification without having a superadmin role.
## If the number of records is between CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING
## and this number, the modifications will take place only in limited hours.
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING = 20000

## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME -- allowed time to
## execute modifications on records, when the number exceeds
## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING.
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME = 22:00-05:00

###################################
## Part 17: BibUpload parameters ##
###################################

## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references?
CFG_BIBUPLOAD_REFERENCE_TAG = 999

## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external
## system numbers? Useful for matching when our records come from an
## external digital library system.
CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a

## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store OAI ID tags
## of harvested records? Useful for matching when we harvest stuff
## via OAI that we do not want to reexport via Invenio OAI; so records
## may have only the source OAI ID stored in this tag (kind of like an
## external system number too).
CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a

## CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG -- where do we store OAI SRC
## tags of harvested records? Useful for matching when we harvest stuff
## via OAI that we do not want to reexport via Invenio OAI; so records
## may have only the source OAI SRC stored in this tag (kind of like an
## external system number too). Note that the field should be the same as
## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG.
CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = 035__9

## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that
## are strong enough to resist the replace mode. Useful for tags that
## might be created from an external non-metadata-like source,
## e.g. the information about the number of copies left.
CFG_BIBUPLOAD_STRONG_TAGS = 964

## CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -- a comma-separated list
## of tags that contain provenance information that should be checked
## in the bibupload correct mode via matching provenance codes. (Only
## field instances of the same provenance information would be acted
## upon.) Please specify the whole tag info up to subfield codes.
CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = 6531_9

## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of system
## paths from which it is allowed to take fulltexts that will be uploaded via
## FFT (CFG_TMPDIR is included by default).
CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home
## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS -- a dictionary containing external
## URLs that can be accessed by Invenio and specific HTTP headers that will be
## used for each URL.
## The keys of the dictionary are regular expressions matching a set of URLs,
## the values are dictionaries of headers as consumed by urllib2.Request. If a
## regular expression matching all URLs is created at the end of the list, it
## means that Invenio will download all URLs. Otherwise Invenio will just
## download authorized URLs.
## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [
##     ('http://myurl.com/.*', {'User-Agent': 'Me'}),
##     ('http://yoururl.com/.*', {'User-Agent': 'You', 'Accept': 'text/plain'}),
##     ('http://.*', {'User-Agent': 'Invenio'}),
## ]
CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [
    ('http://.*', {'User-Agent': 'Invenio'}),
    ]

## CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE -- do we want to serialize the
## internal representation of records (Pythonic record structure) into
## the database? This can improve the internal processing speed of some
## operations at the price of somewhat bigger disk space usage.
## If you change this value after some records have already been added
## to your installation, you may want to run:
## $ /opt/invenio/bin/inveniocfg --reset-recstruct-cache
## in order to either erase the cache thus freeing database space,
## or to fill the cache for all records that have not been cached yet.
CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1

## CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY -- a comma-separated list
## indicating which fields match the file names of the documents to be
## uploaded.
## The matching will be done in the same order as the list provided.
CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY = reportnumber,recid

## CFG_BATCHUPLOADER_DAEMON_DIR -- directory where the batchuploader daemon
## will look for the subfolders metadata and document by default.
## If the path is relative, CFG_PREFIX will be joined as a prefix.
CFG_BATCHUPLOADER_DAEMON_DIR = var/batchupload

## CFG_BATCHUPLOADER_WEB_ROBOT_AGENT -- comma-separated list specifying the
## user agents permitted to call the batch uploader web interface
## cdsweb.cern.ch/batchuploader/robotupload,
## e.g. when using curl: curl xxx -A invenio_webupload
CFG_BATCHUPLOADER_WEB_ROBOT_AGENT = invenio_webupload

## CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS -- access list specifying, for each
## IP address, which collections are allowed using the batch uploader robot
## interface.
CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = {
    '10.0.0.1': ['BOOK', 'REPORT'], # Example 1
    '10.0.0.2': ['POETRY', 'PREPRINT'], # Example 2
    }
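## For illustration, a permitted robot client could then upload a
## MARCXML file with something like (the form field name and file are
## hypothetical):
##   $ curl -F 'file=@records.xml' -A invenio_webupload \
##       http://cdsweb.cern.ch/batchuploader/robotupload
## where -A sets the user agent to one of the accepted values above.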
####################################
## Part 18: BibCatalog parameters ##
####################################

## CFG_BIBCATALOG_SYSTEM -- set the desired catalog system. For example, RT.
CFG_BIBCATALOG_SYSTEM =

## RT CONFIGURATION
## CFG_BIBCATALOG_SYSTEM_RT_CLI -- path to the RT CLI client
CFG_BIBCATALOG_SYSTEM_RT_CLI = /usr/bin/rt

## CFG_BIBCATALOG_SYSTEM_RT_URL -- base URL of the remote RT system
CFG_BIBCATALOG_SYSTEM_RT_URL = http://localhost/rt3

## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER -- set the username for a default RT account
## on the remote system, with limited privileges, in order to only create and modify own tickets.
CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER =

## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD -- set the password for the default RT account
## on the remote system.
CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD =

####################################
## Part 19: BibFormat parameters  ##
####################################

## CFG_BIBFORMAT_HIDDEN_TAGS -- comma-separated list of MARC tags that
## are not shown to users not having cataloging authorizations.
CFG_BIBFORMAT_HIDDEN_TAGS = 595

## CFG_BIBFORMAT_ADDTHIS_ID -- if you want to use the AddThis service from
## http://www.addthis.com/, set this value to the pubid parameter as
## provided by the service (e.g. ra-4ff80aae118f4dad), and add a call to
## the BFE_ADDTHIS formatting element in your formats, for example
## Default_HTML_detailed.bft.
CFG_BIBFORMAT_ADDTHIS_ID =

####################################
## Part 20: BibMatch parameters   ##
####################################

## CFG_BIBMATCH_LOCAL_SLEEPTIME -- determines the amount of seconds to sleep
## between search queries on the LOCAL system.
CFG_BIBMATCH_LOCAL_SLEEPTIME = 0.0

## CFG_BIBMATCH_REMOTE_SLEEPTIME -- determines the amount of seconds to sleep
## between search queries on REMOTE systems.
CFG_BIBMATCH_REMOTE_SLEEPTIME = 2.0

## CFG_BIBMATCH_FUZZY_WORDLIMITS -- determines the number of words to extract
## from a certain field's value during fuzzy matching mode. Add/change the
## field and the appropriate number in the dictionary to configure this.
CFG_BIBMATCH_FUZZY_WORDLIMITS = {
    '100__a': 2,
    '245__a': 4
    }

## CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT -- determines the number of empty
## results to accept during fuzzy matching mode.
CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT = 1

## CFG_BIBMATCH_QUERY_TEMPLATES -- here you can set the various predefined querystrings
## used to standardize common matching queries. By default the following templates
## are given:
## title        - standard title search. Taken from 245__a (default)
## title-author - title and author search (i.e. a title AND an author search).
##                Taken from 245__a and 100__a
## reportnumber - reportnumber search (i.e. reportnumber:REP-NO-123).
CFG_BIBMATCH_QUERY_TEMPLATES = {
    'title' : '[title]',
    'title-author' : '[title] [author]',
    'reportnumber' : 'reportnumber:[reportnumber]'
    }
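## For illustration: when matching a record whose 245__a is 'Higgs
## boson searches' and whose 100__a is 'Ellis, J' (hypothetical
## values), the 'title-author' template above expands to the query
## 'Higgs boson searches Ellis, J'; in fuzzy matching mode, the word
## limits above would additionally cap how many words are taken from
## each field.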
"external_arxivids" for arXiv SSO CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY = # CFG_BIBAUTHORID_ATTACH_VA_TO_MULTIPLE_RAS determines if the authorid # algorithm is allowed to attach a virtual author to multiple # real authors in the last run of the orphan processing. # Comma separated list of values. CFG_BIBAUTHORID_ATTACH_VA_TO_MULTIPLE_RAS = False # CFG_BIBAUTHORID_AID_ENABLED # Globally enable AuthorID Interfaces. # If False: No guest, user or operator will have access to the system. CFG_BIBAUTHORID_ENABLED = True # CFG_BIBAUTHORID_AID_ON_AUTHORPAGES # Enable AuthorID information on the author pages. CFG_BIBAUTHORID_ON_AUTHORPAGES = True # CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL defines the eMail address # all ticket requests concerning authors will be sent to. CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL = info@invenio-software.org #CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE defines if the optional arXive stub page is skipped CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = False ###################################### ## Part 22: BibClassify parameters ## ###################################### # CFG_BIBCLASSIFY_WEB_MAXKW -- maximum number of keywords to display # in the Keywords tab web page. CFG_BIBCLASSIFY_WEB_MAXKW = 100 ######################################## ## Part 23: Plotextractor parameters ## ######################################## ## CFG_PLOTEXTRACTOR_SOURCE_BASE_URL -- for acquiring source tarballs for plot ## extraction, where should we look? If nothing is set, we'll just go ## to arXiv, but this can be a filesystem location, too CFG_PLOTEXTRACTOR_SOURCE_BASE_URL = http://arxiv.org/ ## CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER -- for acquiring source tarballs for plot ## extraction, subfolder where the tarballs sit CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER = e-print/ ## CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER -- for acquiring source tarballs for plot ## extraction, subfolder where the pdf sit CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER = pdf/ ## CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT -- a float representing the number of seconds ## to wait between each download of pdf and/or tarball from source URL. CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT = 2.0 ## CFG_PLOTEXTRACTOR_CONTEXT_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of characters in each direction to extract ## context from. Default 750. CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT = 750 ## CFG_PLOTEXTRACTOR_DISALLOWED_TEX -- when extracting context of plots from TeX ## sources, this is the list of TeX tags that will trigger 'end of context'. CFG_PLOTEXTRACTOR_DISALLOWED_TEX = begin,end,section,includegraphics,caption,acknowledgements ## CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of words in each direction. Default 75. CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT = 75 ## CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of sentences in each direction. Default 2. CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT = 2 ###################################### ## Part 24: WebStat parameters ## ###################################### # CFG_WEBSTAT_BIBCIRCULATION_START_YEAR defines the start date of the BibCirculation # statistics. Value should have the format 'yyyy'. If empty, take all existing data. CFG_WEBSTAT_BIBCIRCULATION_START_YEAR = ########################## ## THAT's ALL, FOLKS! 
## ########################## diff --git a/configure.ac b/configure.ac index 0de604829..7be5c1417 100644 --- a/configure.ac +++ b/configure.ac @@ -1,881 +1,882 @@ ## This file is part of Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## This is Invenio's main configure.ac file. If you change this ## file, then please run "autoreconf" to regenerate the "configure" ## script. ## Initialize autoconf and automake: AC_INIT([invenio], m4_esyscmd([./git-version-gen .tarball-version]), [info@invenio-software.org]) AM_INIT_AUTOMAKE([tar-ustar]) ## By default we shall install into /opt/invenio. (Do not use ## AC_PREFIX_DEFAULT for this, because it would not work well with ## the localstatedir hack below.) test "${prefix}" = NONE && prefix=/opt/invenio ## Remove any trailing slashes from the prefix value: test "${prefix%/}" != "" && prefix=${prefix%/} ## Check for install: AC_PROG_INSTALL ## Check for gettext support: AM_GNU_GETTEXT(external) AM_GNU_GETTEXT_VERSION(0.14.4) ## Check for MySQL client: AC_MSG_CHECKING(for mysql) AC_ARG_WITH(mysql, AC_HELP_STRING([--with-mysql], [path to a specific MySQL binary (optional)]), MYSQL=${withval}) if test -n "$MYSQL"; then AC_MSG_RESULT($MYSQL) else AC_PATH_PROG(MYSQL, mysql) if test -z "$MYSQL"; then AC_MSG_ERROR([ MySQL command-line client was not found in your PATH. Please install it first. Available from .]) fi fi ## Check for Python: AC_MSG_CHECKING(for python) AC_ARG_WITH(python, AC_HELP_STRING([--with-python], [path to a specific Python binary (optional)]), PYTHON=${withval}) if test -n "$PYTHON"; then AC_MSG_RESULT($PYTHON) else AC_PATH_PROG(PYTHON, python) if test -z "$PYTHON"; then AC_MSG_ERROR([ Python was not found in your PATH. Please either install it in your PATH or specify the --with-python configure option. Python is available from .]) fi fi ## Check for OpenOffice.org Python binary: AC_MSG_CHECKING(for OpenOffice.org Python binary) AC_ARG_WITH(openoffice-python, AC_HELP_STRING([--with-openoffice-python], [path to a specific OpenOffice.org Python binary (optional)]), OPENOFFICE_PYTHON=`which ${withval}`) if test -z "$OPENOFFICE_PYTHON"; then OPENOFFICE_PYTHON=`locate -l 1 -r "o.*office/program/python$"` OPENOFFICE_PYTHON="$PYTHON $OPENOFFICE_PYTHON" if test -n "$OPENOFFICE_PYTHON" && ($OPENOFFICE_PYTHON -c "import uno" 2> /dev/null); then AC_MSG_RESULT($OPENOFFICE_PYTHON) else AC_MSG_WARN([ You have not specified the path to the OpenOffice.org Python binary. OpenOffice.org and Microsoft Office document conversion and fulltext indexing will not be available. We recommend you to install OpenOffice.org first and to rerun the configure script.
OpenOffice.org is available from .]) fi elif ($OPENOFFICE_PYTHON -c "import uno" 2> /dev/null); then AC_MSG_RESULT($OPENOFFICE_PYTHON) else AC_MSG_ERROR([ The specified OpenOffice.org Python binary is not correctly configured. Please specify the correct path to the specific OpenOffice Python binary (OpenOffice.org is available from ).]) fi ## Check for Python version and modules: AC_MSG_CHECKING(for required Python modules) $PYTHON ${srcdir}/configure-tests.py if test $? -ne 0; then AC_MSG_ERROR([Please fix the above Python problem before continuing.]) fi AC_MSG_RESULT(found) ## Check for PHP: AC_PATH_PROG(PHP, php) ## Check for gzip: AC_PATH_PROG(GZIP, gzip) if test -z "$GZIP"; then AC_MSG_WARN([ Gzip was not found in your PATH. It is used in the WebSubmit module to compress the data submitted in an archive. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Gzip is available from .]) fi ## Check for gunzip: AC_PATH_PROG(GUNZIP, gunzip) if test -z "$GUNZIP"; then AC_MSG_WARN([ Gunzip was not found in your PATH. It is used in the WebSubmit module to correctly deal with submitted compressed files. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Gunzip is available from .]) fi ## Check for tar: AC_PATH_PROG(TAR, tar) if test -z "$TAR"; then AC_MSG_WARN([ Tar was not found in your PATH. It is used in the WebSubmit module to pack the submitted data into an archive. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Tar is available from .]) fi ## Check for wget: AC_PATH_PROG(WGET, wget) if test -z "$WGET"; then AC_MSG_WARN([ wget was not found in your PATH. It is used for the fulltext file retrieval. You can continue without it but we recommend you to install it first and to rerun the configure script. wget is available from .]) fi ## Check for md5sum: AC_PATH_PROG(MD5SUM, md5sum) if test -z "$MD5SUM"; then AC_MSG_WARN([ md5sum was not found in your PATH. It is used for the fulltext file checksum verification. You can continue without it but we recommend you to install it first and to rerun the configure script. md5sum is available from .]) fi ## Check for ps2pdf: AC_PATH_PROG(PS2PDF, ps2pdf) if test -z "$PS2PDF"; then AC_MSG_WARN([ ps2pdf was not found in your PATH. It is used in the WebSubmit module to convert submitted PostScripts into PDF. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. ps2pdf is available from .]) fi ## Check for pdflatex: AC_PATH_PROG(PDFLATEX, pdflatex) if test -z "$PDFLATEX"; then AC_MSG_WARN([ pdflatex was not found in your PATH. It is used in the WebSubmit module to stamp PDF files. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script.]) fi ## Check for tiff2pdf: AC_PATH_PROG(TIFF2PDF, tiff2pdf) if test -z "$TIFF2PDF"; then AC_MSG_WARN([ tiff2pdf was not found in your PATH. It is used in the WebSubmit module to convert submitted TIFF files into PDF. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script.
tiff2pdf is available from .]) fi ## Check for gs: AC_PATH_PROG(GS, gs) if test -z "$GS"; then AC_MSG_WARN([ gs was not found in your PATH. It is used in the WebSubmit module to convert submitted PostScripts into PDF. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. gs is available from .]) fi ## Check for pdftotext: AC_PATH_PROG(PDFTOTEXT, pdftotext) if test -z "$PDFTOTEXT"; then AC_MSG_WARN([ pdftotext was not found in your PATH. It is used for the fulltext indexation of PDF files. You can continue without it but you may miss the fulltext searching capability of Invenio. We recommend you to install it first and to rerun the configure script. pdftotext is available from . ]) fi ## Check for pdfinfo: AC_PATH_PROG(PDFINFO, pdfinfo) if test -z "$PDFINFO"; then AC_MSG_WARN([ pdfinfo was not found in your PATH. It is used for gathering information on PDF files. You can continue without it but you may miss this feature of Invenio. We recommend you to install it first and to rerun the configure script. pdfinfo is available from . ]) fi ## Check for pdftk: AC_PATH_PROG(PDFTK, pdftk) if test -z "$PDFTK"; then AC_MSG_WARN([ pdftk was not found in your PATH. It is used for the fulltext file stamping. You can continue without it but you may miss this feature of Invenio. We recommend you to install it first and to rerun the configure script. pdftk is available from . ]) fi ## Check for pdf2ps: AC_PATH_PROG(PDF2PS, pdf2ps) if test -z "$PDF2PS"; then AC_MSG_WARN([ pdf2ps was not found in your PATH. It is used in the WebSubmit module to convert submitted PDFs into PostScript. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdf2ps is available from .]) fi ## Check for pdftops: AC_PATH_PROG(PDFTOPS, pdftops) if test -z "$PDFTOPS"; then AC_MSG_WARN([ pdftops was not found in your PATH. It is used in the WebSubmit module to convert submitted PDFs into PostScript. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdftops is available from .]) fi ## Check for pdfopt: AC_PATH_PROG(PDFOPT, pdfopt) if test -z "$PDFOPT"; then AC_MSG_WARN([ pdfopt was not found in your PATH. It is used in the WebSubmit module to linearize submitted PDFs. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdfopt is available from .]) fi ## Check for pdftoppm: AC_PATH_PROG(PDFTOPPM, pdftoppm) if test -z "$PDFTOPPM"; then AC_MSG_WARN([ pdftoppm was not found in your PATH. It is used in the WebSubmit module to extract images from PDFs for OCR. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdftoppm is available from .]) fi ## Check for pamfile: AC_PATH_PROG(PAMFILE, pamfile) if test -z "$PAMFILE"; then AC_MSG_WARN([ pamfile was not found in your PATH. It is used in the WebSubmit module to retrieve the size of images extracted from PDFs for OCR. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script.
pamfile is available as part of the netpbm utilities from: .]) fi ## Check for ocroscript: AC_PATH_PROG(OCROSCRIPT, ocroscript) if test -z "$OCROSCRIPT"; then AC_MSG_WARN([ If you plan to run OCR on your PDFs, then please install ocroscript now. Otherwise you can safely continue. You also have the option to install ocroscript later and edit invenio-local.conf to let Invenio know the path to ocroscript. ocroscript is available as part of OCROpus from . NOTE: Since OCROpus is being actively developed and its API is continuously changing, please install release 0.3.1]) fi ## Check for pstotext: AC_PATH_PROG(PSTOTEXT, pstotext) if test -z "$PSTOTEXT"; then AC_MSG_WARN([ pstotext was not found in your PATH. It is used for the fulltext indexation of PDF and PostScript files. Please install pstotext. Otherwise you can safely continue. You also have the option to install pstotext later and edit invenio-local.conf to let Invenio know the path to pstotext. pstotext is available from . ]) fi ## Check for ps2ascii: AC_PATH_PROG(PSTOASCII, ps2ascii) if test -z "$PSTOASCII"; then AC_MSG_WARN([ ps2ascii was not found in your PATH. It is used for the fulltext indexation of PostScript files. Please install ps2ascii. Otherwise you can safely continue. You also have the option to install ps2ascii later and edit invenio-local.conf to let Invenio know the path to ps2ascii. ps2ascii is available from . ]) fi ## Check for any2djvu: AC_PATH_PROG(ANY2DJVU, any2djvu) if test -z "$ANY2DJVU"; then AC_MSG_WARN([ any2djvu was not found in your PATH. It is used in the WebSubmit module to convert documents to DJVU. Please install any2djvu. Otherwise you can safely continue. You also have the option to install any2djvu later and edit invenio-local.conf to let Invenio know the path to any2djvu. any2djvu is available from .]) fi ## Check for DJVUPS: AC_PATH_PROG(DJVUPS, djvups) if test -z "$DJVUPS"; then AC_MSG_WARN([ djvups was not found in your PATH. It is used in the WebSubmit module to convert documents from DJVU. Please install djvups. Otherwise you can safely continue. You also have the option to install djvups later and edit invenio-local.conf to let Invenio know the path to djvups. djvups is available from .]) fi ## Check for DJVUTXT: AC_PATH_PROG(DJVUTXT, djvutxt) if test -z "$DJVUTXT"; then AC_MSG_WARN([ djvutxt was not found in your PATH. It is used in the WebSubmit module to extract text from DJVU documents. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. djvutxt is available from .]) fi ## Check for file: AC_PATH_PROG(FILE, file) if test -z "$FILE"; then AC_MSG_WARN([ File was not found in your PATH. It is used in the WebSubmit module to check the validity of the submitted files. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. File is available from .]) fi ## Check for convert: AC_PATH_PROG(CONVERT, convert) if test -z "$CONVERT"; then AC_MSG_WARN([ Convert was not found in your PATH. It is used in the WebSubmit module to create an icon from a submitted picture. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script.
Convert is available from .]) fi ## Check for CLISP: AC_MSG_CHECKING(for clisp) AC_ARG_WITH(clisp, AC_HELP_STRING([--with-clisp], [path to a specific CLISP binary (optional)]), CLISP=${withval}) if test -n "$CLISP"; then AC_MSG_RESULT($CLISP) else AC_PATH_PROG(CLISP, clisp) if test -z "$CLISP"; then AC_MSG_WARN([ GNU CLISP was not found in your PATH. It is used by the WebStat module to produce statistics about Invenio usage. (Alternatively, SBCL or CMUCL can be used instead of CLISP.) You can continue without it but you will miss this feature. We recommend you to install it first (if you have neither CMUCL nor SBCL) and to rerun the configure script. GNU CLISP is available from .]) fi fi ## Check for CMUCL: AC_MSG_CHECKING(for cmucl) AC_ARG_WITH(cmucl, AC_HELP_STRING([--with-cmucl], [path to a specific CMUCL binary (optional)]), CMUCL=${withval}) if test -n "$CMUCL"; then AC_MSG_RESULT($CMUCL) else AC_PATH_PROG(CMUCL, cmucl) if test -z "$CMUCL"; then AC_MSG_CHECKING(for lisp) # CMUCL can also be installed under `lisp' exec name AC_PATH_PROG(CMUCL, lisp) fi if test -z "$CMUCL"; then AC_MSG_WARN([ CMUCL was not found in your PATH. It is used by the WebStat module to produce statistics about Invenio usage. (Alternatively, CLISP or SBCL can be used instead of CMUCL.) You can continue without it but you will miss this feature. We recommend you to install it first (if you have neither CLISP nor SBCL) and to rerun the configure script. CMUCL is available from .]) fi fi ## Check for SBCL: AC_MSG_CHECKING(for sbcl) AC_ARG_WITH(sbcl, AC_HELP_STRING([--with-sbcl], [path to a specific SBCL binary (optional)]), SBCL=${withval}) if test -n "$SBCL"; then AC_MSG_RESULT($SBCL) else AC_PATH_PROG(SBCL, sbcl) if test -z "$SBCL"; then AC_MSG_WARN([ SBCL was not found in your PATH. It is used by the WebStat module to produce statistics about Invenio usage. (Alternatively, CLISP or CMUCL can be used instead of SBCL.) You can continue without it but you will miss this feature. We recommend you to install it first (if you have neither CLISP nor CMUCL) and to rerun the configure script. SBCL is available from .]) fi fi ## Check for gnuplot: AC_PATH_PROG(GNUPLOT, gnuplot) if test -z "$GNUPLOT"; then AC_MSG_WARN([ Gnuplot was not found in your PATH. It is used by the BibRank module to produce graphs about download and citation history. You can continue without it but you will miss these graphs. We recommend you to install it first and to rerun the configure script. Gnuplot is available from .]) fi ## Check for ffmpeg: AC_PATH_PROG(FFMPEG, ffmpeg) AC_PATH_PROG(FFPROBE, ffprobe) if test -z "$FFMPEG"; then AC_MSG_WARN([ FFmpeg was not found in your PATH. It is used by the BibEncode module for video encoding. You can continue without it, but you will not be able to use BibEncode, and no video submission workflows will be possible. We recommend you to install it first if you would like to support video submissions and to rerun the configure script. FFmpeg is available from .]) fi ## Check for mediainfo: AC_PATH_PROG(MEDIAINFO, mediainfo) if test -z "$MEDIAINFO"; then AC_MSG_WARN([ Mediainfo was not found in your PATH. It is used by the BibEncode module for video encoding and media metadata handling. You can continue without it, but you will not be able to use BibEncode, and no video submission workflows will be possible. We recommend you to install it first if you would like to support video submissions and to rerun the configure script.
Mediainfo is available from .]) fi ## Check for ffmpeg ## Substitute variables: AC_SUBST(VERSION) AC_SUBST(OPENOFFICE_PYTHON) AC_SUBST(MYSQL) AC_SUBST(PYTHON) AC_SUBST(GZIP) AC_SUBST(GUNZIP) AC_SUBST(TAR) AC_SUBST(WGET) AC_SUBST(MD5SUM) AC_SUBST(PS2PDF) AC_SUBST(GS) AC_SUBST(PDFTOTEXT) AC_SUBST(PDFTK) AC_SUBST(PDF2PS) AC_SUBST(PDFTOPS) AC_SUBST(PDFOPT) AC_SUBST(PDFTOPPM) AC_SUBST(OCROSCRIPT) AC_SUBST(PSTOTEXT) AC_SUBST(PSTOASCII) AC_SUBST(ANY2DJVU) AC_SUBST(DJVUPS) AC_SUBST(DJVUTXT) AC_SUBST(FILE) AC_SUBST(CONVERT) AC_SUBST(GNUPLOT) AC_SUBST(CLISP) AC_SUBST(CMUCL) AC_SUBST(SBCL) AC_SUBST(CACHEDIR) AC_SUBST(FFMPEG) AC_SUBST(MEDIAINFO) AC_SUBST(FFPROBE) AC_SUBST(localstatedir, `eval echo "${localstatedir}"`) ## Define output files: AC_CONFIG_FILES([config.nice \ Makefile \ po/Makefile.in \ config/Makefile \ config/invenio-autotools.conf \ modules/Makefile \ modules/bibauthorid/Makefile \ modules/bibauthorid/bin/Makefile \ modules/bibauthorid/bin/bibauthorid \ modules/bibauthorid/doc/Makefile \ modules/bibauthorid/doc/admin/Makefile \ modules/bibauthorid/doc/hacking/Makefile \ modules/bibauthorid/lib/Makefile \ modules/bibauthorid/etc/Makefile \ modules/bibauthorid/etc/name_authority_files/Makefile \ modules/bibauthorid/web/Makefile \ modules/bibcatalog/Makefile \ modules/bibcatalog/doc/Makefile \ modules/bibcatalog/doc/admin/Makefile \ modules/bibcatalog/doc/hacking/Makefile modules/bibcatalog/lib/Makefile \ modules/bibcheck/Makefile \ modules/bibcheck/doc/Makefile \ modules/bibcheck/doc/admin/Makefile \ modules/bibcheck/doc/hacking/Makefile \ modules/bibcheck/etc/Makefile \ modules/bibcheck/web/Makefile \ modules/bibcheck/web/admin/Makefile \ modules/bibcirculation/Makefile \ modules/bibcirculation/bin/Makefile \ modules/bibcirculation/doc/Makefile \ modules/bibcirculation/doc/admin/Makefile \ modules/bibcirculation/doc/hacking/Makefile modules/bibcirculation/lib/Makefile \ modules/bibcirculation/web/Makefile \ modules/bibcirculation/web/admin/Makefile \ modules/bibclassify/Makefile \ modules/bibclassify/bin/Makefile \ modules/bibclassify/bin/bibclassify \ modules/bibclassify/doc/Makefile \ modules/bibclassify/doc/admin/Makefile \ modules/bibclassify/doc/hacking/Makefile \ modules/bibclassify/etc/Makefile \ modules/bibclassify/lib/Makefile \ modules/bibconvert/Makefile \ modules/bibconvert/bin/Makefile \ modules/bibconvert/bin/bibconvert \ modules/bibconvert/doc/Makefile \ modules/bibconvert/doc/admin/Makefile \ modules/bibconvert/doc/hacking/Makefile \ modules/bibconvert/etc/Makefile \ modules/bibconvert/lib/Makefile \ modules/bibedit/Makefile \ modules/bibedit/bin/Makefile \ modules/bibedit/bin/bibedit \ modules/bibedit/bin/refextract \ modules/bibedit/bin/xmlmarc2textmarc \ modules/bibedit/bin/textmarc2xmlmarc \ modules/bibedit/bin/xmlmarclint \ modules/bibedit/doc/Makefile \ modules/bibedit/doc/admin/Makefile \ modules/bibedit/doc/hacking/Makefile \ modules/bibedit/etc/Makefile \ modules/bibedit/lib/Makefile \ modules/bibedit/web/Makefile \ modules/bibencode/Makefile \ modules/bibencode/bin/Makefile \ modules/bibencode/bin/bibencode \ modules/bibencode/lib/Makefile \ modules/bibencode/etc/Makefile \ modules/bibencode/www/Makefile \ modules/bibexport/Makefile \ modules/bibexport/bin/Makefile \ modules/bibexport/bin/bibexport \ modules/bibexport/doc/Makefile \ modules/bibexport/doc/admin/Makefile \ modules/bibexport/doc/hacking/Makefile modules/bibexport/etc/Makefile \ modules/bibexport/lib/Makefile \ modules/bibexport/web/Makefile \ modules/bibexport/web/admin/Makefile \ 
modules/bibformat/Makefile \ modules/bibformat/bin/Makefile \ modules/bibformat/bin/bibreformat \ modules/bibformat/doc/Makefile \ modules/bibformat/doc/admin/Makefile \ modules/bibformat/doc/hacking/Makefile \ modules/bibformat/etc/Makefile \ modules/bibformat/etc/format_templates/Makefile \ modules/bibformat/etc/output_formats/Makefile \ modules/bibformat/lib/Makefile \ modules/bibformat/lib/elements/Makefile \ modules/bibformat/web/Makefile \ modules/bibformat/web/admin/Makefile \ modules/bibharvest/Makefile \ modules/bibharvest/bin/Makefile \ modules/bibharvest/bin/oairepositoryupdater \ modules/bibharvest/bin/oaiharvest \ modules/bibharvest/doc/Makefile \ modules/bibharvest/doc/admin/Makefile \ modules/bibharvest/doc/hacking/Makefile \ + modules/bibharvest/etc/Makefile \ modules/bibharvest/lib/Makefile \ modules/bibharvest/web/Makefile \ modules/bibharvest/web/admin/Makefile \ modules/bibindex/Makefile \ modules/bibindex/bin/Makefile \ modules/bibindex/bin/bibindex \ modules/bibindex/bin/bibstat \ modules/bibindex/doc/Makefile \ modules/bibindex/doc/admin/Makefile \ modules/bibindex/doc/hacking/Makefile \ modules/bibindex/lib/Makefile \ modules/bibindex/web/Makefile \ modules/bibindex/web/admin/Makefile \ modules/bibknowledge/Makefile \ modules/bibknowledge/lib/Makefile \ modules/bibknowledge/doc/Makefile \ modules/bibknowledge/doc/admin/Makefile \ modules/bibknowledge/doc/hacking/Makefile \ modules/bibmatch/Makefile \ modules/bibmatch/bin/Makefile \ modules/bibmatch/bin/bibmatch \ modules/bibmatch/doc/Makefile \ modules/bibmatch/doc/admin/Makefile \ modules/bibmatch/etc/Makefile \ modules/bibmatch/lib/Makefile \ modules/bibmerge/Makefile \ modules/bibmerge/bin/Makefile \ modules/bibmerge/doc/Makefile \ modules/bibmerge/doc/admin/Makefile \ modules/bibmerge/doc/hacking/Makefile \ modules/bibmerge/lib/Makefile \ modules/bibmerge/web/Makefile \ modules/bibmerge/web/admin/Makefile \ modules/bibrank/Makefile \ modules/bibrank/bin/Makefile \ modules/bibrank/bin/bibrank \ modules/bibrank/bin/bibrankgkb \ modules/bibrank/doc/Makefile \ modules/bibrank/doc/admin/Makefile \ modules/bibrank/doc/hacking/Makefile \ modules/bibrank/etc/Makefile \ modules/bibrank/etc/bibrankgkb.cfg \ modules/bibrank/etc/demo_jif.cfg \ modules/bibrank/etc/template_single_tag_rank_method.cfg \ modules/bibrank/lib/Makefile \ modules/bibrank/web/Makefile \ modules/bibrank/web/admin/Makefile \ modules/bibsched/Makefile \ modules/bibsched/bin/Makefile \ modules/bibsched/bin/bibsched \ modules/bibsched/bin/bibtaskex \ modules/bibsched/bin/bibtasklet \ modules/bibsched/doc/Makefile \ modules/bibsched/doc/admin/Makefile \ modules/bibsched/doc/hacking/Makefile \ modules/bibsched/lib/Makefile \ modules/bibsched/lib/tasklets/Makefile \ modules/bibupload/Makefile \ modules/bibsword/Makefile \ modules/bibsword/bin/Makefile \ modules/bibsword/bin/bibsword \ modules/bibsword/doc/Makefile \ modules/bibsword/doc/admin/Makefile \ modules/bibsword/doc/hacking/Makefile \ modules/bibsword/lib/Makefile \ modules/bibsword/etc/Makefile \ modules/bibupload/bin/Makefile \ modules/bibupload/bin/bibupload \ modules/bibupload/bin/batchuploader \ modules/bibupload/doc/Makefile \ modules/bibupload/doc/admin/Makefile \ modules/bibupload/doc/hacking/Makefile \ modules/bibupload/lib/Makefile \ modules/elmsubmit/Makefile \ modules/elmsubmit/bin/Makefile \ modules/elmsubmit/bin/elmsubmit \ modules/elmsubmit/doc/Makefile \ modules/elmsubmit/doc/admin/Makefile \ modules/elmsubmit/doc/hacking/Makefile \ modules/elmsubmit/etc/Makefile \ 
modules/elmsubmit/etc/elmsubmit.cfg \ modules/elmsubmit/lib/Makefile \ modules/miscutil/Makefile \ modules/miscutil/bin/Makefile \ modules/miscutil/bin/dbdump \ modules/miscutil/bin/dbexec \ modules/miscutil/bin/inveniocfg \ modules/miscutil/bin/plotextractor \ modules/miscutil/demo/Makefile \ modules/miscutil/doc/Makefile \ modules/miscutil/doc/hacking/Makefile \ modules/miscutil/etc/Makefile \ modules/miscutil/etc/bash_completion.d/Makefile \ modules/miscutil/etc/bash_completion.d/inveniocfg \ modules/miscutil/etc/ckeditor_scientificchar/Makefile \ modules/miscutil/etc/ckeditor_scientificchar/dialogs/Makefile \ modules/miscutil/etc/ckeditor_scientificchar/lang/Makefile \ modules/miscutil/lib/Makefile \ modules/miscutil/sql/Makefile \ modules/miscutil/web/Makefile \ modules/webaccess/Makefile \ modules/webaccess/bin/Makefile \ modules/webaccess/bin/authaction \ modules/webaccess/bin/webaccessadmin \ modules/webaccess/doc/Makefile \ modules/webaccess/doc/admin/Makefile \ modules/webaccess/doc/hacking/Makefile \ modules/webaccess/lib/Makefile \ modules/webaccess/web/Makefile \ modules/webaccess/web/admin/Makefile \ modules/webalert/Makefile \ modules/webalert/bin/Makefile \ modules/webalert/bin/alertengine \ modules/webalert/doc/Makefile \ modules/webalert/doc/admin/Makefile \ modules/webalert/doc/hacking/Makefile \ modules/webalert/lib/Makefile \ modules/webalert/web/Makefile \ modules/webbasket/Makefile \ modules/webbasket/doc/Makefile \ modules/webbasket/doc/admin/Makefile \ modules/webbasket/doc/hacking/Makefile \ modules/webbasket/lib/Makefile \ modules/webbasket/web/Makefile \ modules/webcomment/Makefile \ modules/webcomment/doc/Makefile \ modules/webcomment/doc/admin/Makefile \ modules/webcomment/doc/hacking/Makefile \ modules/webcomment/lib/Makefile \ modules/webcomment/web/Makefile \ modules/webcomment/web/admin/Makefile \ modules/webhelp/Makefile \ modules/webhelp/web/Makefile \ modules/webhelp/web/admin/Makefile \ modules/webhelp/web/admin/howto/Makefile \ modules/webhelp/web/hacking/Makefile \ modules/webjournal/Makefile \ modules/webjournal/etc/Makefile \ modules/webjournal/doc/Makefile \ modules/webjournal/doc/admin/Makefile \ modules/webjournal/doc/hacking/Makefile \ modules/webjournal/lib/Makefile \ modules/webjournal/lib/elements/Makefile \ modules/webjournal/lib/widgets/Makefile \ modules/webjournal/web/Makefile \ modules/webjournal/web/admin/Makefile \ modules/webmessage/Makefile \ modules/webmessage/bin/Makefile \ modules/webmessage/bin/webmessageadmin \ modules/webmessage/doc/Makefile \ modules/webmessage/doc/admin/Makefile \ modules/webmessage/doc/hacking/Makefile \ modules/webmessage/lib/Makefile \ modules/webmessage/web/Makefile \ modules/websearch/Makefile \ modules/websearch/bin/Makefile \ modules/websearch/bin/webcoll \ modules/websearch/doc/Makefile \ modules/websearch/doc/admin/Makefile \ modules/websearch/doc/hacking/Makefile \ modules/websearch/lib/Makefile \ modules/websearch/web/Makefile \ modules/websearch/web/admin/Makefile \ modules/websession/Makefile \ modules/websession/bin/Makefile \ modules/websession/bin/inveniogc \ modules/websession/doc/Makefile \ modules/websession/doc/admin/Makefile \ modules/websession/doc/hacking/Makefile \ modules/websession/lib/Makefile \ modules/websession/web/Makefile \ modules/webstat/Makefile \ modules/webstat/bin/Makefile \ modules/webstat/bin/webstat \ modules/webstat/bin/webstatadmin \ modules/webstat/doc/Makefile \ modules/webstat/doc/admin/Makefile \ modules/webstat/doc/hacking/Makefile \ 
modules/webstat/etc/Makefile \ modules/webstat/lib/Makefile \ modules/webstyle/Makefile \ modules/webstyle/bin/Makefile \ modules/webstyle/bin/webdoc \ modules/webstyle/css/Makefile \ modules/webstyle/doc/Makefile \ modules/webstyle/doc/admin/Makefile \ modules/webstyle/doc/hacking/Makefile \ modules/webstyle/etc/Makefile \ modules/webstyle/img/Makefile \ modules/webstyle/lib/Makefile \ modules/websubmit/Makefile \ modules/websubmit/bin/Makefile \ modules/websubmit/bin/bibdocfile \ modules/websubmit/bin/inveniounoconv \ modules/websubmit/doc/Makefile \ modules/websubmit/doc/admin/Makefile \ modules/websubmit/doc/hacking/Makefile \ modules/websubmit/etc/Makefile \ modules/websubmit/lib/Makefile \ modules/websubmit/lib/functions/Makefile \ modules/websubmit/web/Makefile \ modules/websubmit/web/admin/Makefile \ ]) ## Finally, write output files: AC_OUTPUT ## Write help: AC_MSG_RESULT([****************************************************************************]) AC_MSG_RESULT([** Your Invenio installation is now ready for building. **]) AC_MSG_RESULT([** You have entered the following parameters: **]) AC_MSG_RESULT([** - Invenio main install directory: ${prefix}]) AC_MSG_RESULT([** - Python executable: $PYTHON]) AC_MSG_RESULT([** - MySQL client executable: $MYSQL]) AC_MSG_RESULT([** - CLISP executable: $CLISP]) AC_MSG_RESULT([** - CMUCL executable: $CMUCL]) AC_MSG_RESULT([** - SBCL executable: $SBCL]) AC_MSG_RESULT([** Here are the steps to continue the building process: **]) AC_MSG_RESULT([** 1) Type 'make' to build your Invenio system. **]) AC_MSG_RESULT([** 2) Type 'make install' to install your Invenio system. **]) AC_MSG_RESULT([** After that you can start customizing your installation as documented **]) AC_MSG_RESULT([** in the INSTALL file (i.e. edit invenio.conf, run inveniocfg, etc). **]) AC_MSG_RESULT([** Good luck, and thanks for choosing Invenio. **]) AC_MSG_RESULT([** -- Invenio Development Team **]) AC_MSG_RESULT([****************************************************************************]) ## end of file diff --git a/modules/bibconvert/etc/oaiarxiv2marcxml.xsl b/modules/bibconvert/etc/oaiarxiv2marcxml.xsl index 2d3f6c61e..58ccebdee 100644 --- a/modules/bibconvert/etc/oaiarxiv2marcxml.xsl +++ b/modules/bibconvert/etc/oaiarxiv2marcxml.xsl @@ -1,1033 +1,1049 @@ abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ LANL EDS arXiv arXiv CERN ; CERN , , , giva a faire accepted@appear@press@publ@review@submitted"> - - - - + + + + + + + + + + false + arXiv + DELETED SzGeCERN - arXiv - + + + + + + + + + false + arXiv eng TH- CERN Library PH-TH PH-EP- CERN Library PH-EP http://arxiv.org/pdf/.pdf p mult. 
p Comments: LANL EDS ARTICLE ARTICLE 13 Thesis THESIS THESIS 14 PREPRINT PREPRINT 11 diff --git a/modules/bibconvert/etc/oaidc2marcxml.xsl b/modules/bibconvert/etc/oaidc2marcxml.xsl index 25e4552f6..12bbd512d 100644 --- a/modules/bibconvert/etc/oaidc2marcxml.xsl +++ b/modules/bibconvert/etc/oaidc2marcxml.xsl @@ -1,191 +1,221 @@ - + - - - - - - - - - - - - - - - - DELETED - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + false + + + + DELETED + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + - PREPRINT - - - - - - - - - - + the right source data. --> + PREPRINT + + + + + + + diff --git a/modules/bibconvert/etc/oaimarc2marcxml.xsl b/modules/bibconvert/etc/oaimarc2marcxml.xsl index 6536173d9..f80d0f898 100644 --- a/modules/bibconvert/etc/oaimarc2marcxml.xsl +++ b/modules/bibconvert/etc/oaimarc2marcxml.xsl @@ -1,116 +1,136 @@ - + - - - - - - - - - - - - - - - - - DELETED - - - - - - - + + + + + + + + + + + + + false + + + + DELETED + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + --> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + + + + + diff --git a/modules/bibconvert/lib/bibconvert_xslt_engine.py b/modules/bibconvert/lib/bibconvert_xslt_engine.py index c9e15db11..81f33eff3 100644 --- a/modules/bibconvert/lib/bibconvert_xslt_engine.py +++ b/modules/bibconvert/lib/bibconvert_xslt_engine.py @@ -1,279 +1,317 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ bibconvert_xslt_engine - Wrapper for an XSLT engine. Customized to support BibConvert functions through the use of XPath 'format' function. 
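A typical invocation is sketched below (it assumes that ``xmltext`` holds a MARCXML string and that the oaidc2marcxml.xsl template shipped with Invenio is installed): >>> from invenio.bibconvert_xslt_engine import convert >>> result = convert(xmltext, 'oaidc2marcxml.xsl')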
Dependencies: Need one of the following XSLT processors: - libxml2 & libxslt - 4suite Used by: bibconvert.in FIXME: - Find better namespace for functions - Find less bogus URI (given as param to processor) for source and template - Implement command-line options - Think about better handling of 'value' parameter in bibconvert_function_* """ __revision__ = "$Id$" import sys import os +from warnings import warn + from invenio.config import \ CFG_ETCDIR, \ CFG_SITE_URL from invenio.bibconvert import FormatField +from invenio.textutils import encode_for_xml # The namespace used for BibConvert functions CFG_BIBCONVERT_FUNCTION_NS = "http://cdsweb.cern.ch/bibconvert/fn" # Import one XSLT processor # # processor_type: # -1 : No processor found # 0 : libxslt # 1 : 4suite processor_type = -1 try: # libxml2 & libxslt import libxml2 import libxslt processor_type = 0 except ImportError: pass if processor_type == -1: try: # 4suite from Ft.Xml.Xslt import Processor, XsltException from Ft.Xml import InputSource from xml.dom import Node processor_type = 1 except ImportError: pass CFG_BIBCONVERT_XSL_PATH = "%s%sbibconvert%sconfig" % (CFG_ETCDIR, os.sep, os.sep) -def bibconvert_function_libxslt(ctx, value, func): +def bibconvert_function_libxslt(dummy_ctx, value, func): """ libxslt extension function: Bridge between BibConvert formatting functions and XSL stylesheets. Can be used in that way in XSL stylesheet (provided xmlns:fn="http://cdsweb.cern.ch/bibconvert/fn" has been declared): (Adds strings 'mypref' and 'mysuff' as prefix/suffix to current node value, using BibConvert ADD function) if value is int, value is converted to string if value is Node (PyCObj), first child node (text node) is taken as value """ try: if isinstance(value, str): string_value = value elif isinstance(value, (int, long)): string_value = str(value) else: string_value = libxml2.xmlNode(_obj=value[0]).children.content return FormatField(string_value, func).rstrip('\n') except Exception, err: sys.stderr.write("Error during formatting function evaluation: " + \ str(err) + \ '\n') return '' +def bibconvert_escape_libxslt(dummy_ctx, value): + """ + Bridge to libxslt to escape the provided value. + """ + try: + if isinstance(value, str): + string_value = value + elif isinstance(value, (int, long)): + string_value = str(value) + else: + string_value = libxml2.xmlNode(_obj=value[0]).serialize('utf8') + + return encode_for_xml(string_value) -def bibconvert_function_4suite(ctx, value, func): + except Exception, err: + sys.stderr.write("Error during formatting function evaluation: " + \ + str(err) + \ + '\n') + + return '' + + +def bibconvert_function_4suite(dummy_ctx, value, func): """ 4suite extension function: Bridge between BibConvert formatting functions and XSL stylesheets. 
Can be used in that way in XSL stylesheet (provided xmlns:fn="http://cdsweb.cern.ch/bibconvert/fn" has been declared): (Adds strings 'mypref' and 'mysuff' as prefix/suffix to current node value, using BibConvert ADD function) if value is int, value is converted to string if value is Node, first child node (text node) is taken as value """ try: if len(value) > 0 and isinstance(value[0], Node): string_value = value[0].firstChild.nodeValue if string_value is None: string_value = '' else: string_value = str(value) return FormatField(string_value, func).rstrip('\n') except Exception, err: sys.stderr.write("Error during formatting function evaluation: " + \ str(err) + \ '\n') return '' +def bibconvert_escape_4suite(dummy_ctx, value): + """ + Bridge to 4suite to escape the provided value. + """ + ##FIXME: this does not work with 4suite. How one does serialize a Node? + warn("Invenio fn:escape is currently broken, when using 4suite") + return value + def convert(xmltext, template_filename=None, template_source=None): """ Processes an XML text according to a template, and returns the result. The template can be given either by name (or by path) or by source. If source is given, name is ignored. bibconvert_xslt_engine will look for template_filename in standard directories for templates. If not found, template_filename will be assumed to be a path to a template. If none can be found, return None. Raises an exception if cannot find an appropriate XSLT processor. @param xmltext: The string representation of the XML to process @param template_filename: The name of the template to use for the processing @param template_source: The configuration describing the processing. @return: the transformed XML text, or None if an error occured """ if processor_type == -1: # No XSLT processor found raise "No XSLT processor could be found" # Retrieve template and read it if template_source: template = template_source elif template_filename: try: path_to_templates = (CFG_BIBCONVERT_XSL_PATH + os.sep + template_filename) if os.path.exists(path_to_templates): template = file(path_to_templates).read() elif os.path.exists(template_filename): template = file(template_filename).read() else: sys.stderr.write(template_filename +' does not exist.') return None except IOError: sys.stderr.write(template_filename +' could not be read.') return None else: sys.stderr.write(template_filename +' was not given.') return None result = "" if processor_type == 0: # libxml2 & libxslt # Register BibConvert functions for use in XSL libxslt.registerExtModuleFunction("format", CFG_BIBCONVERT_FUNCTION_NS, bibconvert_function_libxslt) + libxslt.registerExtModuleFunction("escape", + CFG_BIBCONVERT_FUNCTION_NS, + bibconvert_escape_libxslt) # Load template and source try: template_xml = libxml2.parseDoc(template) except libxml2.parserError, e: sys.stderr.write('Parsing XSL template failed:\n ' + \ str(e) + '\n') return None processor = libxslt.parseStylesheetDoc(template_xml) try: source = libxml2.parseDoc(xmltext) except libxml2.parserError, e: sys.stderr.write('Parsing XML source failed:\n ' + \ str(e) + '\n') return None # Transform result_object = processor.applyStylesheet(source, None) result = processor.saveResultToString(result_object) # Deallocate processor.freeStylesheet() source.freeDoc() result_object.freeDoc() elif processor_type == 1: # 4suite # Init processor = Processor.Processor() # Register BibConvert functions for use in XSL processor.registerExtensionFunction(CFG_BIBCONVERT_FUNCTION_NS, "format", bibconvert_function_4suite) + 
processor.registerExtensionFunction(CFG_BIBCONVERT_FUNCTION_NS, + "escape", + bibconvert_escape_4suite) # Load template and source transform = InputSource.DefaultFactory.fromString(template, uri=CFG_SITE_URL) source = InputSource.DefaultFactory.fromString(xmltext, uri=CFG_SITE_URL) try: processor.appendStylesheet(transform) except XsltException, e: sys.stderr.write('Parsing XSL template failed:\n' + str(e)) return None # Transform try: result = processor.run(source) except XsltException, e: sys.stderr.write('Conversion failed:\n' + str(e)) return None else: sys.stderr.write("No XSLT processor could be found") return result ## def bc_profile(): ## """ ## Runs a benchmark ## """ ## global xmltext ## convert(xmltext, 'oaidc2marcxml.xsl') ## return ## def benchmark(): ## """ ## Benchmark the module, using profile and pstats ## """ ## import profile ## import pstats ## from invenio.bibformat import record_get_xml ## global xmltext ## xmltext = record_get_xml(10, 'oai_dc') ## profile.run('bc_profile()', "bibconvert_xslt_profile") ## p = pstats.Stats("bibconvert_xslt_profile") ## p.strip_dirs().sort_stats("cumulative").print_stats() if __name__ == "__main__": pass diff --git a/modules/bibformat/etc/format_templates/Makefile.am b/modules/bibformat/etc/format_templates/Makefile.am index cdf7dd94f..71e964eb9 100644 --- a/modules/bibformat/etc/format_templates/Makefile.am +++ b/modules/bibformat/etc/format_templates/Makefile.am @@ -1,40 +1,40 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
etcdir = $(sysconfdir)/bibformat/format_templates etc_DATA = Default_HTML_captions.bft Picture_HTML_brief.bft \ Default_HTML_detailed.bft Default_HTML_portfolio.bft \ Picture_HTML_detailed.bft Default_HTML_brief.bft \ BibTeX.bft MARCXML.bft Excel.bft \ Default_HTML_similarity.bft NLM.xsl \ - OAI_DC.xsl DC.xsl EndNote.xsl RSS.xsl \ + OAI_DC.xsl OAI_MARC.bft DC.xsl EndNote.xsl RSS.xsl \ RefWorks.xsl MODS.xsl \ Default_HTML_references.bft Default_HTML_files.bft \ Default_HTML_actions.bft Journal_HTML_detailed.bft \ Journal_HTML_brief.bft \ - Poetry_HTML_brief.bft Poetry_HTML_detailed.bft \ - AID_HTML_very_brief.bft Podcast.xsl \ - Video_HTML_brief.bft Video_HTML_detailed.bft + Poetry_HTML_brief.bft Poetry_HTML_detailed.bft \ + AID_HTML_very_brief.bft Podcast.xsl \ + Video_HTML_brief.bft Video_HTML_detailed.bft tmpdir = $(prefix)/var/tmp tmp_DATA = Test1.bft Test3.bft Test_2.bft Test_no_template.test EXTRA_DIST = $(etc_DATA) $(tmp_DATA) CLEANFILES = *.tmp diff --git a/modules/bibformat/etc/format_templates/OAI_MARC.bft b/modules/bibformat/etc/format_templates/OAI_MARC.bft new file mode 100644 index 000000000..1d7d1fe57 --- /dev/null +++ b/modules/bibformat/etc/format_templates/OAI_MARC.bft @@ -0,0 +1,3 @@ +OAI MARC +Standard MARC XML output suitable for embed in OAI-PMH responses + diff --git a/modules/bibformat/etc/output_formats/Makefile.am b/modules/bibformat/etc/output_formats/Makefile.am index 3864bd3c3..01fd35d8a 100644 --- a/modules/bibformat/etc/output_formats/Makefile.am +++ b/modules/bibformat/etc/output_formats/Makefile.am @@ -1,32 +1,32 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. etcdir = $(sysconfdir)/bibformat/output_formats etc_DATA = HB.bfo HC.bfo HD.bfo HP.bfo HX.bfo XM.bfo EXCEL.bfo \ XD.bfo HS.bfo HA.bfo \ XE.bfo XN.bfo XR.bfo XW.bfo \ - XOAIDC.bfo XO.bfo \ + XOAIDC.bfo XO.bfo XOAIMARC.bfo \ HDREF.bfo HDFILE.bfo HDACT.bfo XP.bfo tmpdir = $(prefix)/var/tmp tmp_DATA = TEST1.bfo TEST2.bfo TEST3.bfo EXTRA_DIST = $(etc_DATA) $(tmp_DATA) CLEANFILES = *.tmp diff --git a/modules/bibformat/etc/output_formats/XOAIMARC.bfo b/modules/bibformat/etc/output_formats/XOAIMARC.bfo new file mode 100644 index 000000000..f3cd4655f --- /dev/null +++ b/modules/bibformat/etc/output_formats/XOAIMARC.bfo @@ -0,0 +1 @@ +default: OAI_MARC.bft diff --git a/modules/bibformat/lib/bibformat_engine.py b/modules/bibformat/lib/bibformat_engine.py index bd8f17d0d..6f530033f 100644 --- a/modules/bibformat/lib/bibformat_engine.py +++ b/modules/bibformat/lib/bibformat_engine.py @@ -1,2111 +1,2111 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Formats a single XML Marc record using specified format. There is no API for the engine. Instead use module L{bibformat}. You can have a look at the various escaping modes available in X{BibFormatObject} in function L{escape_field} Still it is useful sometimes for debugging purpose to use the L{BibFormatObject} class directly. For eg: >>> from invenio.bibformat_engine import BibFormatObject >>> bfo = BibFormatObject(102) >>> bfo.field('245__a') The order Rodentia in South America >>> from invenio.bibformat_elements import bfe_title >>> bfe_title.format_element(bfo) The order Rodentia in South America @see: bibformat.py, bibformat_utils.py """ __revision__ = "$Id$" import re import sys import os import inspect import traceback import zlib import cgi from invenio.config import \ CFG_PATH_PHP, \ CFG_BINDIR, \ CFG_SITE_LANG from invenio.errorlib import \ register_errors, \ get_msgs_for_code_list from invenio.bibrecord import \ create_record, \ record_get_field_instances, \ record_get_field_value, \ record_get_field_values, \ record_xml_output from invenio.bibformat_xslt_engine import format from invenio.dbquery import run_sql from invenio.messages import \ language_list_long, \ wash_language, \ gettext_set_language from invenio import bibformat_dblayer from invenio.bibformat_config import \ CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, \ CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, \ CFG_BIBFORMAT_TEMPLATES_PATH, \ CFG_BIBFORMAT_ELEMENTS_PATH, \ CFG_BIBFORMAT_OUTPUTS_PATH, \ CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH from invenio.bibformat_utils import \ record_get_xml, \ parse_tag from invenio.htmlutils import \ HTMLWasher, \ - cfg_html_buffer_allowed_tag_whitelist, \ - cfg_html_buffer_allowed_attribute_whitelist + CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST, \ + CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST from invenio.webuser import collect_user_info from invenio.bibknowledge import get_kbr_values from HTMLParser import HTMLParseError if CFG_PATH_PHP: #Remove when call_old_bibformat is removed from xml.dom import minidom import tempfile # Cache for data we have already read and parsed format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} html_field = '' # String indicating that field should be # treated as HTML (and therefore no escaping of # HTML tags should occur. # Appears in some field values. washer = HTMLWasher() # Used to remove dangerous tags from HTML # sources # Regular expression for finding ... tag in format templates pattern_lang = re.compile(r''' #closing start tag (?P.*?) 
#anything but the next group (greedy) () #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Builds regular expression for finding each known language in tags ln_pattern_text = r"<(" for lang in language_list_long(enabled_langs_only=False): ln_pattern_text += lang[0] +r"|" ln_pattern_text = ln_pattern_text.rstrip(r"|") ln_pattern_text += r")>(.*?)" ln_pattern = re.compile(ln_pattern_text, re.IGNORECASE | re.DOTALL) # Regular expression for finding text to be translated translation_pattern = re.compile(r'_\((?P.*?)\)_', \ re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tag in format templates pattern_format_template_name = re.compile(r''' #closing start tag (?P.*?) #name value. any char that is not end tag ()(\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tag in format templates pattern_format_template_desc = re.compile(r''' #closing start tag (?P.*?) #description value. any char that is not end tag (\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tags in format templates pattern_tag = re.compile(r''' [^/\s]+) #any char but a space or slash \s* #any number of spaces (?P(\s* #params here (?P([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s) =\s* #equality: = followed by any number of spaces (?P[\'"]) #one of the separators (?P.*?) #param value: any chars that is not a separator like previous one (?P=sep) #same separator as starting one )*) #many params \s* #any number of spaces (/)?> #end of the tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding params inside tags in format templates pattern_function_params = re.compile(''' (?P([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s) =\s* # Equality: = followed by any number of spaces (?P[\'"]) # One of the separators (?P.*?) # Param value: any chars that is not a separator like previous one (?P=sep) # Same separator as starting one ''', re.VERBOSE | re.DOTALL ) # Regular expression for finding format elements "params" attributes # (defined by @param) pattern_format_element_params = re.compile(''' @param\s* # Begins with AT param keyword followed by space(s) (?P[^\s=]*):\s* # A single keyword and comma, then space(s) #(=\s*(?P[\'"]) # Equality, space(s) and then one of the separators #(?P.*?) # Default value: any chars that is not a separator like previous one #(?P=sep) # Same separator as starting one #)?\s* # Default value for param is optional. Followed by space(s) (?P.*) # Any text that is not end of line (thanks to MULTILINE parameter) ''', re.VERBOSE | re.MULTILINE) # Regular expression for finding format elements "see also" attribute # (defined by @see) pattern_format_element_seealso = re.compile('''@see:\s*(?P.*)''', re.VERBOSE | re.MULTILINE) #Regular expression for finding 2 expressions in quotes, separated by #comma (as in template("1st","2nd") ) #Used when parsing output formats ## pattern_parse_tuple_in_quotes = re.compile(''' ## (?P[\'"]) ## (?P.*) ## (?P=sep1) ## \s*,\s* ## (?P[\'"]) ## (?P.*) ## (?P=sep2) ## ''', re.VERBOSE | re.MULTILINE) def call_old_bibformat(recID, of="HD", on_the_fly=False, verbose=0): """ FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE Calls BibFormat for the record RECID in the desired output format 'of'. 
Note: this function always tries to return HTML, so when bibformat returns XML with embedded HTML format inside the tag FMT $g, as is suitable for prestoring output formats, we perform un-XML-izing here in order to return the HTML body only. @param recID: record ID to format @param of: output format to be used for formatting @param on_the_fly: if False, try to return an already preformatted version of the record in the database @param verbose: verbosity @return: a formatted output using old BibFormat """ out = "" res = [] if not on_the_fly: # look for formatted record existence: query = "SELECT value, last_updated FROM bibfmt WHERE "\ "id_bibrec='%s' AND format='%s'" % (recID, of) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'of', so print it if verbose == 9: last_updated = res[0][1] out += """\n
Found preformatted output for record %i (cache updated on %s). """ % (recID, last_updated) decompress = zlib.decompress return "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'of', # so try to call BibFormat on the fly or use default format: if verbose == 9: out += """\n
Formatting record %i on-the-fly with old BibFormat. """ % recID # Retrieve MARCXML # Build it on-the-fly only if 'call_old_bibformat' was called # with format=xm and on_the_fly=True xm_record = record_get_xml(recID, 'xm', on_the_fly=(on_the_fly and of == 'xm')) ## import platform ## # Some problems have been found using either popen() or os.system(). ## # Here is a temporary workaround until the issue is solved. ## if platform.python_compiler().find('Red Hat') > -1: ## # use os.system (result_code, result_path) = tempfile.mkstemp() command = "( %s/bibformat otype=%s ) > %s" % \ (CFG_BINDIR, of, result_path) (xm_code, xm_path) = tempfile.mkstemp() xm_file = open(xm_path, "w") xm_file.write(xm_record) xm_file.close() command = command + " <" + xm_path os.system(command) result_file = open(result_path,"r") bibformat_output = result_file.read() result_file.close() os.close(result_code) os.remove(result_path) os.close(xm_code) os.remove(xm_path) ## else: ## # use popen ## pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % CFG_BINDIR, ## "otype=%s" % format], ## 'rw') ## pipe_input.write(xm_record) ## pipe_input.flush() ## pipe_input.close() ## bibformat_output = pipe_output.read() ## pipe_output.close() ## pipe_error.close() if bibformat_output.startswith(""): dom = minidom.parseString(bibformat_output) for e in dom.getElementsByTagName('subfield'): if e.getAttribute('code') == 'g': for t in e.childNodes: out += t.data.encode('utf-8') else: out += bibformat_output return out def format_record(recID, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None, xml_record=None, user_info=None): """ Formats a record given an output format. Main entry function of the bibformat engine. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. You can either specify a record ID to format, or give its XML representation. If 'xml_record' is not None, then use it instead of recID. 'user_info' allows granting access to some functionalities on a page depending on the user's privileges. 'user_info' is the same object as the one returned by 'webuser.collect_user_info(req)' @param recID: the ID of record to format @param of: an output format code (or short identifier for the output format) @param ln: the language to use to format the record @param verbose: the level of verbosity from 0 to 9 (0: silent, 5: errors, 7: errors and warnings, stop if error in format elements, 9: errors and warnings, stop if error (debug mode)) @param search_pattern: list of strings representing the user request in web interface @param xml_record: an XML string representing the record to format @param user_info: the information of the user who will view the formatted page @return: formatted record """ if search_pattern is None: search_pattern = [] out = "" errors_ = [] # Temporary workflow (during migration of formats): # Call new BibFormat # But if format not found for new BibFormat, then call old BibFormat # Create a BibFormatObject to pass around, containing the record and context bfo = BibFormatObject(recID, ln, search_pattern, xml_record, user_info, of) if of.lower() != 'xm' and \ (not bfo.get_record() or len(bfo.get_record()) <= 1): # Record only has recid: do not format, except # for the xm format return "" # Find out which format template to use based on the record and output format. template = decide_format_template(bfo, of) if verbose == 9 and template is not None: out += """\n
Using %s template for record %i.
""" % (template, recID)

    ############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
    path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, template)
    if template is None or not os.access(path, os.R_OK):
        # template not found in new BibFormat. Call old one
        if verbose == 9:
            if template is None:
                out += """\n
No template found for output format %s and record %i.
(Check invenio.err log file for more details)
""" % (of, recID)
            else:
                out += """\n
Template %s could not be read.
""" % (template)
        if CFG_PATH_PHP:
            if verbose == 9:
                out += """\n
Using old BibFormat for record %s. """ % recID return out + call_old_bibformat(recID, of=of, on_the_fly=True, verbose=verbose) ############################# END ################################## error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_TEMPLATE_FOUND", of)], stream='error', ln=CFG_SITE_LANG) errors_.append(error) if verbose == 0: register_errors(error, 'error') elif verbose > 5: return out + error[0][1] return out # Format with template (out_, errors) = format_with_format_template(template, bfo, verbose) errors_.extend(errors) out += out_ return out def decide_format_template(bfo, of): """ Returns the format template name that should be used for formatting given output format and L{BibFormatObject}. Look at of rules, and take the first matching one. If no rule matches, returns None To match we ignore lettercase and spaces before and after value of rule and value of record @param bfo: a L{BibFormatObject} @param of: the code of the output format to use @return: name of a format template """ output_format = get_output_format(of) for rule in output_format['rules']: if rule['field'].startswith('00'): # Rule uses controlfield values = [bfo.control_field(rule['field']).strip()] #Remove spaces else: # Rule uses datafield values = bfo.fields(rule['field']) # loop over multiple occurences, but take the first match if len(values) > 0: for value in values: value = value.strip() #Remove spaces pattern = rule['value'].strip() #Remove spaces match_obj = re.match(pattern, value, re.IGNORECASE) if match_obj is not None and \ match_obj.end() == len(value): return rule['template'] template = output_format['default'] if template != '': return template else: return None def format_with_format_template(format_template_filename, bfo, verbose=0, format_template_code=None): """ Format a record given a format template. Also returns errors Returns a formatted version of the record represented by bfo, in the language specified in bfo, and with the specified format template. If format_template_code is provided, the template will not be loaded from format_template_filename (but format_template_filename will still be used to determine if bft or xsl transformation applies). This allows to preview format code without having to save file on disk. 
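    A (hypothetical) preview call could look like::

        bfo = BibFormatObject(10, ln='en')
        (html, errors) = format_with_format_template(
            'Default_HTML_brief.bft',  # only decides .bft vs .xsl handling
            bfo,
            format_template_code='<h2><BFE_TITLE /></h2>')

    Here the template code is taken from 'format_template_code', so the
    file on disk is never read.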
@param format_template_filename: the dilename of a format template @param bfo: the object containing parameters for the current formatting @param format_template_code: if not empty, use code as template instead of reading format_template_filename (used for previews) @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return: tuple (formatted text, errors) """ _ = gettext_set_language(bfo.lang) def translate(match): """ Translate matching values """ word = match.group("word") translated_word = _(word) return translated_word errors_ = [] if format_template_code is not None: format_content = str(format_template_code) else: format_content = get_format_template(format_template_filename)['code'] if format_template_filename is None or \ format_template_filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION): # .bft filtered_format = filter_languages(format_content, bfo.lang) localized_format = translation_pattern.sub(translate, filtered_format) (evaluated_format, errors) = eval_format_template_elements(localized_format, bfo, verbose) errors_ = errors else: #.xsl if bfo.xml_record: # bfo was initialized with a custom MARCXML xml_record = '\n' + \ record_xml_output(bfo.record) else: # Fetch MARCXML. On-the-fly xm if we are now formatting in xm xml_record = '\n' + \ record_get_xml(bfo.recID, 'xm', on_the_fly=False) # Transform MARCXML using stylesheet evaluated_format = format(xml_record, template_source=format_content) return (evaluated_format, errors_) def eval_format_template_elements(format_template, bfo, verbose=0): """ Evalutes the format elements of the given template and replace each element with its value. Also returns errors. Prepare the format template content so that we can directly replace the marc code by their value. This implies: 1. Look for special tags 2. replace special tags by their evaluation @param format_template: the format template code @param bfo: the object containing parameters for the current formatting @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return: tuple (result, errors) """ errors_ = [] # First define insert_element_code(match), used in re.sub() function def insert_element_code(match): """ Analyses 'match', interpret the corresponding code, and return the result of the evaluation. Called by substitution in 'eval_format_template_elements(...)' @param match: a match object corresponding to the special tag that must be interpreted """ function_name = match.group("function_name") try: format_element = get_format_element(function_name, verbose) except Exception, e: if verbose >= 5: return '' + \ cgi.escape(str(e)).replace('\n', '
<br/>') + \
                   '<br/>
' if format_element is None: error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", function_name)], stream='error', ln=CFG_SITE_LANG) errors_.append(error) if verbose >= 5: return '' + \ error[0][1]+'' else: params = {} # Look for function parameters given in format template code all_params = match.group('params') if all_params is not None: function_params_iterator = pattern_function_params.finditer(all_params) for param_match in function_params_iterator: name = param_match.group('param') value = param_match.group('value') params[name] = value # Evaluate element with params and return (Do not return errors) (result, errors) = eval_format_element(format_element, bfo, params, verbose) errors_.append(errors) return result # Substitute special tags in the format by our own text. # Special tags have the form format = pattern_tag.sub(insert_element_code, format_template) return (format, errors_) def eval_format_element(format_element, bfo, parameters=None, verbose=0): """ Returns the result of the evaluation of the given format element name, with given L{BibFormatObject} and parameters. Also returns the errors of the evaluation. @param format_element: a format element structure as returned by get_format_element @param bfo: a L{BibFormatObject} used for formatting @param parameters: a dict of parameters to be used for formatting. Key is parameter and value is value of parameter @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return: tuple (result, errors) """ if parameters is None: parameters = {} errors = [] #Load special values given as parameters prefix = parameters.get('prefix', "") suffix = parameters.get('suffix', "") default_value = parameters.get('default', "") escape = parameters.get('escape', "") output_text = '' # 3 possible cases: # a) format element file is found: we execute it # b) format element file is not found, but exist in tag table (e.g. bfe_isbn) # c) format element is totally unknown. Do nothing or report error if format_element is not None and format_element['type'] == "python": # a) We found an element with the tag name, of type "python" # Prepare a dict 'params' to pass as parameter to 'format' # function of element params = {} # Look for parameters defined in format element # Fill them with specified default values and values # given as parameters. # Also remember if the element overrides the 'escape' # parameter format_element_overrides_escape = False for param in format_element['attrs']['params']: name = param['name'] default = param['default'] params[name] = parameters.get(name, default) if name == 'escape': format_element_overrides_escape = True # Add BibFormatObject params['bfo'] = bfo # Execute function with given parameters and return result. 
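        # Note: apply(function, (), params) is the Python 2 spelling of
        # function(**params): every attribute found on the template tag
        # (plus the 'bfo' object added above) is passed to the element's
        # 'format'/'format_element' function as a keyword argument.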
function = format_element['code'] try: output_text = apply(function, (), params) except Exception, e: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT", name, str(params)) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: tb = sys.exc_info()[2] error_string = get_msgs_for_code_list(error, stream='error', ln=CFG_SITE_LANG) stack = traceback.format_exception(Exception, e, tb, limit=None) output_text = ''+ \ str(error_string[0][1]) + "".join(stack) +' ' # None can be returned when evaluating function if output_text is None: output_text = "" else: output_text = str(output_text) # Escaping: # (1) By default, everything is escaped in mode 1 # (2) If evaluated element has 'escape_values()' function, use # its returned value as escape mode, and override (1) # (3) If template has a defined parameter 'escape' (in allowed # values), use it, and override (1) and (2). If this # 'escape' parameter is overriden by the format element # (defined in the 'format' function of the element), leave # the escaping job to this element # (1) escape_mode = 1 # (2) escape_function = format_element['escape_function'] if escape_function is not None: try: escape_mode = apply(escape_function, (), {'bfo': bfo}) except Exception, e: error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT_ESCAPE", name) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: tb = sys.exc_info()[2] error_string = get_msgs_for_code_list(error, stream='error', ln=CFG_SITE_LANG) output_text += ''+ \ str(error_string[0][1]) +' ' # (3) if escape in ['0', '1', '2', '3', '4', '5', '6', '7']: escape_mode = int(escape) # If escape is equal to 1, then escape all # HTML reserved chars. if escape_mode > 0 and not format_element_overrides_escape: output_text = escape_field(output_text, mode=escape_mode) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty if output_text.strip() != "": output_text = prefix + output_text + suffix # Add the default value if output_text is empty if output_text == "": output_text = default_value return (output_text, errors) elif format_element is not None and format_element['type'] == "field": # b) We have not found an element in files that has the tag # name. Then look for it in the table "tag" # # # # Load special values given as parameters separator = parameters.get('separator ', "") nbMax = parameters.get('nbMax', "") escape = parameters.get('escape', "1") # By default, escape here # Get the fields tags that have to be printed tags = format_element['attrs']['tags'] output_text = [] # Get values corresponding to tags for tag in tags: p_tag = parse_tag(tag) values = record_get_field_values(bfo.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if len(values)>0 and isinstance(values[0], dict): #flatten dict to its values only values_list = map(lambda x: x.values(), values) #output_text.extend(values) for values in values_list: output_text.extend(values) else: output_text.extend(values) if nbMax != "": try: nbMax = int(nbMax) output_text = output_text[:nbMax] except: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_NBMAX_NOT_INT", name) errors.append(error) if verbose < 5: register_errors(error, 'error') elif verbose >= 5: error_string = get_msgs_for_code_list(error, stream='error', ln=CFG_SITE_LANG) output_text = output_text.append(error_string[0][1]) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty. 
# If evaluation is empty string, return default value if it exists. # Else return empty string if ("".join(output_text)).strip() != "": # If escape is equal to 1, then escape all # HTML reserved chars. if escape == '1': output_text = cgi.escape(separator.join(output_text)) else: output_text = separator.join(output_text) output_text = prefix + output_text + suffix else: #Return default value output_text = default_value return (output_text, errors) else: # c) Element is unknown error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", format_element)], stream='error', ln=CFG_SITE_LANG) errors.append(error) if verbose < 5: register_errors(error, 'error') return ("", errors) elif verbose >= 5: if verbose >= 9: sys.exit(error[0][1]) return ('' + \ error[0][1]+'', errors) def filter_languages(format_template, ln='en'): """ Filters the language tags that do not correspond to the specified language. @param format_template: the format template code @param ln: the language that is NOT filtered out from the template @return: the format template with unnecessary languages filtered out """ # First define search_lang_tag(match) and clean_language_tag(match), used # in re.sub() function def search_lang_tag(match): """ Searches for the ... tag and remove inner localized tags such as , , that are not current_lang. If current_lang cannot be found inside ... , try to use 'CFG_SITE_LANG' @param match: a match object corresponding to the special tag that must be interpreted """ current_lang = ln def clean_language_tag(match): """ Return tag text content if tag language of match is output language. Called by substitution in 'filter_languages(...)' @param match: a match object corresponding to the special tag that must be interpreted """ if match.group(1) == current_lang: return match.group(2) else: return "" # End of clean_language_tag lang_tag_content = match.group("langs") # Try to find tag with current lang. If it does not exists, # then current_lang becomes CFG_SITE_LANG until the end of this # replace pattern_current_lang = re.compile(r"<("+current_lang+ \ r")\s*>(.*?)()", re.IGNORECASE | re.DOTALL) if re.search(pattern_current_lang, lang_tag_content) is None: current_lang = CFG_SITE_LANG cleaned_lang_tag = ln_pattern.sub(clean_language_tag, lang_tag_content) return cleaned_lang_tag # End of search_lang_tag filtered_format_template = pattern_lang.sub(search_lang_tag, format_template) return filtered_format_template def get_format_template(filename, with_attributes=False): """ Returns the structured content of the given formate template. if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in dictionary (it might, if it has already been loaded previously):: {'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} } @param filename: the filename of an format template @param with_attributes: if True, fetch the attributes (names and description) for format' @return: strucured content of format template """ # Get from cache whenever possible global format_templates_cache if not filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) and \ not filename.endswith(".xsl"): return None if format_templates_cache.has_key(filename): # If we must return with attributes and template exist in # cache with attributes then return cache. 
# Else reload with attributes if with_attributes and \ format_templates_cache[filename].has_key('attrs'): return format_templates_cache[filename] format_template = {'code':""} try: path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename) format_file = open(path) format_content = format_file.read() format_file.close() # Load format template code # Remove name and description if filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION): code_and_description = pattern_format_template_name.sub("", format_content, 1) code = pattern_format_template_desc.sub("", code_and_description, 1) else: code = format_content format_template['code'] = code except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], stream='error', ln=CFG_SITE_LANG) register_errors(errors, 'error') # Save attributes if necessary if with_attributes: format_template['attrs'] = get_format_template_attrs(filename) # Cache and return format_templates_cache[filename] = format_template return format_template def get_format_templates(with_attributes=False): """ Returns the list of all format templates, as dictionary with filenames as keys if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in each dictionary (it might, if it has already been loaded previously):: [{'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} }, ... } @param with_attributes: if True, fetch the attributes (names and description) for formats @return: the list of format templates (with code and info) """ format_templates = {} files = os.listdir(CFG_BIBFORMAT_TEMPLATES_PATH) for filename in files: if filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) or \ filename.endswith(".xsl"): format_templates[filename] = get_format_template(filename, with_attributes) return format_templates def get_format_template_attrs(filename): """ Returns the attributes of the format template with given filename The attributes are {'name', 'description'} Caution: the function does not check that path exists or that the format element is valid. @param filename: the name of a format template @return: a structure with detailed information about given format template """ attrs = {} attrs['name'] = "" attrs['description'] = "" try: template_file = open("%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename)) code = template_file.read() template_file.close() match = None if filename.endswith(".xsl"): # .xsl attrs['name'] = filename[:-4] else: # .bft match = pattern_format_template_name.search(code) if match is not None: attrs['name'] = match.group('name') else: attrs['name'] = filename match = pattern_format_template_desc.search(code) if match is not None: attrs['description'] = match.group('desc').rstrip('.') except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], stream='error', ln=CFG_SITE_LANG) register_errors(errors, 'error') attrs['name'] = filename return attrs def get_format_element(element_name, verbose=0, with_built_in_params=False): """ Returns the format element structured content. Return None if element cannot be loaded (file not found, not readable or invalid) The returned structure is:: {'attrs': {some attributes in dict. 
See get_format_element_attrs_from_*} 'code': the_function_code, 'type':"field" or "python" depending if element is defined in file or table, 'escape_function': the function to call to know if element output must be escaped} @param element_name: the name of the format element to load @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @param with_built_in_params: if True, load the parameters built in all elements @return: a dictionary with format element attributes """ # Get from cache whenever possible global format_elements_cache errors = [] # Resolve filename and prepare 'name' as key for the cache filename = resolve_format_element_filename(element_name) if filename is not None: name = filename.upper() else: name = element_name.upper() if format_elements_cache.has_key(name): element = format_elements_cache[name] if not with_built_in_params or \ (with_built_in_params and \ element['attrs'].has_key('builtin_params')): return element if filename is None: # Element is maybe in tag table if bibformat_dblayer.tag_exists_for_name(element_name): format_element = {'attrs': get_format_element_attrs_from_table( \ element_name, with_built_in_params), 'code':None, 'escape_function':None, 'type':"field"} # Cache and returns format_elements_cache[name] = format_element return format_element else: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND", element_name)], stream='error', ln=CFG_SITE_LANG) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) return None else: format_element = {} module_name = filename if module_name.endswith(".py"): module_name = module_name[:-3] # Load element try: module = __import__(CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH + \ "." + module_name) # Load last module in import path # For eg. load bfe_name in # invenio.bibformat_elements.bfe_name # Used to keep flexibility regarding where elements # directory is (for eg. 
test cases) components = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH.split(".") for comp in components[1:]: module = getattr(module, comp) except Exception, e: # We catch all exceptions here, as we just want to print # traceback in all cases tb = sys.exc_info()[2] stack = traceback.format_exception(Exception, e, tb, limit=None) errors = get_msgs_for_code_list([("ERR_BIBFORMAT_IN_FORMAT_ELEMENT", element_name,"\n" + "\n".join(stack[-2:-1]))], stream='error', ln=CFG_SITE_LANG) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if errors: if verbose >= 7: raise Exception, errors[0][1] return None # Load function 'format_element()' inside element try: function_format = module.__dict__[module_name].format_element format_element['code'] = function_format except AttributeError, e: # Try to load 'format()' function try: function_format = module.__dict__[module_name].format format_element['code'] = function_format except AttributeError, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_FORMAT_FUNCTION", element_name)], stream='error', ln=CFG_SITE_LANG) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if errors: if verbose >= 7: raise Exception, errors[0][1] return None # Load function 'escape_values()' inside element function_escape = getattr(module.__dict__[module_name], 'escape_values', None) format_element['escape_function'] = function_escape # Prepare, cache and return format_element['attrs'] = get_format_element_attrs_from_function( \ function_format, element_name, with_built_in_params) format_element['type'] = "python" format_elements_cache[name] = format_element return format_element def get_format_elements(with_built_in_params=False): """ Returns the list of format elements attributes as dictionary structure Elements declared in files have priority over element declared in 'tag' table The returned object has this format:: {element_name1: {'attrs': {'description':..., 'seealso':... 'params':[{'name':..., 'default':..., 'description':...}, ...] 'builtin_params':[{'name':..., 'default':..., 'description':...}, ...] }, 'code': code_of_the_element }, element_name2: {...}, ...} Returns only elements that could be loaded (not error in code) @return: a dict of format elements with name as key, and a dict as attributes @param with_built_in_params: if True, load the parameters built in all elements """ format_elements = {} mappings = bibformat_dblayer.get_all_name_tag_mappings() for name in mappings: format_elements[name.upper().replace(" ", "_").strip()] = get_format_element(name, with_built_in_params=with_built_in_params) files = os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH) for filename in files: filename_test = filename.upper().replace(" ", "_") if filename_test.endswith(".PY") and filename.upper() != "__INIT__.PY": if filename_test.startswith("BFE_"): filename_test = filename_test[4:] element_name = filename_test[:-3] element = get_format_element(element_name, with_built_in_params=with_built_in_params) if element is not None: format_elements[element_name] = element return format_elements def get_format_element_attrs_from_function(function, element_name, with_built_in_params=False): """ Returns the attributes of the function given as parameter. It looks for standard parameters of the function, default values and comments in the docstring. 
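    For instance, a (hypothetical) element such as::

        def format_element(bfo, separator=', ', highlight='no'):
            '''
            Prints the authors of the record.

            @param separator: a separator between authors
            @param highlight: if 'yes', highlight the search pattern
            @see: bfe_title.py
            '''
            return separator.join(bfo.fields('100__a'))

    would yield the description "Prints the authors of the record", two
    documented parameters ('separator' and 'highlight', 'bfo' being
    hidden) and one 'seealso' entry.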
The attributes are:: {'name' : "name of element" #basically the name of 'name' parameter 'description': "a string description of the element", 'seealso' : ["element_1.py", "element_2.py", ...] #a list of related elements 'params': [{'name':"param_name", #a list of parameters for this element (except 'bfo') 'default':"default value", 'description': "a description"}, ...], 'builtin_params': {name: {'name':"param_name",#the parameters builtin for all elem of this kind 'default':"default value", 'description': "a description"}, ...}, } @param function: the formatting function of a format element @param element_name: the name of the element @param with_built_in_params: if True, load the parameters built in all elements @return: a structure with detailed information of a function """ attrs = {} attrs['description'] = "" attrs['name'] = element_name.replace(" ", "_").upper() attrs['seealso'] = [] docstring = function.__doc__ if isinstance(docstring, str): # Look for function description in docstring #match = pattern_format_element_desc.search(docstring) description = docstring.split("@param")[0] description = description.split("@see:")[0] attrs['description'] = description.strip().rstrip('.') # Look for @see: in docstring match = pattern_format_element_seealso.search(docstring) if match is not None: elements = match.group('see').rstrip('.').split(",") for element in elements: attrs['seealso'].append(element.strip()) params = {} # Look for parameters in function definition (args, varargs, varkw, defaults) = inspect.getargspec(function) # Prepare args and defaults_list such that we can have a mapping # from args to defaults args.reverse() if defaults is not None: defaults_list = list(defaults) defaults_list.reverse() else: defaults_list = [] for arg, default in map(None, args, defaults_list): if arg == "bfo": #Don't keep this as parameter. It is hidden to users, and #exists in all elements of this kind continue param = {} param['name'] = arg if default is None: #In case no check is made inside element, we prefer to #print "" (nothing) than None in output param['default'] = "" else: param['default'] = default param['description'] = "(no description provided)" params[arg] = param if isinstance(docstring, str): # Look for AT param descriptions in docstring. 
# Add description to existing parameters in params dict params_iterator = pattern_format_element_params.finditer(docstring) for match in params_iterator: name = match.group('name') if params.has_key(name): params[name]['description'] = match.group('desc').rstrip('.') attrs['params'] = params.values() # Load built-in parameters if necessary if with_built_in_params: builtin_params = [] # Add 'prefix' parameter param_prefix = {} param_prefix['name'] = "prefix" param_prefix['default'] = "" param_prefix['description'] = """A prefix printed only if the record has a value for this element""" builtin_params.append(param_prefix) # Add 'suffix' parameter param_suffix = {} param_suffix['name'] = "suffix" param_suffix['default'] = "" param_suffix['description'] = """A suffix printed only if the record has a value for this element""" builtin_params.append(param_suffix) # Add 'default' parameter param_default = {} param_default['name'] = "default" param_default['default'] = "" param_default['description'] = """A default value printed if the record has no value for this element""" builtin_params.append(param_default) # Add 'escape' parameter param_escape = {} param_escape['name'] = "escape" param_escape['default'] = "" param_escape['description'] = """0 keeps value as it is. Refer to main documentation for escaping modes 1 to 7""" builtin_params.append(param_escape) attrs['builtin_params'] = builtin_params return attrs def get_format_element_attrs_from_table(element_name, with_built_in_params=False): """ Returns the attributes of the format element with given name in 'tag' table. Returns None if element_name does not exist in tag table. The attributes are:: {'name' : "name of element" #basically the name of 'element_name' parameter 'description': "a string description of the element", 'seealso' : [] #a list of related elements. Always empty in this case 'params': [], #a list of parameters for this element. 
Always empty in this case 'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind 'default':"default value", 'description': "a description"}, ...], 'tags':["950.1", 203.a] #the list of tags printed by this element } @param element_name: an element name in database @param element_name: the name of the element @param with_built_in_params: if True, load the parameters built in all elements @return: a structure with detailed information of an element found in DB """ attrs = {} tags = bibformat_dblayer.get_tags_from_name(element_name) field_label = "field" if len(tags)>1: field_label = "fields" attrs['description'] = "Prints %s %s of the record" % (field_label, ", ".join(tags)) attrs['name'] = element_name.replace(" ", "_").upper() attrs['seealso'] = [] attrs['params'] = [] attrs['tags'] = tags # Load built-in parameters if necessary if with_built_in_params: builtin_params = [] # Add 'prefix' parameter param_prefix = {} param_prefix['name'] = "prefix" param_prefix['default'] = "" param_prefix['description'] = """A prefix printed only if the record has a value for this element""" builtin_params.append(param_prefix) # Add 'suffix' parameter param_suffix = {} param_suffix['name'] = "suffix" param_suffix['default'] = "" param_suffix['description'] = """A suffix printed only if the record has a value for this element""" builtin_params.append(param_suffix) # Add 'separator' parameter param_separator = {} param_separator['name'] = "separator" param_separator['default'] = " " param_separator['description'] = """A separator between elements of the field""" builtin_params.append(param_separator) # Add 'nbMax' parameter param_nbMax = {} param_nbMax['name'] = "nbMax" param_nbMax['default'] = "" param_nbMax['description'] = """The maximum number of values to print for this element. No limit if not specified""" builtin_params.append(param_nbMax) # Add 'default' parameter param_default = {} param_default['name'] = "default" param_default['default'] = "" param_default['description'] = """A default value printed if the record has no value for this element""" builtin_params.append(param_default) # Add 'escape' parameter param_escape = {} param_escape['name'] = "escape" param_escape['default'] = "" param_escape['description'] = """If set to 1, replaces special characters '&', '<' and '>' of this element by SGML entities""" builtin_params.append(param_escape) attrs['builtin_params'] = builtin_params return attrs def get_output_format(code, with_attributes=False, verbose=0): """ Returns the structured content of the given output format If 'with_attributes' is true, also returns the names and description of the output formats, else 'attrs' is not returned in dict (it might, if it has already been loaded previously). if output format corresponding to 'code' is not found return an empty structure. 
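    The underlying file syntax (sketched here for a hypothetical
    TEST.bfo) is line-based: a 'tag' line selects the field to test,
    each 'condition --- template' line adds a rule, and a 'default:'
    line names the fallback template::

        tag 980__a:
        PREPRINT --- PREPRINT.bft
        THESIS --- THESIS.bft
        default: Default_HTML_detailed.bft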
See get_output_format_attrs() to learn more about the attributes:: {'rules': [ {'field': "980__a", 'value': "PREPRINT", 'template': "filename_a.bft", }, {...} ], 'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1", 'content_type': "application/ms-excel", 'visibility': 1 } 'default':"filename_b.bft" } @param code: the code of an output_format @param with_attributes: if True, fetch the attributes (names and description) for format @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return: strucured content of output format """ output_format = {'rules':[], 'default':""} filename = resolve_output_format_filename(code, verbose) if filename is None: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN", code)], stream='error', ln=CFG_SITE_LANG) register_errors(errors, 'error') if with_attributes: #Create empty attrs if asked for attributes output_format['attrs'] = get_output_format_attrs(code, verbose) return output_format # Get from cache whenever possible global format_outputs_cache if format_outputs_cache.has_key(filename): # If was must return with attributes but cache has not # attributes, then load attributes if with_attributes and not \ format_outputs_cache[filename].has_key('attrs'): format_outputs_cache[filename]['attrs'] = get_output_format_attrs(code, verbose) return format_outputs_cache[filename] try: if with_attributes: output_format['attrs'] = get_output_format_attrs(code, verbose) path = "%s%s%s" % (CFG_BIBFORMAT_OUTPUTS_PATH, os.sep, filename ) format_file = open(path) current_tag = '' for line in format_file: line = line.strip() if line == "": # Ignore blank lines continue if line.endswith(":"): # Retrieve tag # Remove : spaces and eol at the end of line clean_line = line.rstrip(": \n\r") # The tag starts at second position current_tag = "".join(clean_line.split()[1:]).strip() elif line.find('---') != -1: words = line.split('---') template = words[-1].strip() condition = ''.join(words[:-1]) value = "" output_format['rules'].append({'field': current_tag, 'value': condition, 'template': template, }) elif line.find(':') != -1: # Default case default = line.split(':')[1].strip() output_format['default'] = default except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE", filename, str(e))], stream='error', ln=CFG_SITE_LANG) register_errors(errors, 'error') # Cache and return format_outputs_cache[filename] = output_format return output_format def get_output_format_attrs(code, verbose=0): """ Returns the attributes of an output format. The attributes contain 'code', which is the short identifier of the output format (to be given as parameter in format_record function to specify the output format), 'description', a description of the output format, 'visibility' the visibility of the format in the output format list on public pages and 'names', the localized names of the output format. If 'content_type' is specified then the search_engine will send a file with this content type and with result of formatting as content to the user. The 'names' dict always contais 'generic', 'ln' (for long name) and 'sn' (for short names) keys. 'generic' is the default name for output format. 'ln' and 'sn' contain long and short localized names of the output format. 
Only the languages for which a localization exist are used:: {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1", 'content_type': "application/ms-excel", 'visibility': 1 } @param code: the short identifier of the format @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return: strucured content of output format attributes """ if code.endswith("."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION): code = code[:-(len(CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION) + 1)] attrs = {'names':{'generic':"", 'ln':{}, 'sn':{}}, 'description':'', 'code':code.upper(), 'content_type':"", 'visibility':1} filename = resolve_output_format_filename(code, verbose) if filename is None: return attrs attrs['names'] = bibformat_dblayer.get_output_format_names(code) attrs['description'] = bibformat_dblayer.get_output_format_description(code) attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code) attrs['visibility'] = bibformat_dblayer.get_output_format_visibility(code) return attrs def get_output_formats(with_attributes=False): """ Returns the list of all output format, as a dictionary with their filename as key If 'with_attributes' is true, also returns the names and description of the output formats, else 'attrs' is not returned in dicts (it might, if it has already been loaded previously). See get_output_format_attrs() to learn more on the attributes:: {'filename_1.bfo': {'rules': [ {'field': "980__a", 'value': "PREPRINT", 'template': "filename_a.bft", }, {...} ], 'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1" } 'default':"filename_b.bft" }, 'filename_2.bfo': {...}, ... } @param with_attributes: if returned output formats contain detailed info, or not @type with_attributes: boolean @return: the list of output formats """ output_formats = {} files = os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH) for filename in files: if filename.endswith("."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION): code = "".join(filename.split(".")[:-1]) output_formats[filename] = get_output_format(code, with_attributes) return output_formats def resolve_format_element_filename(element_name): """ Returns the filename of element corresponding to x{element_name} This is necessary since format templates code call elements by ignoring case, for eg. is the same as . It is also recommended that format elements filenames are prefixed with bfe_ . We need to look for these too. The name of the element has to start with "BFE_". @param element_name: a name for a format element @return: the corresponding filename, with right case """ if not element_name.endswith(".py"): name = element_name.replace(" ", "_").upper() +".PY" else: name = element_name.replace(" ", "_").upper() files = os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH) for filename in files: test_filename = filename.replace(" ", "_").upper() if test_filename == name or \ test_filename == "BFE_" + name or \ "BFE_" + test_filename == name: return filename # No element with that name found # Do not log error, as it might be a normal execution case: # element can be in database return None def resolve_output_format_filename(code, verbose=0): """ Returns the filename of output corresponding to code This is necessary since output formats names are not case sensitive but most file systems are. 
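    For example (sketch): both 'hd' and 'HD.bfo' are resolved to the
    actual filename 'HD.bfo' when such a file exists in
    CFG_BIBFORMAT_OUTPUTS_PATH, since candidate filenames are compared
    after upper-casing both sides.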
@param code: the code for an output format @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return: the corresponding filename, with right case, or None if not found """ #Remove non alphanumeric chars (except . and _) code = re.sub(r"[^.0-9a-zA-Z_]", "", code) if not code.endswith("."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION): code = re.sub(r"\W", "", code) code += "."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION files = os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH) for filename in files: if filename.upper() == code.upper(): return filename # No output format with that name found errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME", code)], stream='error', ln=CFG_SITE_LANG) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if verbose >= 9: sys.exit(errors[0][1]) return None def get_fresh_format_template_filename(name): """ Returns a new filename and name for template with given name. Used when writing a new template to a file, so that the name has no space, is unique in template directory Returns (unique_filename, modified_name) @param name: name for a format template @return: the corresponding filename, and modified name if necessary """ #name = re.sub(r"\W", "", name) #Remove non alphanumeric chars name = name.replace(" ", "_") filename = name # Remove non alphanumeric chars (except .) filename = re.sub(r"[^.0-9a-zA-Z]", "", filename) path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION index = 1 while os.path.exists(path): index += 1 filename = name + str(index) path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION if index > 1: returned_name = (name + str(index)).replace("_", " ") else: returned_name = name.replace("_", " ") return (filename + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, returned_name) #filename.replace("_", " ")) def get_fresh_output_format_filename(code): """ Returns a new filename for output format with given code. Used when writing a new output format to a file, so that the code has no space, is unique in output format directory. The filename also need to be at most 6 chars long, as the convention is that filename == output format code (+ .extension) We return an uppercase code Returns (unique_filename, modified_code) @param code: the code of an output format @return: the corresponding filename, and modified code if necessary """ #code = re.sub(r"\W", "", code) #Remove non alphanumeric chars code = code.upper().replace(" ", "_") # Remove non alphanumeric chars (except . and _) code = re.sub(r"[^.0-9a-zA-Z_]", "", code) if len(code) > 6: code = code[:6] filename = code path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION index = 2 while os.path.exists(path): filename = code + str(index) if len(filename) > 6: filename = code[:-(len(str(index)))]+str(index) index += 1 path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION # We should not try more than 99999... Well I don't see how we # could get there.. Sanity check. if index >= 99999: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED", code)], stream='error', ln=CFG_SITE_LANG) register_errors(errors, 'error') sys.exit("Output format cannot be named as %s"%code) return (filename + "." 
+ CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, filename) def clear_caches(): """ Clear the caches (Output Format, Format Templates and Format Elements) @return: None """ global format_templates_cache, format_elements_cache, format_outputs_cache format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} class BibFormatObject: """ An object that encapsulates a record and associated methods, and that is given as parameter to all format elements 'format' function. The object is made specifically for a given formatting, i.e. it includes for example the language for the formatting. The object provides basic accessors to the record. For full access, one can get the record with get_record() and then use BibRecord methods on the returned object. """ # The record record = None # The language in which the formatting has to be done lang = CFG_SITE_LANG # A list of string describing the context in which the record has # to be formatted. # It represents the words of the user request in web interface search search_pattern = [] # The id of the record recID = 0 # The information about the user, as returned by # 'webuser.collect_user_info(req)' user_info = None # The format in which the record is being formatted output_format = '' req = None # DEPRECATED: use bfo.user_info instead. Used by WebJournal. def __init__(self, recID, ln=CFG_SITE_LANG, search_pattern=None, xml_record=None, user_info=None, output_format=''): """ Creates a new bibformat object, with given record. You can either specify an record ID to format, or give its xml representation. if 'xml_record' is not None, use 'xml_record' instead of recID for the record. 'user_info' allows to grant access to some functionalities on a page depending on the user's priviledges. It is a dictionary in the following form:: user_info = { 'remote_ip' : '', 'remote_host' : '', 'referer' : '', 'uri' : '', 'agent' : '', 'uid' : -1, 'nickname' : '', 'email' : '', 'group' : [], 'guest' : '1' } @param recID: the id of a record @param ln: the language in which the record has to be formatted @param search_pattern: list of string representing the request used by the user in web interface @param xml_record: a xml string of the record to format @param user_info: the information of the user who will view the formatted page @param output_format: the output_format used for formatting this record """ self.xml_record = None # *Must* remain empty if recid is given if xml_record is not None: # If record is given as parameter self.xml_record = xml_record self.record = create_record(xml_record)[0] recID = record_get_field_value(self.record, "001") self.lang = wash_language(ln) if search_pattern is None: search_pattern = [] self.search_pattern = search_pattern self.recID = recID self.output_format = output_format self.user_info = user_info if self.user_info is None: self.user_info = collect_user_info(None) def get_record(self): """ Returns the record structure of this L{BibFormatObject} instance @return: the record structure as defined by BibRecord library """ from invenio.search_engine import get_record # Create record if necessary if self.record is None: # on-the-fly creation if current output is xm self.record = get_record(self.recID) return self.record def control_field(self, tag, escape=0): """ Returns the value of control field given by tag in record @param tag: the marc code of a field @param escape: 1 if returned value should be escaped. Else 0. 
@return: value of field tag in record """ if self.get_record() is None: #Case where BibRecord could not parse object return '' p_tag = parse_tag(tag) field_value = record_get_field_value(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if escape == 0: return field_value else: return escape_field(field_value, escape) def field(self, tag, escape=0): """ Returns the value of the field corresponding to tag in the current record. If the value does not exist, return empty string. Else returns the same as bfo.fields(..)[0] (see docstring below). 'escape' parameter allows to escape special characters of the field. The value of escape can be: 0. no escaping 1. escape all HTML characters 2. remove unsafe HTML tags (Eg. keep
) 3. Mix of mode 1 and 2. If value of field starts with , then use mode 2. Else use mode 1. 4. Remove all HTML tags 5. Same as 2, with more tags allowed (like ) 6. Same as 3, with more tags allowed (like ) 7. Mix of mode 0 and mode 1. If field_value starts with , then use mode 0. Else use mode 1. @param tag: the marc code of a field @param escape: 1 if returned value should be escaped. Else 0. (see above for other modes) @return: value of field tag in record """ list_of_fields = self.fields(tag) if len(list_of_fields) > 0: # Escaping below if escape == 0: return list_of_fields[0] else: return escape_field(list_of_fields[0], escape) else: return "" def fields(self, tag, escape=0, repeatable_subfields_p=False): """ Returns the list of values corresonding to "tag". If tag has an undefined subcode (such as 999C5), the function returns a list of dictionaries, whoose keys are the subcodes and the values are the values of tag.subcode. If the tag has a subcode, simply returns list of values corresponding to tag. Eg. for given MARC:: 999C5 $a value_1a $b value_1b 999C5 $b value_2b 999C5 $b value_3b $b value_3b_bis >>> bfo.fields('999C5b') >>> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis'] >>> bfo.fields('999C5') >>> [{'a':'value_1a', 'b':'value_1b'}, {'b':'value_2b'}, {'b':'value_3b'}] By default the function returns only one value for each subfield (that is it considers that repeatable subfields are not allowed). It is why in the above example 'value3b_bis' is not shown for bfo.fields('999C5'). (Note that it is not defined which of value_3b or value_3b_bis is returned). This is to simplify the use of the function, as most of the time subfields are not repeatable (in that way we get a string instead of a list). You can allow repeatable subfields by setting 'repeatable_subfields_p' parameter to True. In this mode, the above example would return: >>> bfo.fields('999C5b', repeatable_subfields_p=True) >>> ['value_1b', 'value_2b', 'value_3b'] >>> bfo.fields('999C5', repeatable_subfields_p=True) >>> [{'a':['value_1a'], 'b':['value_1b']}, {'b':['value_2b']}, {'b':['value_3b', 'value3b_bis']}] NOTICE THAT THE RETURNED STRUCTURE IS DIFFERENT. Also note that whatever the value of 'repeatable_subfields_p' is, bfo.fields('999C5b') always show all fields, even repeatable ones. This is because the parameter has no impact on the returned structure (it is always a list). 'escape' parameter allows to escape special characters of the fields. The value of escape can be: 0. No escaping 1. Escape all HTML characters 2. Remove unsafe HTML tags (Eg. keep
) 3. Mix of mode 1 and 2. If value of field starts with , then use mode 2. Else use mode 1. 4. Remove all HTML tags 5. Same as 2, with more tags allowed (like ) 6. Same as 3, with more tags allowed (like ) 7. Mix of mode 0 and mode 1. If field_value starts with , then use mode 0. Else use mode 1. @param tag: the marc code of a field @param escape: 1 if returned values should be escaped. Else 0. @repeatable_subfields_p if True, returns the list of subfields in the dictionary @return: values of field tag in record """ if self.get_record() is None: # Case where BibRecord could not parse object return [] p_tag = parse_tag(tag) if p_tag[3] != "": # Subcode has been defined. Simply returns list of values values = record_get_field_values(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if escape == 0: return values else: return [escape_field(value, escape) for value in values] else: # Subcode is undefined. Returns list of dicts. # However it might be the case of a control field. instances = record_get_field_instances(self.get_record(), p_tag[0], p_tag[1], p_tag[2]) if repeatable_subfields_p: list_of_instances = [] for instance in instances: instance_dict = {} for subfield in instance[0]: if not instance_dict.has_key(subfield[0]): instance_dict[subfield[0]] = [] if escape == 0: instance_dict[subfield[0]].append(subfield[1]) else: instance_dict[subfield[0]].append(escape_field(subfield[1], escape)) list_of_instances.append(instance_dict) return list_of_instances else: if escape == 0: return [dict(instance[0]) for instance in instances] else: return [dict([ (subfield[0], escape_field(subfield[1], escape)) \ for subfield in instance[0] ]) \ for instance in instances] def kb(self, kb, string, default=""): """ Returns the value of the "string" in the knowledge base "kb". If kb does not exist or string does not exist in kb, returns 'default' string or empty string if not specified. @param kb: a knowledge base name @param string: the string we want to translate @param default: a default value returned if 'string' not found in 'kb' @return: a string value corresponding to translated input with given kb """ if not string: return default val = get_kbr_values(kb, searchkey=string, searchtype='e') try: return val[0][0] except: return default def escape_field(value, mode=0): """ Utility function used to escape the value of a field in given mode. - mode 0: no escaping - mode 1: escaping all HTML/XML characters (escaped chars are shown as escaped) - mode 2: escaping unsafe HTML tags to avoid XSS, but keep basic one (such as
) Escaped tags are removed. - mode 3: mix of mode 1 and mode 2. If field_value starts with , then use mode 2. Else use mode 1. - mode 4: escaping all HTML/XML tags (escaped tags are removed) - mode 5: same as 2, but allows more tags, like - mode 6: same as 3, but allows more tags, like - mode 7: mix of mode 0 and mode 1. If field_value starts with , then use mode 0. Else use mode 1. @param value: value to escape @param mode: escaping mode to use @return: an escaped version of X{value} according to chosen X{mode} """ if mode == 1: return cgi.escape(value) elif mode in [2, 5]: - allowed_attribute_whitelist = cfg_html_buffer_allowed_attribute_whitelist - allowed_tag_whitelist = cfg_html_buffer_allowed_tag_whitelist + \ + allowed_attribute_whitelist = CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST + allowed_tag_whitelist = CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST + \ ('class',) if mode == 5: allowed_attribute_whitelist += ('src', 'alt', 'width', 'height', 'style', 'summary', 'border', 'cellspacing', 'cellpadding') allowed_tag_whitelist += ('img', 'table', 'td', 'tr', 'th', 'span', 'caption') try: return washer.wash(value, allowed_attribute_whitelist=\ allowed_attribute_whitelist, allowed_tag_whitelist= \ allowed_tag_whitelist ) except HTMLParseError: # Parsing failed return cgi.escape(value) elif mode in [3, 6]: if value.lstrip(' \n').startswith(html_field): - allowed_attribute_whitelist = cfg_html_buffer_allowed_attribute_whitelist - allowed_tag_whitelist = cfg_html_buffer_allowed_tag_whitelist + \ + allowed_attribute_whitelist = CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST + allowed_tag_whitelist = CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST + \ ('class',) if mode == 6: allowed_attribute_whitelist += ('src', 'alt', 'width', 'height', 'style', 'summary', 'border', 'cellspacing', 'cellpadding') allowed_tag_whitelist += ('img', 'table', 'td', 'tr', 'th', 'span', 'caption') try: return washer.wash(value, allowed_attribute_whitelist=\ allowed_attribute_whitelist, allowed_tag_whitelist=\ allowed_tag_whitelist ) except HTMLParseError: # Parsing failed return cgi.escape(value) else: return cgi.escape(value) elif mode == 4: try: return washer.wash(value, allowed_attribute_whitelist=[], allowed_tag_whitelist=[] ) except HTMLParseError: # Parsing failed return cgi.escape(value) elif mode == 7: if value.lstrip(' \n').startswith(html_field): return value else: return cgi.escape(value) else: return value def bf_profile(): """ Runs a benchmark @return: None """ for i in range(1, 51): format_record(i, "HD", ln=CFG_SITE_LANG, verbose=9, search_pattern=[]) return if __name__ == "__main__": import profile import pstats #bf_profile() profile.run('bf_profile()', "bibformat_profile") p = pstats.Stats("bibformat_profile") p.strip_dirs().sort_stats("cumulative").print_stats() diff --git a/modules/bibformat/lib/elements/Makefile.am b/modules/bibformat/lib/elements/Makefile.am index 7f9bf81c3..2e15d1b24 100644 --- a/modules/bibformat/lib/elements/Makefile.am +++ b/modules/bibformat/lib/elements/Makefile.am @@ -1,46 +1,47 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir=$(libdir)/python/invenio/bibformat_elements pylib_DATA = bfe_field.py bfe_title.py bfe_authors.py bfe_abstract.py bfe_affiliation.py \ bfe_imprint.py bfe_fulltext.py bfe_place.py bfe_publisher.py bfe_topbanner.py \ bfe_date_rec.py bfe_keywords.py bfe_notes.py bfe_reprints.py bfe_publi_info.py \ bfe_cited_by.py bfe_references.py bfe_title_brief.py \ bfe_report_numbers.py bfe_additional_report_numbers.py bfe_url.py \ bfe_addresses.py bfe_contact.py bfe_photo_resources_brief.py \ bfe_collection.py bfe_editors.py bfe_bibtex.py bfe_edit_record.py \ bfe_date.py bfe_xml_record.py bfe_external_publications.py __init__.py \ bfe_bfx_engine.py bfe_creation_date.py bfe_server_info.py bfe_issn.py \ bfe_client_info.py bfe_language.py bfe_record_id.py bfe_comments.py \ bfe_pagination.py bfe_fulltext_mini.py bfe_year.py bfe_isbn.py \ bfe_appears_in_collections.py bfe_photos.py bfe_record_stats.py \ bfe_edit_files.py bfe_plots.py bfe_plots_thumb.py bfe_sword_push.py \ bfe_video_sources.py bfe_video_bigthumb.py \ bfe_aid_authors.py bfe_doi.py bfe_addthis.py \ bfe_duration.py bfe_record_url.py bfe_video_selector.py \ bfe_video_platform_downloads.py bfe_video_platform_suggestions.py \ - bfe_video_platform_sources.py bfe_sciencewise.py bfe_bookmark.py + bfe_video_platform_sources.py bfe_sciencewise.py bfe_bookmark.py \ + bfe_oai_marcxml.py tmpdir = $(prefix)/var/tmp/tests_bibformat_elements tmp_DATA = test_1.py bfe_test_2.py bfe_test_4.py test3.py test_5.py \ test_no_element.test __init__.py EXTRA_DIST = $(pylib_DATA) $(tmp_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/bibformat/lib/elements/bfe_oai_marcxml.py b/modules/bibformat/lib/elements/bfe_oai_marcxml.py new file mode 100644 index 000000000..fc8555690 --- /dev/null +++ b/modules/bibformat/lib/elements/bfe_oai_marcxml.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +## +## This file is part of Invenio. +## Copyright (C) 2011 CERN. +## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +"""BibFormat element - OAI ready MARCXML + +This element return the full MARCXML representation of a record with the marc +prefix and namespace and adding the leader. +""" + +from invenio.bibformat_dblayer import get_preformatted_record + +def format_element(bfo): + """ + Return the MARCXML representation of the record with the marc prefix and + namespace and adding the leader. 
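+    A sketch of the intended output shape (namespace as defined by the
+    MARCXML standard, leader value as inserted by the code below)::
+
+        <marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
+          <marc:leader>00000coc 2200000uu 4500</marc:leader>
+          ...
+        </marc:record>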
+      XML Schema which can be used to validate replies to all OAI-PMH v2.0 requests. Herbert Van de Sompel, 2002-05-13. Validated with XML Spy v.4.3 on 2002-05-13. Validated with XSV 1.203.2.45/1.106.2.22 on 2002-05-13. Added definition of protocolVersionType instead of using anonymous type. No change of function. Simeon Warner, 2004-03-29. Tightened definition of UTCdatetimeType to enforce the restriction to UTC Z notation. Simeon Warner, 2004-09-14. Corrected pattern matches for setSpecType and metadataPrefixType to agree with protocol specification. Simeon Warner, 2004-10-12. Spelling correction. Simeon Warner, 2008-12-07. $Date: 2004/10/12 15:20:29 $
+      Define requestType, indicating the protocol request that led to the response. Element content is BASE-URL, attributes are arguments of protocol request, attribute-values are values of arguments of protocol request
+      A record has a header, a metadata part, and an optional about container
+      A header has a unique identifier, a datestamp, and setSpec(s) in case the item from which the record is disseminated belongs to set(s). the header can carry a deleted status indicating that the record is deleted.
+      Metadata must be expressed in XML that complies with another XML Schema (namespace=#other). Metadata must be explicitly qualified in the response.
+      Data "about" the record must be expressed in XML that is compliant with an XML Schema defined by a community.
+      A resumptionToken may have 3 optional attributes and can be used in ListSets, ListIdentifiers, ListRecords responses.
+      The descriptionType is used for the description element in Identify and for setDescription element in ListSets. Content must be compliant with an XML Schema defined by a community.
+      Datestamps are to either day (type date) or to seconds granularity (type oai:UTCdateTimeZType)
diff --git a/modules/bibharvest/etc/oai2.xsl.v1.0 b/modules/bibharvest/etc/oai2.xsl.v1.0 new file mode 100644 index 000000000..ee3173297 --- /dev/null +++ b/modules/bibharvest/etc/oai2.xsl.v1.0 @@ -0,0 +1,659 @@
+td.value {
	vertical-align: top;
	padding-left: 1em;
	padding: 3px;
}
td.key {
	background-color: #e0e0ff;
	padding: 3px;
	text-align: right;
	border: 1px solid #c0c0c0;
	white-space: nowrap;
	font-weight: bold;
	vertical-align: top;
}
.dcdata td.key {
	background-color: #ffffe0;
}
body {
	margin: 1em 2em 1em 2em;
}
h1, h2, h3 {
	font-family: sans-serif;
	clear: left;
}
h1 {
	padding-bottom: 4px;
	margin-bottom: 0px;
}
h2 {
	margin-bottom: 0.5em;
}
h3 {
	margin-bottom: 0.3em;
	font-size: medium;
}
.link {
	border: 1px outset #88f;
	background-color: #c0c0ff;
	padding: 1px 4px 1px 4px;
	font-size: 80%;
	text-decoration: none;
	font-weight: bold;
	font-family: sans-serif;
	color: black;
}
.link:hover {
	color: red;
}
.link:active {
	color: red;
	border: 1px inset #88f;
	background-color: #a0a0df;
}
.oaiRecord, .oaiRecordTitle {
	background-color: #f0f0ff;
	border-style: solid;
	border-color: #d0d0d0;
}
h2.oaiRecordTitle {
	background-color: #e0e0ff;
	font-size: medium;
	font-weight: bold;
	padding: 10px;
	border-width: 2px 2px 0px 2px;
	margin: 0px;
}
.oaiRecord {
	margin-bottom: 3em;
	border-width: 2px;
	padding: 10px;
}

.results {
	margin-bottom: 1.5em;
}
ul.quicklinks {
	margin-top: 2px;
	padding: 4px;
	text-align: left;
	border-bottom: 2px solid #ccc;
	border-top: 2px solid #ccc;
	clear: left;
}
ul.quicklinks li {
	font-size: 80%;
	display: inline;
	list-style: none;
	font-family: sans-serif;
}
p.intro {
	font-size: 80%;
}

+ OAI 2.0 Request Results

OAI 2.0 Request Results

+ +
+ + +

About the XSLT

+

An XSLT file has converted the OAI-PMH 2.0 response into XHTML, which displays nicely in browsers that support XSLT, such as Mozilla, Firebird and Internet Explorer. The XSLT file was created by Christopher Gutteridge at the University of Southampton as part of the GNU EPrints system, and is freely redistributable under the GPL.

If you want to use the XSL file on your own OAI interface you may, but due to the way XSLT works you must install the XSL file on the same server as the OAI script; you can't just link to this copy.

For more information, or to download the XSL file, please see the OAI to XHTML XSLT homepage.
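The same-server restriction matters because each OAI-PMH response has to point at the stylesheet itself. Below is a sketch of the preamble a server emits before the OAI-PMH root element; the href path is an assumption based on the file name added by this patch (the actual processing instruction in oai_header() was garbled in this copy):

CFG_SITE_URL = "http://localhost"  # illustrative; Invenio reads this from invenio.config

def oai_response_preamble():
    # XML declaration plus an xml-stylesheet processing instruction that
    # points at oai2.xsl.v1.0 served from the same host as the OAI script.
    out = '<?xml version="1.0" encoding="UTF-8"?>\n'
    out += '<?xml-stylesheet type="text/xsl" href="%s/oai2.xsl.v1.0" ?>\n' % CFG_SITE_URL
    return out

print oai_response_preamble()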

+ + + + + + + + + + + + + + + + +
Datestamp of response
Request URL
+ + + +

OAI Error(s)

+

The request could not be completed due to the following error or errors.

+
+ +
+
+ +

Request was of type .

+
+ + + + + + +
+
+
+
+ + + + + + + + +
Error Code
+

+
+ + + + + + + + + + + + + + + + + + +
Repository Name
Base URL
Protocol Version
Earliest Datestamp
Deleted Record Policy
Granularity
+ + +
+ + + Admin Email + + + + + + +

Unsupported Description Type

+

The XSL currently does not support this type of description.

+
+ +
+
+ + + + + +

OAI-Identifier

+ + + + + + + + + +
Scheme
Repository Identifier
Delimiter
Sample OAI Identifier
+
+ + + + + +

EPrints Description

+

Content

+ + +

Submission Policy

+ +
+

Metadata Policy

+ +

Data Policy

+ + +

Content

+ +
+ +
+ + + +

+
+ +
+
+
+ + +

Comment

+
+
+ + + + + +

Friends

+
    + +
+
+ + +
  • + +Identify
  • +
    + + + + + +

    Branding

    + + +
    + + +

    Icon

    + + + {br:title} + + + {br:title} + + +
    + + +

    Metadata Rendering Rule

    + + + + + + + +
    URL
    Namespace
    Mime Type
    +
    + + + + + + +

    Gateway Information

    + + + + + + + + + + + + + + +
    Source
    Description
    URL
    Notes
    +
    + + + Admin + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    Set

    + + + + +
    setName
    +
    + + + + + + +

    This is a list of metadata formats available for the record "". Use these links to view the metadata:

    +
    + +

    This is a list of metadata formats available from this archive.

    +
    +
    + +
    + + +

    Metadata Format

    + + + + + + + +
    metadataPrefix
    metadataNamespace
    schema
    +
    + + + + + + + + +

    OAI Record:

    +
    + + + +
    +
    + + +

    OAI Record Header

    + + + + + + +
    OAI Identifier + + oai_dc + formats +
    Datestamp
    + +

    This record has been deleted.

    +
    +
    + + + +

    "about" part of record container not supported by the XSL

    +
    + + +   + + + + + + + + + + setSpec + + Identifiers + Records + + + + + + + + +

    There are more results.

    + + + +
    resumptionToken: + +Resume
    +
    + + + + +

    Unknown Metadata Format

    +
    + +
    +
    + + + + +
    +

    Dublin Core Metadata (oai_dc)

    + + +
    +
    +
    + + +Title + + +Author or Creator + + +Subject and Keywords + + +Description + + +Publisher + + +Other Contributor + + +Date + + +Resource Type + + +Format + + +Resource Identifier + + +Source + + +Language + + +Relation + + + + + URL + URL not shown as it is very long. + + + + + + + + + + + + + +Coverage + + +Rights Management + + + + +
    + <></> +
    +
    + + + + + ="" + + + +.xmlSource { + font-size: 70%; + border: solid #c0c0a0 1px; + background-color: #ffffe0; + padding: 2em 2em 2em 0em; +} +.xmlBlock { + padding-left: 2em; +} +.xmlTagName { + color: #800000; + font-weight: bold; +} +.xmlAttrName { + font-weight: bold; +} +.xmlAttrValue { + color: #0000c0; +} + + + + diff --git a/modules/bibharvest/lib/Makefile.am b/modules/bibharvest/lib/Makefile.am index fb24aab01..cab6ffe49 100644 --- a/modules/bibharvest/lib/Makefile.am +++ b/modules/bibharvest/lib/Makefile.am @@ -1,37 +1,38 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir = $(libdir)/python/invenio pylib_DATA = oai_repository_server.py \ oai_repository_tests.py \ oai_repository_webinterface.py \ oai_repository_regression_tests.py \ + oai_repository_config.py \ oai_harvest_getter.py \ - oai_harvest_dblayer.py \ + oai_harvest_dblayer.py \ bibharvest_templates.py \ oai_harvest_admin.py \ oai_harvest_admin_regression_tests.py \ oai_repository_admin.py \ oai_repository_admin_regression_tests.py \ oai_harvest_daemon.py \ oai_repository_updater.py \ oai_harvest_config.py EXTRA_DIST = $(pylib_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/bibharvest/lib/oai_repository_admin.py b/modules/bibharvest/lib/oai_repository_admin.py index 571678c68..1358d22fd 100644 --- a/modules/bibharvest/lib/oai_repository_admin.py +++ b/modules/bibharvest/lib/oai_repository_admin.py @@ -1,816 +1,826 @@ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Invenio OAI Repository Administrator Interface.""" __revision__ = "$Id$" import cgi import os from invenio.config import \ CFG_SITE_LANG, \ CFG_TMPDIR, \ CFG_SITE_URL import invenio.access_control_engine as access_manager from invenio.urlutils import create_html_link from invenio.dbquery import run_sql from invenio.oai_repository_updater import parse_set_definition from invenio.messages import gettext_set_language +from invenio.errorlib import register_exception +from invenio.oai_repository_config import CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC import invenio.template bibharvest_templates = invenio.template.load('bibharvest') tmppath = CFG_TMPDIR + '/oairepositoryadmin.' + str(os.getpid()) guideurl = "help/admin/oai-admin-guide" oai_rep_admin_url = CFG_SITE_URL + \ "/admin/bibharvest/oairepositoryadmin.py" def getnavtrail(previous = '', ln = CFG_SITE_LANG): """Get navtrail""" return bibharvest_templates.tmpl_getnavtrail(previous = previous, ln = ln) def perform_request_index(ln=CFG_SITE_LANG): """OAI Repository admin index""" out = '''

    Define below the sets to expose through the OAI harvesting protocol.
    You will have to run the oairepositoryupdater utility to apply the settings you have defined here.

    ''' % {'siteurl': CFG_SITE_URL, 'ln': ln} titlebar = bibharvest_templates.tmpl_draw_titlebar(ln = ln, title = "OAI repository", guideurl = guideurl, extraname = "add new OAI set", extraurl = "admin/bibharvest/oairepositoryadmin.py/addset") header = ['id', 'setSpec', 'setName', 'collection', 'p1', 'f1', 'm1', 'op1', 'p2', 'f2', 'm2', 'op2', 'p3', 'f3', 'm3', '', ''] oai_set = get_oai_set() sets = [] for (id, setSpec, setName, setCollection, \ setDescription, p1, f1, m1, p2, f2, m2, \ p3, f3, m3, op1, op2) in oai_set: del_request = 'delete' edit_request = 'edit' sets.append([id, cgi.escape(setSpec), cgi.escape(setName), cgi.escape(setCollection), cgi.escape(p1), f1, m1, op1, cgi.escape(p2), f2, m2, op2, cgi.escape(p3), f3, m3, del_request, edit_request]) add_request = 'Add new OAI set definition' sets.append(['', add_request, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']) out += transform_tuple(header=header, tuple=sets) out += "

    " return out def perform_request_addset(oai_set_name='', oai_set_spec='', oai_set_collection='', oai_set_description='', oai_set_definition='', oai_set_reclist='', oai_set_p1='', oai_set_f1='',oai_set_m1='', oai_set_p2='', oai_set_f2='', oai_set_m2='', oai_set_p3='', oai_set_f3='', oai_set_m3='', oai_set_op1='a', oai_set_op2='a', ln=CFG_SITE_LANG, func=0): """add a new OAI set""" _ = gettext_set_language(ln) out = "" if func in ["0", 0]: text = input_form(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_definition, oai_set_reclist, oai_set_p1, oai_set_f1,oai_set_m1, oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2, ln=ln) out = createform(action="addset", text=text, ln=ln, button="Add new OAI set definition line", func=1) lnargs = [["ln", ln]] if func in ["1", 1]: out += "
    " res = add_oai_set(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_definition, oai_set_reclist, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2) if res[0] == 1: out += bibharvest_templates.tmpl_print_info(ln, "OAI set definition %s added." % \ cgi.escape(oai_set_name)) out += "
    " out += "

    " out += create_html_link(urlbase=oai_rep_admin_url + \ "/index", urlargd={'ln': ln}, link_label=_("Return to main selection")) return nice_box("", out) def perform_request_editset(oai_set_id=None, oai_set_name='', oai_set_spec='', oai_set_collection='', oai_set_description='', oai_set_definition='', oai_set_reclist='', oai_set_p1='', oai_set_f1='', oai_set_m1='', oai_set_p2='', oai_set_f2='', oai_set_m2='', oai_set_p3='', oai_set_f3='', oai_set_m3='', oai_set_op1='a', oai_set_op2='a', ln=CFG_SITE_LANG, func=0): """creates html form to edit an OAI set.""" _ = gettext_set_language(ln) if oai_set_id is None: return "No OAI set ID selected." out = "" if func in [0, "0"]: oai_set = get_oai_set(oai_set_id) if not oai_set: return "ERROR: oai_set_id %s seems invalid" % oai_set_id oai_set_spec = oai_set[0][1] oai_set_name = oai_set[0][2] oai_set_collection = oai_set[0][3] oai_set_description = oai_set[0][4] oai_set_definition = '' oai_set_reclist = '' oai_set_p1 = oai_set[0][5] oai_set_f1 = oai_set[0][6] oai_set_m1 = oai_set[0][7] oai_set_p2 = oai_set[0][8] oai_set_f2 = oai_set[0][9] oai_set_m2 = oai_set[0][10] oai_set_p3 = oai_set[0][11] oai_set_f3 = oai_set[0][12] oai_set_m3 = oai_set[0][13] oai_set_op1 = oai_set[0][14] oai_set_op2 = oai_set[0][15] text = input_form(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_definition, oai_set_reclist, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2, ln=ln) out += extended_input_form(action="editset", text=text, button="Modify", oai_set_id=oai_set_id, ln=ln, func=1) if func in [1, "1"]: res = modify_oai_set(oai_set_id, oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2) out += "
    " if res[0] == 1: out += bibharvest_templates.tmpl_print_info(ln, "OAI set definition #%s edited." % oai_set_id) out += "
    " else: out += bibharvest_templates.tmpl_print_warning(ln, "A problem was encountered:
    " + cgi.escape(res[1])) out += "
    " out += "
    " out += create_html_link(urlbase=oai_rep_admin_url + \ "/index", urlargd={'ln': ln}, link_label=_("Return to main selection")) return nice_box("", out) def perform_request_delset(oai_set_id=None, ln=CFG_SITE_LANG, callback='yes', func=0): """creates html form to delete an OAI set""" _ = gettext_set_language(ln) out = "" if oai_set_id: oai_set = get_oai_set(oai_set_id) if not oai_set: return "ERROR: oai_set_id %s seems invalid" % oai_set_id nameset = (oai_set[0][1]) pagetitle = """Delete OAI set: %s""" % cgi.escape(nameset) if func in ["0", 0]: oai_set = get_oai_set(oai_set_id) oai_set_spec = oai_set[0][1] oai_set_name = oai_set[0][2] oai_set_collection = oai_set[0][3] oai_set_description = oai_set[0][4] oai_set_definition = '' oai_set_reclist = '' oai_set_p1 = oai_set[0][5] oai_set_f1 = oai_set[0][6] oai_set_m1 = oai_set[0][7] oai_set_p2 = oai_set[0][8] oai_set_f2 = oai_set[0][9] oai_set_m2 = oai_set[0][10] oai_set_p3 = oai_set[0][11] oai_set_f3 = oai_set[0][12] oai_set_m3 = oai_set[0][13] oai_set_op1 = oai_set[0][14] oai_set_op2 = oai_set[0][15] if oai_set: question = """Do you want to delete the OAI definition #%s?""" % oai_set_id text = bibharvest_templates.tmpl_print_info(ln, question) text += "


    " text += pagebody_text( cgi.escape("%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s" % \ (oai_set_spec, oai_set_name, oai_set_collection, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_op1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_op2, oai_set_p3, oai_set_f3, oai_set_m3))) out += createform(action="delset", text=text, button="Delete", oai_set_id=oai_set_id, func=1) else: return bibharvest_templates.tmpl_print_info(ln, "OAI set does not exist.") elif func in ["1", 1]: res = delete_oai_set(oai_set_id) if res[0] == 1: out += bibharvest_templates.tmpl_print_info(ln, "OAI set definition #%s deleted." % oai_set_id) out += "
    " else: pass out += "

    " out += create_html_link(urlbase=oai_rep_admin_url + \ "/index", urlargd={'ln': ln}, link_label=_("Return to main selection")) return nice_box("", out) def get_oai_set(id=''): """Returns a row parameters for a given id""" sets = [] sql = "SELECT id, setSpec, setName, setCollection, setDescription, p1,f1,m1, p2,f2,m2, p3,f3,m3, setDefinition FROM oaiREPOSITORY" try: if id: sql += " WHERE id=%s" % id sql += " ORDER BY setSpec asc" res = run_sql(sql) for row in res: set = ['']*16 set[0] = row[0] set[1] = row[1] set[2] = row[2] params = parse_set_definition(row[14]) set[3] = params.get('c', '') set[5] = params.get('p1', '') set[6] = params.get('f1', '') set[7] = params.get('m1', '') set[8] = params.get('p2', '') set[9] = params.get('f2', '') set[10] = params.get('m2', '') set[11] = params.get('p3', '') set[12] = params.get('f3', '') set[13] = params.get('m3', '') set[14] = params.get('op1', 'a') set[15] = params.get('op2', 'a') sets.append(set) return sets except StandardError, e: + register_exception(alert_admin=True) return str(e) def modify_oai_set(oai_set_id, oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_p1, oai_set_f1,oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2): """Modifies a row's parameters""" try: + if not oai_set_spec: + oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC set_definition = 'c=' + oai_set_collection + ';' + \ 'p1=' + oai_set_p1 + ';' + \ 'f1=' + oai_set_f1 + ';' + \ 'm1=' + oai_set_m1 + ';' + \ 'op1='+ oai_set_op1 + ';' + \ 'p2=' + oai_set_p2 + ';' + \ 'f2=' + oai_set_f2 + ';' + \ 'm2=' + oai_set_m2 + ';' + \ 'op2='+ oai_set_op2 + ';' + \ 'p3=' + oai_set_p3 + ';' + \ 'f3=' + oai_set_f3 + ';' + \ 'm3=' + oai_set_m3 + ';' res = run_sql("""UPDATE oaiREPOSITORY SET setName=%s, setSpec=%s, setCollection=%s, setDescription=%s, setDefinition=%s, p1=%s, f1=%s, m1=%s, p2=%s, f2=%s, m2=%s, p3=%s, f3=%s, - m3=%s + m3=%s, WHERE id=%s""", (oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, set_definition, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_id)) return (1, "") except StandardError, e: + register_exception(alert_admin=True) return (0, str(e)) def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_definition, oai_set_reclist, oai_set_p1, oai_set_f1,oai_set_m1, oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2): """Add a definition into the OAI Repository""" try: + if not oai_set_spec: + oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC set_definition = 'c=' + oai_set_collection + ';' + \ 'p1=' + oai_set_p1 + ';' + \ 'f1=' + oai_set_f1 + ';' + \ 'm1=' + oai_set_m1 + ';' + \ 'op1='+ oai_set_op1 + ';' + \ 'p2=' + oai_set_p2 + ';' + \ 'f2=' + oai_set_f2 + ';' + \ 'm2=' + oai_set_m2 + ';' + \ 'op2='+ oai_set_op2 + ';' + \ 'p3=' + oai_set_p3 + ';' + \ 'f3=' + oai_set_f3 + ';' + \ 'm3=' + oai_set_m3 + ';' res = run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec, setCollection, setDescription, setDefinition, setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3) VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", (oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, set_definition, oai_set_p1, oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3)) return (1, "") except StandardError, e: + register_exception(alert_admin=True) return 
(0, e) def delete_oai_set(oai_set_id): """""" try: res = run_sql("DELETE FROM oaiREPOSITORY WHERE id=%s", (oai_set_id,)) return (1, "") except StandardError, e: + register_exception(alert_admin=True) return (0, e) def drop_down_menu(boxname, content): """ Returns the code of a drop down menu. Parameters: boxname - *str* name of the input form content - *list(tuple3)* the content of the list. List of items as tuple3 with: - *str* value of the item - *bool* if item is selected of not - *str* label of the item (displayed value) """ text = " """ % \ (cgi.escape(name, 1), cgi.escape(value, 1)) return text def pagebody_text(title): """""" text = """%s""" % title return text def bar_text(title): """""" text = """%s""" % title return text def input_form(oai_set_name, oai_set_spec, oai_set_collection, oai_set_description, oai_set_definition, oai_set_reclist, oai_set_p1, oai_set_f1,oai_set_m1, oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2, ln=CFG_SITE_LANG): """returns the standard settings form""" modes = { 'r' : 'Regular Expression', 'a' : 'All of the words', 'y' : 'Any of the words', 'e' : 'Exact phrase', 'p' : 'Partial phrase' } mode_dropdown = [['r', '', modes['r']], ['e', '', modes['e']], ['p', '', modes['p']], ['a', '', modes['a']], ['y', '', modes['y']], ['', '', '']] operators = { 'a' : 'AND', 'o' : 'OR', 'n' : 'AND NOT', } mode_operators_1 = [['a', '', operators['a']], ['o', '', operators['o']], ['n', '', operators['n']], ['a', '', '']] mode_operators_2 = [['a', '', operators['a']], ['o', '', operators['o']], ['n', '', operators['n']], ['a', '', '']] text = "
    " text += "
    " text += input_text(ln = ln, title = "OAI Set spec:", name = "oai_set_spec", value = oai_set_spec) - text += 'Optional: leave blank if not needed [?]' + text += 'Optional: if you leave it blank it will be automatically set to "%s", with the implicit convention that any record belonging to it can be harvested by not specifying any set. [?]' % CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC text += "
    " text += input_text(ln = ln, title = "OAI Set name:", name = "oai_set_name", value = oai_set_name) text += 'Optional: leave blank if not needed [?]' text += "
     
    " text += '
    Choose below the search query that defines the records that belong to this set:
    ' text += "
     
    " # text += input_text(ln = ln, title = "OAI Set description", name = "oai_set_description", value = oai_set_description) #text += "" #menu = create_drop_down_menu_content("SELECT distinct(name) from collection") #menu.append(['','','']) #if (oai_set_collection): # menu.append([oai_set_collection,'selected',oai_set_collection]) #else: # menu.append(['','selected','Collection']) text += input_text(ln = ln, title = "Collection(s):", name="oai_set_collection", value=oai_set_collection) #text += drop_down_menu("oai_set_collection", menu) text += 'Eg: Published Articles, Preprints, Theses
    (collections identifiers, not collections names/translations).
    ' text += input_text(ln = ln, title = "Phrase:", name = "oai_set_p1", value = oai_set_p1) text += "" fields = create_drop_down_menu_content("SELECT distinct(code) from field") fields.append(['', '', '']) if (oai_set_f1): fields.append([oai_set_f1, 'selected', oai_set_f1]) else: fields.append(['', 'selected', 'Field']) if (oai_set_m1): mode_dropdown_m1 = [[oai_set_m1, 'selected', modes[oai_set_m1]]] else: mode_dropdown_m1 = [['', 'selected', 'Mode']] text += drop_down_menu("oai_set_f1", fields) text += "" text += drop_down_menu("oai_set_m1", mode_dropdown + mode_dropdown_m1) text += "" if (oai_set_op1): mode_operators_1.append([oai_set_op1, 'selected', operators[oai_set_op1]]) else: mode_operators_1.append(['', 'selected', 'Operators']) text += drop_down_menu("oai_set_op1", mode_operators_1) text += "
    " text += input_text(ln = ln, title = "Phrase:", name = "oai_set_p2", value = oai_set_p2) text += "" fields = create_drop_down_menu_content("SELECT distinct(code) from field") fields.append(['', '', '']) if (oai_set_f2): fields.append([oai_set_f2, 'selected', oai_set_f2]) else: fields.append(['', 'selected', 'Field']) if (oai_set_m2): mode_dropdown_m2 = [[oai_set_m2, 'selected', modes[oai_set_m2]]] else: mode_dropdown_m2 = [['', 'selected', 'Mode']] text += drop_down_menu("oai_set_f2", fields) text += "" text += drop_down_menu("oai_set_m2", mode_dropdown + mode_dropdown_m2) text += "" if (oai_set_op2): mode_operators_2.append([oai_set_op2, 'selected', operators[oai_set_op2]]) else: mode_operators_2.append(['', 'selected', 'Operators']) text += drop_down_menu("oai_set_op2", mode_operators_2) text += "
    " text += input_text(ln = ln, title = "Phrase:", name = "oai_set_p3", value = oai_set_p3) text += "" fields = create_drop_down_menu_content("SELECT distinct(code) from field") fields.append(['', '', '']) if (oai_set_f3): fields.append([oai_set_f3, 'selected', oai_set_f3]) else: fields.append(['', 'selected', 'Field']) if (oai_set_m3): mode_dropdown_m3 = [[oai_set_m3, 'selected', modes[oai_set_m3]]] else: mode_dropdown_m3 = [['', 'selected', 'Mode']] text += drop_down_menu("oai_set_f3", fields) text += "" text += drop_down_menu("oai_set_m3", mode_dropdown + mode_dropdown_m3) text += "
    " return text def check_user(req, role, adminarea=2, authorized=0): """""" (auth_code, auth_message) = access_manager.acc_authorize_action(req, role) if not authorized and auth_code != 0: return ("false", auth_message) return ("", auth_message) def transform_tuple(header, tuple, start='', end='', extracolumn=''): """""" align = [] try: firstrow = tuple[0] if type(firstrow) in [int, long]: align = ['admintdright'] elif type(firstrow) in [str, dict]: align = ['admintdleft'] else: for item in firstrow: if type(item) is int: align.append('admintdright') else: align.append('admintdleft') except IndexError: firstrow = [] tblstr = '' for h in header: tblstr += ' %s\n' % (h, ) if tblstr: tblstr = ' \n%s\n \n' % (tblstr, ) tblstr = start + '\n' + tblstr try: extra = '' if type(firstrow) not in [int, long, str, dict]: for i in range(len(firstrow)): extra += '\n' % (align[i], firstrow[i]) else: extra += ' \n' % (align[0], firstrow) #extra += '\n\n' % (len(tuple), extracolumn) extra += '\n' except IndexError: extra = '' tblstr += extra j = 1 for row in tuple[1:]: style = '' if j % 2: style = ' style="background-color: rgb(235, 247, 255);"' j += 1 tblstr += ' \n' % style if type(row) not in [int, long, str, dict]: for i in range(len(row)): tblstr += '\n' % (align[i], row[i]) else: tblstr += ' \n' % (align[0], row) tblstr += ' \n' tblstr += '
    %s%s\n%s\n
    %s%s
    \n ' tblstr += end return tblstr def nice_box(header='', content='', cls="admin_wvar"): """ Embed the content into a box with given header Parameters: header - *str* header of the box datalist - *str* the content of the box cls - *str* the class of the box """ out = '''
    %s
    %s
    ''' % (cls, header, content) return out def extended_input_form(action="", text="", button="func", cnfrm='', **hidden): """""" out = '
    \n' % (action, ) out += '\n
    ' out += text if cnfrm: out += ' ' for key in hidden.keys(): if type(hidden[key]) is list: for value in hidden[key]: out += ' \n' % (key, value) else: out += ' \n' % (key, hidden[key]) out += '' out += ' \n' % (button, ) out += '
    ' out += '
    \n' return out diff --git a/modules/bibupload/lib/bibupload_config.py b/modules/bibharvest/lib/oai_repository_config.py similarity index 59% copy from modules/bibupload/lib/bibupload_config.py copy to modules/bibharvest/lib/oai_repository_config.py index 690e5a2e0..9b29d21ef 100644 --- a/modules/bibupload/lib/bibupload_config.py +++ b/modules/bibharvest/lib/oai_repository_config.py @@ -1,31 +1,28 @@ -# -*- coding: utf-8 -*- -## ## This file is part of Invenio. -## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN. +## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -""" -BibUpload Engine configuration. -""" - -__revision__ = "$Id$" - -CFG_BIBUPLOAD_CONTROLFIELD_TAGS = ['001', '002', '003', '004', - '005', '006', '007', '008'] - -CFG_BIBUPLOAD_SPECIAL_TAGS = ['FMT', 'FFT'] +"""OAI Repository Configuration.""" +## Maximum number of records to put in a single bibupload +CFG_OAI_REPOSITORY_MARCXML_SIZE = 100 +## A magic value used to specify the global set (e.g. when the admin +## specify a set configuration without putting any setSpec) +## NOTE: if you change this value, please update accordingly the root +## Makefile.am and tabcreate.sql defaults for setSpec column in +## oaiREPOSITORY MySQL table. +CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC = "GLOBAL_SET" diff --git a/modules/bibharvest/lib/oai_repository_regression_tests.py b/modules/bibharvest/lib/oai_repository_regression_tests.py index ef32d5fd2..d71221693 100644 --- a/modules/bibharvest/lib/oai_repository_regression_tests.py +++ b/modules/bibharvest/lib/oai_repository_regression_tests.py @@ -1,192 +1,186 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
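The setDefinition column written by add_oai_set()/modify_oai_set() above is a flat 'key=value;' string ('c=...;p1=...;f1=...;m1=...;op1=...;p2=...;...'), decoded again by parse_set_definition() from oai_repository_updater. A stand-in parser, shown only to document the format (the real function may differ in details):

def parse_set_definition_sketch(set_definition):
    # Split 'c=Theses;p1=ellis;f1=author;...' into a dict of criteria.
    params = {}
    for chunk in set_definition.split(';'):
        if '=' in chunk:
            key, value = chunk.split('=', 1)
            params[key] = value
    return params

definition = 'c=Theses;p1=ellis;f1=author;m1=e;op1=a;p2=;f2=;m2=;op2=a;p3=;f3=;m3=;'
assert parse_set_definition_sketch(definition)['p1'] == 'ellis'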
"""OAI Repository Regression Test Suite.""" __revision__ = "$Id$" import unittest import time import re +from cStringIO import StringIO + from invenio.config import CFG_SITE_URL, \ CFG_OAI_SLEEP, \ CFG_OAI_LOAD, \ CFG_OAI_ID_FIELD -from invenio import oai_repository_server, search_engine, oai_repository_updater +from invenio.intbitset import intbitset +from invenio import oai_repository_server, search_engine from invenio.testutils import make_test_suite, run_test_suite, \ test_web_page_content, merge_error_messages class OAIRepositoryWebPagesAvailabilityTest(unittest.TestCase): """Check OAI Repository web pages whether they are up or not.""" - def test_your_baskets_pages_availability(self): + def test_oai_server_pages_availability(self): """oairepository - availability of OAI server pages""" baseurl = CFG_SITE_URL + '/oai2d' _exports = [#fast commands first: '?verb=Identify', '?verb=ListMetadataFormats', # sleepy commands now: '?verb=ListSets', '?verb=ListRecords', '?verb=GetRecord'] error_messages = [] for url in [baseurl + page for page in _exports]: if url.endswith('Identify') or \ url.endswith('ListMetadataFormats'): pass else: # some sleep required for verbs other than Identify # and ListMetadataFormats, since oai2d refuses too # frequent access: time.sleep(CFG_OAI_SLEEP) error_messages.extend(test_web_page_content(url, expected_text= '')) if error_messages: self.fail(merge_error_messages(error_messages)) return class TestSelectiveHarvesting(unittest.TestCase): """Test set, from and until parameters used to do selective harvesting.""" def test_set(self): """oairepository - testing selective harvesting with 'set' parameter""" - self.assertNotEqual([], oai_repository_server.oaigetsysnolist(set="cern:experiment")) + self.assertEqual(intbitset([10, 17]), oai_repository_server.oai_get_recid_list(set_spec="cern:experiment")) self.assert_("Multifractal analysis of minimum bias events" in \ ''.join([oai_repository_server.print_record(recID) for recID in \ - oai_repository_server.oaigetsysnolist(set="cern:experiment")])) + oai_repository_server.oai_get_recid_list(set_spec="cern:experiment")])) self.assert_("Multifractal analysis of minimum bias events" not in \ ''.join([oai_repository_server.print_record(recID) for recID in \ - oai_repository_server.oaigetsysnolist(set="cern:theory")])) - self.assertEqual([], oai_repository_server.oaigetsysnolist(set="nonExistingSet")) + oai_repository_server.oai_get_recid_list(set_spec="cern:theory")])) + self.failIf(oai_repository_server.oai_get_recid_list(set_spec="nonExistingSet")) def test_from_and_until(self): """oairepository - testing selective harvesting with 'from' and 'until' parameters""" + req = StringIO() # List available records, get datestamps and play with them - identifiers = oai_repository_server.oailistidentifiers("") - datestamps = re.findall('(?P.*)\s*(?P.*)', identifiers) + oai_repository_server.oai_list_records_or_identifiers(req, {'verb': 'ListIdentifiers', 'metadataPrefix': 'marcxml'}) + identifiers = req.getvalue() + datestamps = re.findall('(?P.*?)\s*(?P.*?)', identifiers, re.M) sample_datestamp = datestamps[0][1] # Take one datestamp sample_oai_id = datestamps[0][0] # Take corresponding oai id sample_id = search_engine.perform_request_search(p=sample_oai_id, f=CFG_OAI_ID_FIELD)[0] # Find corresponding system number id # There must be some datestamps self.assertNotEqual([], datestamps) # We must be able to retrieve an id with the date we have just found - self.assert_(sample_id in 
oai_repository_server.oaigetsysnolist(fromdate=sample_datestamp)) - self.assert_(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp)) - self.assert_(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp, \ + self.assert_(sample_id in oai_repository_server.oai_get_recid_list(fromdate=sample_datestamp)) + self.assert_(sample_id in oai_repository_server.oai_get_recid_list(untildate=sample_datestamp)) + self.assert_(sample_id in oai_repository_server.oai_get_recid_list(untildate=sample_datestamp, \ fromdate=sample_datestamp)) # Same, with short format date. Eg 2007-12-13 - self.assert_(sample_id in oai_repository_server.oaigetsysnolist(fromdate=sample_datestamp.split('T')[0])) - self.assert_(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp.split('T')[0])) - self.assert_(sample_id in oai_repository_server.oaigetsysnolist(fromdate=sample_datestamp.split('T')[0], \ + self.assert_(sample_id in oai_repository_server.oai_get_recid_list(fromdate=sample_datestamp.split('T')[0])) + self.assert_(sample_id in oai_repository_server.oai_get_recid_list(untildate=sample_datestamp.split('T')[0])) + self.assert_(sample_id in oai_repository_server.oai_get_recid_list(fromdate=sample_datestamp.split('T')[0], \ untildate=sample_datestamp.split('T')[0])) # At later date (year after) we should not find our id again sample_datestamp_year = int(sample_datestamp[0:4]) sample_datestamp_rest = sample_datestamp[4:] later_datestamp = str(sample_datestamp_year + 1) + sample_datestamp_rest - self.assert_(sample_id not in oai_repository_server.oaigetsysnolist(fromdate=later_datestamp)) + self.assert_(sample_id not in oai_repository_server.oai_get_recid_list(fromdate=later_datestamp)) # At earlier date (year before) we should not find our id again earlier_datestamp = str(sample_datestamp_year - 1) + sample_datestamp_rest - self.assert_(sample_id not in oai_repository_server.oaigetsysnolist(untildate=earlier_datestamp)) + self.assert_(sample_id not in oai_repository_server.oai_get_recid_list(untildate=earlier_datestamp)) # From earliest date to latest date must include all oai records dates = [(time.mktime(time.strptime(date[1], "%Y-%m-%dT%H:%M:%SZ")), date[1]) for date in datestamps] dates = dict(dates) sorted_times = dates.keys() sorted_times.sort() earliest_datestamp = dates[sorted_times[0]] latest_datestamp = dates[sorted_times[-1]] - self.assertEqual(len(oai_repository_server.oaigetsysnolist()), \ - len(oai_repository_server.oaigetsysnolist(fromdate=earliest_datestamp, \ - untildate=latest_datestamp))) + self.assertEqual(oai_repository_server.oai_get_recid_list(), \ + oai_repository_server.oai_get_recid_list(fromdate=earliest_datestamp, \ + untildate=latest_datestamp)) + def test_resumption_token(self): """oairepository - testing harvesting with bad resumption token""" # Non existing resumptionToken - self.assert_('badResumptionToken' in oai_repository_server.oailistrecords('resumptionToken=foobar&verb=ListRecords')) + req = StringIO() + oai_repository_server.oai_list_records_or_identifiers(req, {'resumptionToken': 'foobar', 'verb': 'ListRecords'}) + + self.assert_('badResumptionToken' in req.getvalue()) class TestPerformance(unittest.TestCase): """Test performance of the repository """ def setUp(self): """Setting up some variables""" # Determine how many records are served - self.number_of_records = oai_repository_server.oaigetsysnolist("", "", "") + self.number_of_records = len(oai_repository_server.oai_get_recid_list("", "", "")) if 
CFG_OAI_LOAD < self.number_of_records: self.number_of_records = CFG_OAI_LOAD def test_response_speed_oai(self): """oairepository - speed of response for oai_dc output""" allowed_seconds_per_record_oai = 0.02 # Test oai ListRecords performance t0 = time.time() - oai_repository_server.oailistrecords('metadataPrefix=oai_dc&verb=ListRecords') + oai_repository_server.oai_list_records_or_identifiers(StringIO(), {'metadataPrefix': 'oai_dc', 'verb': 'ListRecords'}) t = time.time() - t0 if t > self.number_of_records * allowed_seconds_per_record_oai: self.fail("""Response for ListRecords with metadataPrefix=oai_dc took too much time: %s seconds. Limit: %s seconds""" % (t, self.number_of_records * allowed_seconds_per_record_oai)) def test_response_speed_marcxml(self): """oairepository - speed of response for marcxml output""" allowed_seconds_per_record_marcxml = 0.05 # Test marcxml ListRecords performance t0 = time.time() - oai_repository_server.oailistrecords('metadataPrefix=marcxml&verb=ListRecords') + oai_repository_server.oai_list_records_or_identifiers(StringIO(), argd={'metadataPrefix': 'marcxml', 'verb': 'ListRecords'}) t = time.time() - t0 if t > self.number_of_records * allowed_seconds_per_record_marcxml: self.fail("""Response for ListRecords with metadataPrefix=marcxml took too much time:\n %s seconds. Limit: %s seconds""" % (t, self.number_of_records * allowed_seconds_per_record_marcxml)) -class TestOAIRepositoryUpdater(unittest.TestCase): - """Test functions in OAI_repository_updater""" - - def test_marcxml_filtering(self): - """oairepository - test MARCXML filtering""" - self.assertEqual(oai_repository_updater.marcxml_filter_out_tags(98, ['088__a']), - ' \n SCAN-9709037\n \n') - - self.assertEqual(oai_repository_updater.marcxml_filter_out_tags(98, ['088__c']), - ' \n SCAN-9709037\n \n \n UCRL-8417\n \n') - - self.assertEqual(oai_repository_updater.marcxml_filter_out_tags(98, ['0248_p']), - ' \n oai:cds.cern.ch:SCAN-9709037\n \n') - TEST_SUITE = make_test_suite(OAIRepositoryWebPagesAvailabilityTest, TestSelectiveHarvesting, - TestPerformance, - TestOAIRepositoryUpdater) + TestPerformance) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/bibharvest/lib/oai_repository_server.py b/modules/bibharvest/lib/oai_repository_server.py index 23a0b2b91..a048afc85 100644 --- a/modules/bibharvest/lib/oai_repository_server.py +++ b/modules/bibharvest/lib/oai_repository_server.py @@ -1,1010 +1,869 @@ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
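The reworked regression tests above no longer build OAI query strings by hand; they hand the server a file-like object plus a pre-parsed argument dictionary. The same calling convention can be used interactively inside an Invenio installation (the import only works there):

from cStringIO import StringIO
from invenio import oai_repository_server  # requires an Invenio installation

req = StringIO()
oai_repository_server.oai_list_records_or_identifiers(
    req, {'verb': 'ListIdentifiers', 'metadataPrefix': 'marcxml'})
print req.getvalue()  # the complete OAI-PMH XML response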
"""Receive OAI-PMH 2.0 requests and responds""" __revision__ = "$Id$" import cPickle import os import re -import cgi -import urllib import time +import datetime +import tempfile import sys -if sys.hexversion < 0x2060000: - from md5 import md5 +if sys.hexversion < 0x2050000: + from glob import glob as iglob else: - from hashlib import md5 + from glob import iglob from invenio.config import \ CFG_OAI_DELETED_POLICY, \ CFG_OAI_EXPIRE, \ CFG_OAI_IDENTIFY_DESCRIPTION, \ CFG_OAI_ID_FIELD, \ CFG_OAI_LOAD, \ CFG_OAI_SET_FIELD, \ + CFG_OAI_PREVIOUS_SET_FIELD, \ + CFG_OAI_METADATA_FORMATS, \ CFG_CACHEDIR, \ CFG_SITE_NAME, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_URL, \ - CFG_WEBSTYLE_HTTP_USE_COMPRESSION + CFG_WEBSTYLE_HTTP_USE_COMPRESSION, \ + CFG_CERN_SITE, \ + CFG_OAI_SAMPLE_IDENTIFIER, \ + CFG_OAI_ID_PREFIX, \ + CFG_OAI_FRIENDS, \ + CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ + CFG_OAI_PROVENANCE_BASEURL_SUBFIELD, \ + CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD, \ + CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD, \ + CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD, \ + CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD, \ + CFG_OAI_PROVENANCE_ALTERED_SUBFIELD from invenio.intbitset import intbitset +from invenio.htmlutils import X, EscapedXMLString from invenio.dbquery import run_sql, wash_table_column_name -from invenio.search_engine import record_exists, perform_request_search, \ - get_all_restricted_recids -from invenio.bibformat_dblayer import get_preformatted_record +from invenio.search_engine import record_exists, get_all_restricted_recids, get_all_field_values, search_unit_in_bibxxx, get_record from invenio.bibformat import format_record -from invenio.textutils import encode_for_xml +from invenio.bibrecord import record_get_field_instances +from invenio.errorlib import register_exception +from invenio.oai_repository_config import CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC -verbs = { +CFG_VERBS = { 'GetRecord' : ['identifier', 'metadataPrefix'], 'Identify' : [], 'ListIdentifiers' : ['from', 'until', 'metadataPrefix', 'set', 'resumptionToken'], 'ListMetadataFormats': ['identifier'], 'ListRecords' : ['from', 'until', 'metadataPrefix', 'set', 'resumptionToken'], 'ListSets' : ['resumptionToken'] } -params = { - "verb" : ["Identify","ListIdentifiers","ListSets","ListMetadataFormats","ListRecords","GetRecord"], - "metadataPrefix" : ["oai_dc","marcxml"], - "from" :[""], - "until":[""], - "set" :[""], - "identifier": [""] -} - -def escape_space(strxml): - "Encode special chars in string for URL-compliancy." - - strxml = strxml.replace(' ', '%20') - return strxml - -def encode_for_url(strxml): - "Encode special chars in string for URL-compliancy." 
- - strxml = strxml.replace('%', '%25') - strxml = strxml.replace(' ', '%20') - strxml = strxml.replace('?', '%3F') - strxml = strxml.replace('#', '%23') - strxml = strxml.replace('=', '%3D') - strxml = strxml.replace('&', '%26') - strxml = strxml.replace('/', '%2F') - strxml = strxml.replace(':', '%3A') - strxml = strxml.replace(';', '%3B') - strxml = strxml.replace('+', '%2B') - return strxml +CFG_ERRORS = { + "badArgument": "The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax:", + "badResumptionToken": "The value of the resumptionToken argument is invalid or expired:", + "badVerb": "Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated:", + "cannotDisseminateFormat": "The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository:", + "idDoesNotExist": "The value of the identifier argument is unknown or illegal in this repository:", + "noRecordsMatch": "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list:", + "noMetadataFormats": "There are no metadata formats available for the specified item:", + "noSetHierarchy": "The repository does not support sets:" +} -def oai_header(args, verb): - "Print OAI header" +def oai_error(argd, errors): + """ + Return a well-formatted OAI-PMH error + """ + out = """ +""" + out += X.responseDate()(get_utc_now()) + for error_code, error_msg in errors: + assert(error_code in CFG_ERRORS) + if error_code in ("badArgument", "badVerb"): + out += X.request()(oai_get_request_url()) + break + else: + ## There are no badArgument or badVerb errors so we can + ## return the whole request information + out += X.request(**argd)(oai_get_request_url()) + for error_code, error_msg in errors: + if error_msg is None: + error_msg = CFG_ERRORS[error_code] + else: + error_msg = "%s %s" % (CFG_ERRORS[error_code], error_msg) + out += X.error(code=error_code)(error_msg) + out += "" + return out - out = "" +def oai_header(argd, verb): + """ + Return OAI header + """ - out = out + "" + "\n" - out = out + "\n" + out = "" + "\n" + out += "\n" % CFG_SITE_URL + out += "\n" - out = out + " " + oaigetresponsedate() + "\n" + #out += "%s" % get_utc_now() + out += X.responseDate()(get_utc_now()) if verb: - out = out + " %s\n" % (verb, oaigetrequesturl(args)) - out = out + " <%s>\n" % verb + out += X.request(**argd)(oai_get_request_url()) + out += "<%s>\n" % verb else: - out = out + " %s\n" % (oaigetrequesturl(args)) + out += X.request()(oai_get_request_url()) return out def oai_footer(verb): - "Print OAI footer" - + """ + @return: the OAI footer. + """ out = "" - if verb: - out = "%s \n" % (out, verb) - out = out + "\n" - - return out - -def oai_error_header(args, verb): - "Print OAI header" - - out = "" - -### out = "Content-Type: text/xml\n\n" - out = out + "" + "\n" - out = out + "\n" - - out = out + " " + oaigetresponsedate() + "\n" - out = out + " %s\n" % (verb, oaigetrequesturl(args)) - + out += "\n" % (verb) + out += "\n" return out -def oai_error_footer(verb): - "Print OAI footer" - - out = verb - out = "\n" - return out - -def get_field(sysno, field): - "Gets list of field 'field' for the record with 'sysno' system number." +def get_field(recid, field): + """ + Gets list of field 'field' for the record with 'recid' system number. 
+ """ - out = [] digit = field[0:2] bibbx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s" % (wash_table_column_name(bibbx), wash_table_column_name(bibx)) - res = run_sql(query, (sysno, field)) - - for row in res: - - out.append(row[0]) - - return out + return [row[0] for row in run_sql(query, (recid, field))] def utc_to_localtime(date): """ Convert UTC to localtime Reference: - (1) http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates - (2) http://www.w3.org/TR/NOTE-datetime This function works only with dates complying with the "Complete date plus hours, minutes and seconds" profile of ISO 8601 defined by (2), and linked from (1). Eg: 1994-11-05T13:15:30Z """ ldate = date.split("T")[0] ltime = date.split("T")[1] lhour = ltime.split(":")[0] lminute = ltime.split(":")[1] lsec = ltime.split(":")[2] lsec = lsec[:-1] # Remove trailing "Z" lyear = ldate.split("-")[0] lmonth = ldate.split("-")[1] lday = ldate.split("-")[2] # 1: Build a time as UTC. Since time.mktime() expect a local time : ## 1a: build it without knownledge of dst ## 1b: substract timezone to get a local time, with possibly wrong dst utc_time = time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, -1)) local_time = utc_time - time.timezone # 2: Fix dst for local_time # Find out the offset for daily saving time of the local # timezone at the time of the given 'date' if time.localtime(local_time)[-1] == 1: local_time = local_time + 3600 return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(local_time)) def localtime_to_utc(date): - "Convert localtime to UTC" + """Convert localtime to UTC""" ldate = date.split(" ")[0] ltime = date.split(" ")[1] lhour = ltime.split(":")[0] lminute = ltime.split(":")[1] lsec = ltime.split(":")[2] lyear = ldate.split("-")[0] lmonth = ldate.split("-")[1] lday = ldate.split("-")[2] # Find out the offset for daily saving time of the local # timezone at the time of the given 'date' # # 1: build time that correspond to local date, without knowledge of dst # 2: determine if dst is locally enabled at this time tmp_date = time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, -1)) if time.localtime(tmp_date)[-1] == 1: dst = time.localtime(tmp_date)[-1] else: dst = 0 # 3: Build a new time with knowledge of the dst local_time = time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, dst)) # 4: Get the time as UTC utc_time = time.gmtime(local_time) return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_time) -def get_modification_date(sysno): - "Returns the date of last modification for the record 'sysno'." 
+def get_modification_date(recid): + """Returns the date of last modification for the record 'recid'.""" out = "" - res = run_sql("SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) + res = run_sql("SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (recid,), 1) if res and res[0][0]: out = localtime_to_utc(res[0][0]) return out def get_earliest_datestamp(): - "Get earliest datestamp in the database" + """Get earliest datestamp in the database""" out = "" - res = run_sql("SELECT MIN(DATE_FORMAT(creation_date,'%%Y-%%m-%%d %%H:%%i:%%s')) FROM bibrec", (), 1) - if res[0][0]: + res = run_sql("SELECT DATE_FORMAT(MIN(creation_date),'%Y-%m-%d %H:%i:%s') FROM bibrec", n=1) + if res: out = localtime_to_utc(res[0][0]) return out def get_latest_datestamp(): - "Get latest datestamp in the database" + """Get latest datestamp in the database""" out = "" - res = run_sql("SELECT MAX(DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s')) FROM bibrec", (), 1) - if res[0][0]: + res = run_sql("SELECT DATE_FORMAT(MAX(modification_date),'%Y-%m-%d %H:%i:%s') FROM bibrec", n=1) + if res: out = localtime_to_utc(res[0][0]) return out def check_date(date): """Check if given date has a correct format, complying to "Complete date" or "Complete date plus hours, minutes and seconds" formats defined in ISO8601.""" if(re.match("\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\dZ)?\Z", date) is not None): return date else: return "" def normalize_date(date, dtime="T00:00:00Z"): """ Normalize the given date to the "Complete date plus hours, minutes and seconds" format defined in ISO8601 (If "hours, minutes and seconds" part is missing, append 'dtime' to date). 'date' must be checked before with check_date(..). Returns empty string if cannot be normalized """ if len(date) == 10: date = date + dtime elif len(date) != 20: date = "" return date -def print_record(sysno, format='marcxml', record_exists_result=None): - """Prints record 'sysno' formatted according to 'format'. +def get_record_provenance(recid): + """ + Return the provenance XML representation of a record, suitable to be put + in the about tag. 
+ """ + record = get_record(recid) + provenances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) + out = "" + for provenance in provenances: + base_url = identifier = datestamp = metadata_namespace = origin_description = harvest_date = altered = "" + for (code, value) in provenance[0]: + if code == CFG_OAI_PROVENANCE_BASEURL_SUBFIELD: + base_url = value + elif code == CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]: + identifier = value + elif code == CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD: + datestamp = value + elif code == CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD: + metadata_namespace = value + elif code == CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD: + origin_description = value + elif code == CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD: + harvest_date = value + elif code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD: + altered = value + if base_url: + out += """""" + out += X.originDescription(harvestDate=harvest_date, altered=altered)( + X.baseURL()(base_url), + X.identifier()(identifier), + X.datestamp()(datestamp), + X.metadataNamespace()(metadata_namespace), + origin_description and X.originDescription(origin_description) or '' ## This is already XML + ) + out += """""" + return out + +def get_record_rights(dummy): + """ + Return the record rights parts, suitable to be put in the about tag. + """ + return "" + ## FIXME: This need to be thought in a good way. What shall we really + ## put in the rights parts? + #record = get_record(recid) + #rights = record_get_field_instances(record, CFG_OAI_RIGHTS_FIELD[:3], CFG_OAI_RIGHTS_FIELD[3], CFG_OAI_RIGHTS_FIELD[4]) + #license = record_get_field_instances(record, CFG_OAI_LICENSE_FIELD[:3], CFG_OAI_LICENSE_FIELD[3], CFG_OAI_LICENSE_FIELD[4]) + + #holder = date = rights_uri = contact = statement = terms = publisher = license_uri = '' + #if rights: + #for code, value in rights[0][0]: + #if code == CFG_OAI_RIGHTS_HOLDER_SUBFIELD: + #holder = value + #elif code == CFG_OAI_RIGHTS_DATE_SUBFIELD: + #date = value + #elif code == CFG_OAI_RIGHTS_URI_SUBFIELD: + #rights_uri = value + #elif code == CFG_OAI_RIGHTS_CONTACT_SUBFIELD: + #contact = value + #elif CFG_OAI_RIGHTS_STATEMENT_SUBFIELD: + #statement = value + #if license: + #for code, value in license[0][0]: + #if code == CFG_OAI_LICENSE_TERMS_SUBFIELD: + #terms = value + #elif code == CFG_OAI_LICENSE_PUBLISHER_SUBFIELD: + #publisher = value + #elif code == CFG_OAI_LICENSE_URI_SUBFIELD: + #license_uri = value + +def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None): + """Prints record 'recid' formatted according to 'prefix'. - if record does not exist, return nothing. - if record has been deleted and CFG_OAI_DELETED_POLICY is 'transient' or 'deleted', then return only header, with status 'deleted'. - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no', then return nothing. Optional parameter 'record_exists_result' has the value of the result - of the record_exists(sysno) function (in order not to call that function + of the record_exists(recid) function (in order not to call that function again if already done.) 
""" - out = "" + record_exists_result = record_exists(recid) == 1 + if record_exists_result: + sets = get_field(recid, CFG_OAI_SET_FIELD) + if set_spec is not None and not set_spec in sets and not [set_ for set_ in sets if set_.startswith("%s:" % set_spec)]: + ## the record is not in the requested set, and is not + ## in any subset + record_exists_result = False - # sanity check: - if record_exists_result is not None: - _record_exists = record_exists_result + if record_exists_result: + status = None else: - _record_exists = record_exists(sysno) + status = 'deleted' - if not _record_exists: + if not record_exists_result and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'): return - if (format == "dc") or (format == "oai_dc"): - format = "xd" - - # print record opening tags: - - out = out + " \n" - - if _record_exists == -1: # Deleted? - if CFG_OAI_DELETED_POLICY == "persistent" or \ - CFG_OAI_DELETED_POLICY == "transient": - out = out + "
    \n" - else: - return - else: - out = out + "
    \n" - - for ident in get_field(sysno, CFG_OAI_ID_FIELD): - out = "%s %s\n" % (out, escape_space(ident)) - out = "%s %s\n" % (out, get_modification_date(sysno)) - for set in get_field(sysno, CFG_OAI_SET_FIELD): - if set: + idents = get_field(recid, CFG_OAI_ID_FIELD) + try: + assert idents, "No OAI ID for record %s, please do your checks!" % recid + except AssertionError, err: + register_exception(alert_admin=True) + return + try: + assert len(idents) == 1, "More than OAI ID found for recid %s. Considering only the first one, but please do your checks: %s" % (recid, idents) + except AssertionError, err: + register_exception(alert_admin=True) + ident = idents[0] + + header_body = EscapedXMLString('') + header_body += X.identifier()(ident) + header_body += X.datestamp()(get_modification_date(recid)) + for set_spec in get_field(recid, CFG_OAI_SET_FIELD): + if set_spec and set_spec != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC: # Print only if field not empty - out = "%s %s\n" % (out, set) - out = out + "
    \n" + header_body += X.setSpec()(set_spec) + + header = X.header(status=status)(header_body) - if _record_exists == -1: # Deleted? - pass + if verb == 'ListIdentifiers': + return header else: - out = out + " \n" - - if format == "marcxml": - formatted_record = get_preformatted_record(sysno, 'xm') - if formatted_record is not None: - ## MARCXML is already preformatted. Adapt it if needed - formatted_record = formatted_record.replace("", "\n 00000coc 2200000uu 4500") - formatted_record = formatted_record.replace("", "\n 00000coc 2200000uu 4500") - formatted_record = formatted_record.replace("" - out = out + " 00000coc 2200000uu 4500" - out = "%s %d\n" % (out, int(sysno)) - - for digit1 in range(0, 10): - for digit2 in range(0, 10): - bibbx = "bib%d%dx" % (digit1, digit2) - bibx = "bibrec_bib%d%dx" % (digit1, digit2) - query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ - "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\ - "ORDER BY bb.field_number, b.tag ASC" % (wash_table_column_name(bibbx), wash_table_column_name(bibx)) - res = run_sql(query, (sysno, str(digit1) + str(digit2) + "%")) - field_number_old = -999 - field_old = "" - for row in res: - field, value, field_number = row[0], row[1], row[2] - ind1, ind2 = field[3], field[4] - if ind1 == "_": - ind1 = " " - if ind2 == "_": - ind2 = " " - # print field tag - if field_number != field_number_old or field[:-1] != field_old[:-1]: - if format == "marcxml": - - if field_number_old != -999: - if field_old[0:2] == "00": - out = out + " \n" - else: - out = out + " \n" - - if field[0:2] == "00": - out = "%s \n" % (out, encode_for_xml(field[0:3])) - else: - out = "%s \n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower()) - - - field_number_old = field_number - field_old = field - # print subfield value - if format == "marcxml": - value = encode_for_xml(value) - - if(field[0:2] == "00"): - out = "%s %s\n" % (out, value) - else: - out = "%s %s\n" % (out, encode_for_xml(field[-1:]), value) - - - # fetch next subfield - # all fields/subfields printed in this run, so close the tag: - if (format == "marcxml") and field_number_old != -999: - if field_old[0:2] == "00": - out = out + " \n" - else: - out = out + " \n" - - out = out + " \n" - - elif format == "xd": - out += format_record(sysno, 'xoaidc') - - # print record closing tags: - - out = out + " \n" - - out = out + " \n" - - return out - -def oailistmetadataformats(args): - "Generates response to oailistmetadataformats verb." 
- - arg = parse_args(args) - - out = "" - - flag = 1 # list or not depending on identifier - - if arg['identifier'] != "": - - flag = 0 - - sysno = oaigetsysno(arg['identifier']) - _record_exists = record_exists(sysno) - if _record_exists == 1 or \ - (_record_exists == -1 and CFG_OAI_DELETED_POLICY != "no"): - - flag = 1 - + provenance = '' + rights_body = get_record_rights(recid) + if rights_body: + rights = X.about(body=rights_body) + else: + rights = '' else: + metadata = '' + provenance = '' + rights = '' + return X.record()(header, metadata, provenance, rights) - out = out + oai_error("idDoesNotExist","invalid record Identifier") - out = oai_error_header(args, "ListMetadataFormats") + out + oai_error_footer("ListMetadataFormats") - return out - - if flag: - out = out + " \n" - out = out + " oai_dc\n" - out = out + " http://www.openarchives.org/OAI/1.1/dc.xsd\n" - out = out + " http://purl.org/dc/elements/1.1/\n" - out = out + " \n" - out = out + " \n" - out = out + " marcxml\n" - out = out + " http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\n" - out = out + " http://www.loc.gov/MARC21/slim\n" - out = out + " \n" - - out = oai_header(args, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats") - return out - +def oai_list_metadata_formats(argd): + """Generates response to oai_list_metadata_formats verb.""" -def oailistrecords(args): - "Generates response to oailistrecords verb." - - arg = parse_args(args) + if argd.get('identifier'): + recid = oai_get_recid(argd['identifier']) + _record_exists = record_exists(recid) + if _record_exists != 1 and (_record_exists != -1 or CFG_OAI_DELETED_POLICY == "no"): + return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd['identifier'])]) out = "" - resumptionToken_printed = False - - sysnos = [] - sysno = [] - # check if the resumptionToken did not expire - if arg['resumptionToken']: - filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken']) - if os.path.exists(filename) == 0: - out = oai_error("badResumptionToken", "ResumptionToken expired") - out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") - return out - - if arg['resumptionToken'] != "": - sysnos = oaicacheout(arg['resumptionToken']) - arg['metadataPrefix'] = sysnos.pop() + for prefix, (dummy, schema, namespace) in CFG_OAI_METADATA_FORMATS.items(): + out += X.metadataFormat()( + X.metadataPrefix(prefix), + X.schema(schema), + X.metadataNamespace(namespace) + ) + + return oai_header(argd, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats") + +def oai_list_records_or_identifiers(req, argd): + """Generates response to oai_list_records verb.""" + + verb = argd['verb'] + resumption_token_was_specified = False + + # check if the resumption_token did not expire + if argd.get('resumptionToken'): + resumption_token_was_specified = True + try: + cache = oai_cache_load(argd['resumptionToken']) + last_recid = cache['last_recid'] + argd = cache['argd'] + complete_list = cache['complete_list'] + complete_list = filter_out_based_on_date_range(complete_list, argd.get('from', ''), argd.get('until', '')) + except Exception: + register_exception(alert_admin=True) + req.write(oai_error(argd, [("badResumptionToken", "ResumptionToken expired or invalid: %s" % argd['resumptionToken'])])) + return else: - sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until']) - - if len(sysnos) == 0: # noRecordsMatch error - - out = out + oai_error("noRecordsMatch", "no records correspond to the request") - out = 
oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords") - return out - - i = 0 - for sysno_ in sysnos: - if sysno_: - if i >= CFG_OAI_LOAD: # cache or write? - if not resumptionToken_printed: # resumptionToken? - arg['resumptionToken'] = oaigenresumptionToken() - extdate = oaigetresponsedate(CFG_OAI_EXPIRE) - if extdate: - out = "%s %s\n" % (out, extdate, arg['resumptionToken']) - else: - out = "%s %s\n" % (out, arg['resumptionToken']) - resumptionToken_printed = True - sysno.append(sysno_) - else: - _record_exists = record_exists(sysno_) - if not (_record_exists == -1 and CFG_OAI_DELETED_POLICY == "no"): - #Produce output only if record exists and had to be printed - i = i + 1 # Increment limit only if record is returned - res = print_record(sysno_, arg['metadataPrefix'], _record_exists) - if res: - out += res - - if resumptionToken_printed: - oaicacheclean() - sysno.append(arg['metadataPrefix']) - oaicachein(arg['resumptionToken'], sysno) - - out = oai_header(args, "ListRecords") + out + oai_footer("ListRecords") - return out + last_recid = 0 + complete_list = oai_get_recid_list(argd.get('set', ""), argd.get('from', ""), argd.get('until', "")) -def oailistsets(args): - "Lists available sets for OAI metadata harvesting." + if not complete_list: # noRecordsMatch error + req.write(oai_error(argd, [("noRecordsMatch", "no records correspond to the request")])) + return + + cursor = 0 + for cursor, recid in enumerate(complete_list): + ## Let's fast-forward the cursor to point after the last recid that was + ## disseminated successfully + if recid > last_recid: + break + + req.write(oai_header(argd, verb)) + for recid in list(complete_list)[cursor:cursor+CFG_OAI_LOAD]: + req.write(print_record(recid, argd['metadataPrefix'], verb=verb, set_spec=argd.get('set'))) + + if list(complete_list)[cursor+CFG_OAI_LOAD:]: + resumption_token = oai_generate_resumption_token(argd.get('set', '')) + cache = { + 'argd': argd, + 'last_recid': recid, + 'complete_list': complete_list.fastdump(), + } + oai_cache_dump(resumption_token, cache) + expdate = oai_get_response_date(CFG_OAI_EXPIRE) + req.write(X.resumptionToken(expirationDate=expdate, cursor=cursor, completeListSize=len(complete_list))(resumption_token)) + elif resumption_token_was_specified: + ## Since a resumptionToken was used we shall put a last empty resumptionToken + req.write(X.resumptionToken(cursor=cursor, completeListSize=len(complete_list))("")) + req.write(oai_footer(verb)) + oai_cache_gc() + +def oai_list_sets(argd): + """ + Lists available sets for OAI metadata harvesting. + """ out = "" # note: no flow control in ListSets - - sets = get_sets() - + sets = get_all_sets().values() + if not sets: + return oai_error(argd, [("noSetHierarchy", "No sets have been configured for this repository")]) for set_ in sets: - - out = out + " \n" - out = "%s %s\n" % (out, set_[0]) - out = "%s %s\n" % (out, set_[1]) + out += " \n" + out += X.setSpec()(set_[0]) + X.setName()(set_[1]) if set_[2]: - out = "%s %s\n" % (out, set_[2]) + out += X.setDescription()(set_[2]) out = out + " \n" - out = oai_header(args, "ListSets") + out + oai_footer("ListSets") - - return out + return oai_header(argd, "ListSets") + out + oai_footer("ListSets") -def oaigetrecord(args): +def oai_get_record(argd): """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting. - if record does not exist, return oai_error 'idDoesNotExist'. 
- if record has been deleted and CFG_OAI_DELETED_POLICY is 'transient' or 'deleted', then return only header, with status 'deleted'. - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no', then return oai_error 'idDoesNotExist'. """ - arg = parse_args(args) - out = "" - sysno = oaigetsysno(arg['identifier']) - _record_exists = record_exists(sysno) + recid = oai_get_recid(argd['identifier']) + _record_exists = record_exists(recid) if _record_exists == 1 or \ (_record_exists == -1 and CFG_OAI_DELETED_POLICY != 'no'): - out = print_record(sysno, arg['metadataPrefix'], _record_exists) - out = oai_header(args, "GetRecord") + out + oai_footer("GetRecord") - else: - out = oai_error("idDoesNotExist", "invalid record Identifier") - out = oai_error_header(args, "GetRecord") + out + oai_error_footer("GetRecord") - return out - -def oailistidentifiers(args): - "Prints OAI response to the ListIdentifiers verb." - - arg = parse_args(args) - - out = "" - resumptionToken_printed = False - - sysno = [] - sysnos = [] - - if arg['resumptionToken']: - filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken']) - if os.path.exists(filename) == 0: - out = out + oai_error("badResumptionToken", "ResumptionToken expired") - out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") - return out - - if arg['resumptionToken']: - sysnos = oaicacheout(arg['resumptionToken']) + out = print_record(recid, argd['metadataPrefix'], _record_exists) + out = oai_header(argd, "GetRecord") + out + oai_footer("GetRecord") else: - sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until']) - - if len(sysnos) == 0: # noRecordsMatch error - out = out + oai_error("noRecordsMatch", "no records correspond to the request") - out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers") - return out - - i = 0 - for sysno_ in sysnos: - if sysno_: - if i >= CFG_OAI_LOAD: # cache or write? - if not resumptionToken_printed: # resumptionToken? - arg['resumptionToken'] = oaigenresumptionToken() - extdate = oaigetresponsedate(CFG_OAI_EXPIRE) - if extdate: - out = "%s %s\n" % (out, extdate, arg['resumptionToken']) - else: - out = "%s %s\n" % (out, arg['resumptionToken']) - resumptionToken_printed = True - sysno.append(sysno_) - else: - _record_exists = record_exists(sysno_) - if (not _record_exists == -1 and CFG_OAI_DELETED_POLICY == "no"): - i = i + 1 # Increment limit only if record is returned - for ident in get_field(sysno_, CFG_OAI_ID_FIELD): - if ident != '': - if _record_exists == -1: #Deleted? - if CFG_OAI_DELETED_POLICY == "persistent" \ - or CFG_OAI_DELETED_POLICY == "transient": - out = out + "
    \n" - else: - # In that case, print nothing (do not go further) - break - else: - out = out + "
    \n" - out = "%s %s\n" % (out, escape_space(ident)) - out = "%s %s\n" % (out, get_modification_date(oaigetsysno(ident))) - for set in get_field(sysno_, CFG_OAI_SET_FIELD): - if set: - # Print only if field not empty - out = "%s %s\n" % (out, set) - out = out + "
    \n" - - if resumptionToken_printed: - oaicacheclean() # clean cache from expired resumptionTokens - oaicachein(arg['resumptionToken'], sysno) - - out = oai_header(args, "ListIdentifiers") + out + oai_footer("ListIdentifiers") - + return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd['identifier'])]) return out -def oaiidentify(args, script_url): - """Generates a response to oaiidentify verb. - Parameters: - args - *dict* query parameters +def oai_identify(argd): + """Generates a response to oai_identify verb. script_url - *str* URL of the script used to access the service. This is made necessary since the gateway can be accessed either via /oai2d or /oai2d/ (or for backward compatibility: oai2d.py or oai2d.py/), and that the base URL must be returned in the Identify response """ - out = """ %(CFG_SITE_NAME)s - %(CFG_SITE_URL)s%(script_url)s - 2.0 - %(CFG_SITE_SUPPORT_EMAIL)s - %(earliest_datestamp)s - %(CFG_OAI_DELETED_POLICY)s - %(granularity)s - %(compression)s - %(CFG_OAI_IDENTIFY_DESCRIPTION)s\n""" % \ - {"CFG_SITE_NAME": cgi.escape(CFG_SITE_NAME), - "CFG_SITE_URL": cgi.escape(CFG_SITE_URL), - "earliest_datestamp": cgi.escape(get_earliest_datestamp()), - "granularity": "YYYY-MM-DDThh:mm:ssZ", - "CFG_OAI_DELETED_POLICY": cgi.escape(CFG_OAI_DELETED_POLICY), - "CFG_SITE_SUPPORT_EMAIL": cgi.escape(CFG_SITE_SUPPORT_EMAIL), - "CFG_OAI_IDENTIFY_DESCRIPTION": CFG_OAI_IDENTIFY_DESCRIPTION, - "compression": CFG_WEBSTYLE_HTTP_USE_COMPRESSION and "deflate" or "", - "script_url": script_url} - - out = oai_header(args, "Identify") + out + oai_footer("Identify") + out = X.repositoryName()(CFG_SITE_NAME) + out += X.baseURL()(CFG_SITE_URL + '/oai2d') + out += X.protocolVersion()("2.0") + out += X.adminEmail()(CFG_SITE_SUPPORT_EMAIL) + out += X.earliestDatestamp()(get_earliest_datestamp()) + out += X.deletedRecord()(CFG_OAI_DELETED_POLICY) + out += X.granularity()("YYYY-MM-DDThh:mm:ssZ") + if CFG_WEBSTYLE_HTTP_USE_COMPRESSION: + out += X.compression()('deflate') + out += X.description("""""" + + X.scheme()("oai") + + X.repositoryIdentifier()(CFG_OAI_ID_PREFIX) + + X.delimiter()(":") + + X.sampleIdentifier()(CFG_OAI_SAMPLE_IDENTIFIER) + + """""") + out += CFG_OAI_IDENTIFY_DESCRIPTION + if CFG_OAI_FRIENDS: + friends = """""" + for baseurl in CFG_OAI_FRIENDS: + friends += X.baseURL()(baseurl) + friends += """""" + out += X.description(friends) + + out = oai_header(argd, "Identify") + out + oai_footer("Identify") return out -def oaigetrequesturl(args): - "Generates requesturl tag for OAI." - - # re_amp = re.compile('&') +def get_utc_now(): + """ + Return current UTC time in the OAI-PMH format. + """ + return datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ') - requesturl = CFG_SITE_URL + "/" + "oai2d/"# + "?" + re_amp.sub("&", args) +def oai_build_request_element(argd=None): + """ + Build the request tag. + """ + if argd is None: + argd = {} + return X.responseDate()(get_utc_now()) + X.request(**argd)("%s/oai2d" % CFG_SITE_URL) +def oai_get_request_url(): + """Generates requesturl tag for OAI.""" + requesturl = CFG_SITE_URL + "/oai2d" return requesturl -def oaigetresponsedate(delay=0): - "Generates responseDate tag for OAI." 
- +def oai_get_response_date(delay=0): + """Generates responseDate tag for OAI.""" return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() + delay)) - -def oai_error(code, msg): - "OAI error occured" - - return "%s\n" % (code, msg) - - -def oaigetsysno(identifier): - "Returns the first database BIB ID for the OAI identifier 'identifier', if it exists." - sysno = None +def oai_get_recid(identifier): + """Returns the first database BIB ID for the OAI identifier 'identifier', if it exists.""" + recid = None if identifier: query = "SELECT DISTINCT(bb.id_bibrec) FROM bib%sx AS bx, bibrec_bib%sx AS bb WHERE bx.tag=%%s AND bb.id_bibxxx=bx.id AND bx.value=%%s" % (CFG_OAI_ID_FIELD[0:2], CFG_OAI_ID_FIELD[0:2]) res = run_sql(query, (CFG_OAI_ID_FIELD, identifier)) for row in res: - sysno = row[0] - return sysno - - -def oaigetsysnolist(set="", fromdate="", untildate=""): - "Returns list of system numbers for the OAI set 'set', modified from 'fromdate' until 'untildate'." - from invenio.oai_repository_updater import get_set_definitions + recid = row[0] + return recid +def filter_out_based_on_date_range(recids, fromdate="", untildate=""): + """ Filter out recids based on date range.""" if fromdate != "": fromdate = normalize_date(fromdate, "T00:00:00Z") else: fromdate = get_earliest_datestamp() + fromdate = utc_to_localtime(fromdate) if untildate != "": untildate = normalize_date(untildate, "T23:59:59Z") else: untildate = get_latest_datestamp() - collections = [] - for set_definition in get_set_definitions(set): - collections.extend(coll.strip() for coll in set_definition['c'].split(',')) - recids = perform_request_search(f1=CFG_OAI_ID_FIELD, p1="oai:*", m1="e", op1='a', - f2=((set and CFG_OAI_SET_FIELD) or ""), p2=set, m2="e", - d1=utc_to_localtime(fromdate), - d2=utc_to_localtime(untildate), - c=collections, - dt='m', - ap=0) - ## Let's discard non public records - return list(intbitset(recids) - get_all_restricted_recids()) - -def oaigenresumptionToken(): - "Generates unique ID for resumption token management." - - return md5(str(time.time())).hexdigest() - - -def oaicachein(resumptionToken, sysnos): - "Stores or adds sysnos in cache. Input is a string of sysnos separated by commas." + untildate = utc_to_localtime(untildate) - filename = os.path.join(CFG_CACHEDIR, 'RTdata', resumptionToken) + recids = intbitset(recids) ## Let's clone :-) - fil = open(filename, "w") - cPickle.dump(sysnos, fil) - fil.close() - return 1 + if fromdate and untildate: + recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s", (fromdate, untildate))) + elif fromdate: + recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date >= %s", (fromdate, ))) + elif untildate: + recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date <= %s", (untildate, ))) + return recids - get_all_restricted_recids() - -def oaicacheout(resumptionToken): - "Restores string of comma-separated system numbers from cache." - - sysnos = [] - - filename = os.path.join(CFG_CACHEDIR, 'RTdata', resumptionToken) - - if oaicachestatus(resumptionToken): - fil = open(filename, "r") - sysnos = cPickle.load(fil) - fil.close() +def oai_get_recid_list(set_spec="", fromdate="", untildate=""): + """ + Returns list of recids for the OAI set 'set', modified from 'fromdate' until 'untildate'. 
+ """ + ret = intbitset() + if not set_spec: + ret |= search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD, type='e') + if CFG_OAI_DELETED_POLICY != 'no': + ret |= search_unit_in_bibxxx(p='*', f=CFG_OAI_PREVIOUS_SET_FIELD, type='e') else: - return 0 - return sysnos - - -def oaicacheclean(): - "Removes cached resumptionTokens older than specified" - - directory = os.path.join(CFG_CACHEDIR, 'RTdata') + ret |= search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD, type='e') + ret |= search_unit_in_bibxxx(p='%s:*' % set_spec, f=CFG_OAI_SET_FIELD, type='e') + if CFG_OAI_DELETED_POLICY != 'no': + ret |= search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_PREVIOUS_SET_FIELD, type='e') + ret |= search_unit_in_bibxxx(p='%s:*' % set_spec, f=CFG_OAI_PREVIOUS_SET_FIELD, type='e') + if CFG_OAI_DELETED_POLICY == 'no': + ret -= search_unit_in_bibxxx(p='DELETED', f='980__%', type='e') + if CFG_CERN_SITE: + ret -= search_unit_in_bibxxx(p='DUMMY', f='980__%', type='e') + return filter_out_based_on_date_range(ret, fromdate, untildate) + +def oai_generate_resumption_token(set_spec): + """Generates unique ID for resumption token management.""" + fd, name = tempfile.mkstemp(dir=os.path.join(CFG_CACHEDIR, 'RTdata'), prefix='%s___' % set_spec) + os.close(fd) + return os.path.basename(name) + +def oai_delete_resumption_tokens_for_set(set_spec): + """ + In case a set is modified by the admin interface, this will delete + any resumption token that is now invalid. + """ + aset = set_spec + while aset: + for name in iglob(os.path.join(CFG_CACHEDIR, 'RTdata', '%s___*' % set_spec)): + os.remove(name) + aset = aset.rsplit(":", 1)[0] + for name in iglob(os.path.join(CFG_CACHEDIR, 'RTdata', '___*')): + os.remove(name) + +def oai_cache_dump(resumption_token, cache): + """ + Given a resumption_token and the cache, stores the cache. + """ + cPickle.dump(cache, open(os.path.join(CFG_CACHEDIR, 'RTdata', resumption_token), 'w'), -1) - files = os.listdir(directory) +def oai_cache_load(resumption_token): + """ + Restores the cache from the resumption_token. + """ + fullpath = os.path.join(CFG_CACHEDIR, 'RTdata', resumption_token) + if os.path.dirname(os.path.abspath(fullpath)) != os.path.abspath(os.path.join(CFG_CACHEDIR, 'RTdata')): + raise ValueError("Invalid path") + return cPickle.load(open(fullpath)) - for file_ in files: - filename = os.path.join(directory, file_) +def oai_cache_gc(): + """ + OAI Cache Garbage Collector. + """ + for file_ in os.listdir(os.path.join(CFG_CACHEDIR, 'RTdata')): + filename = os.path.join(os.path.join(CFG_CACHEDIR, 'RTdata', file_)) # cache entry expires when not modified during a specified period of time if ((time.time() - os.path.getmtime(filename)) > CFG_OAI_EXPIRE): try: os.remove(filename) except OSError, e: # Most probably the cache was already deleted pass - return 1 - - -def oaicachestatus(resumptionToken): - "Checks cache status. Returns 0 for empty, 1 for full." - filename = os.path.join(CFG_CACHEDIR, 'RTdata', resumptionToken) - - if os.path.exists(filename): - if os.path.getsize(filename) > 0: - return 1 - else: - return 0 - else: - return 0 - - -def get_sets(): - "Returns list of sets." - # TODO: Try to remove dependency on oaiREPOSITORY table, by - # determining available sets from data. - - out = {} - row = ['', ''] - - query = "SELECT setSpec,setName,setDescription FROM oaiREPOSITORY" - res = run_sql(query) +def get_all_sets(): + """ + Return all the sets. 
+ """ + res = run_sql("SELECT setSpec, setName, setDescription FROM oaiREPOSITORY") + ret = {} for row in res: - row_bis = [row[0], row[1], row[2]] - out[row[0]] = row_bis - - return out.values() + ret[row[0]] = row + ## Let's expand with all the set that exist in the DB + for a_set in get_all_field_values(CFG_OAI_SET_FIELD): + if a_set not in ret: + ret[a_set] = (a_set, a_set, '') -def parse_args(args=""): - "Parse input args" + ## Let's expand with all the supersets + for a_set in ret.keys(): + while ':' in a_set: + try: + a_set = a_set.rsplit(":", 1)[0] + except AttributeError: + a_set = ':'.join(a_set.split(":")[:-1]) + if a_set not in ret: + ret[a_set] = (a_set, a_set, '') - out_args = { - "verb" : "", - "metadataPrefix" : "", - "from" : "", - "until" : "", - "set" : "", - "identifier" : "", - "resumptionToken" : "" - } + if CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC in ret: + ## Let's remove the special global set + del ret[CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC] - if args == "" or args is None: - pass - else: - - list_of_arguments = args.split('&') - - for item in list_of_arguments: - keyvalue = item.split('=') - if len(keyvalue) == 2: - if (out_args.has_key(keyvalue[0])): - if(out_args[keyvalue[0]] != ""): - out_args[keyvalue[0]] = "Error" - else: - out_args[keyvalue[0]] = urllib.unquote(keyvalue[1]) - else: - out_args[keyvalue[0]] = urllib.unquote(keyvalue[1]) - else: - out_args['verb'] = "" + if '' in ret: + ## '' is not a valid setSpec but might be in the MARC + del ret[''] - return out_args + return ret -def check_argd(arguments): +def check_argd(argd): """ Check OAI arguments Also transform them from lists to strings. """ - - out = "" - -## no several times the same argument -# -# - for param, value in arguments.iteritems(): - if len(value) > 1 and not 'The request includes illegal arguments' in out: - out = out + oai_error("badArgument", "The request includes illegal arguments") - bad_arguments_error = True + errors = [] + + ## no several times the same argument + bad_arguments_error = False + for param, value in argd.iteritems(): + if len(value) > 1 and not bad_arguments_error: + errors.append(("badArgument", "More than one value specified for the %s argument: %s" % (param, value))) + bad_arguments_error = True ## This is needed only once if len(value) > 0: - arguments[param] = value[0] + argd[param] = value[0] else: - arguments[param] = '' + argd[param] = '' -## principal argument required -# -# - if verbs.has_key(arguments['verb']): - pass - else: - out = out + oai_error("badVerb", "Illegal OAI verb") - -## defined args -# -# - for param in arguments.keys(): - if not param in verbs.get(arguments['verb'], []) and param != 'verb' \ - and not 'The request includes illegal arguments' in out: - out = out + oai_error("badArgument", "The request includes illegal arguments") + ## principal argument required + if argd['verb'] not in CFG_VERBS: + errors.append(("badVerb", "Illegal OAI verb: %s" % argd['verb'])) + + ## defined argd + for param in argd.keys(): + if not param in CFG_VERBS.get(argd['verb'], []) and param != 'verb' \ + and not bad_arguments_error: + errors.append(("badArgument", "The request includes illegal arguments for the given verb: %s" % param)) + bad_arguments_error = True break # Indicate only once -## resumptionToken exclusive -# -# - if arguments.get('resumptionToken', '') != "" and \ - len(arguments.keys()) != 2 and \ - not 'The request includes illegal arguments' in out: - out = out + oai_error("badArgument", - "The request includes illegal arguments") - -## 
resumptionToken not empty when defined -# -# - if arguments.get('resumptionToken', None) == '': - out = out + oai_error("badResumptionToken", - "ResumptionToken invalid") - -## datestamp formats -# -# - if arguments.has_key('from') and \ - 'from' in verbs.get(arguments['verb'], []): - from_length = len(arguments['from']) - if check_date(arguments['from']) == "": - out = out + oai_error("badArgument", - "Bad datestamp format in from") + ## resumptionToken exclusive + if argd.get('resumptionToken', '') != "" and \ + len(argd.keys()) != 2 and not bad_arguments_error: + errors.append(("badArgument", "The resumptionToken was specified together with other arguments")) + bad_arguments_error = True + + if argd.get('resumptionToken', None) == '': + errors.append(("badResumptionToken", "ResumptionToken invalid: %s" % argd.get('resumptionToken', None))) + + ## datestamp formats + if argd.has_key('from') and \ + 'from' in CFG_VERBS.get(argd['verb'], []): + from_length = len(argd['from']) + if check_date(argd['from']) == "": + errors.append(("badArgument", "Bad datestamp format in from: %s" % argd['from'])) else: from_length = 0 - if arguments.has_key('until') and \ - 'until' in verbs.get(arguments['verb'], []): - until_length = len(arguments['until']) - if check_date(arguments['until']) == "": - out = out + oai_error("badArgument", - "Bad datestamp format in until") + if argd.has_key('until') and \ + 'until' in CFG_VERBS.get(argd['verb'], []): + until_length = len(argd['until']) + if check_date(argd['until']) == "": + errors.append(("badArgument", "Bad datestamp format in until: %s" % argd['until'])) else: until_length = 0 if from_length != 0: if until_length != 0: if from_length != until_length: - out = out + oai_error("badArgument", - "Bad datestamp format") - - if arguments.has_key('from') and arguments.has_key('until') \ - and arguments['from'] > arguments['until'] and \ - 'from' in verbs.get(arguments['verb'], []) and \ - 'until' in verbs.get(arguments['verb'], []): - out = out + oai_error("badArgument", "Wrong date") - -## Identify exclusive -# -# - if arguments['verb'] == "Identify" and \ - len(arguments.keys()) != 1: - if not 'The request includes illegal arguments' in out: # Do not repeat this error - out = out + oai_error("badArgument", - "The request includes illegal arguments") - -## parameters for GetRecord -# -# - if arguments['verb'] == "GetRecord" and \ - not arguments.has_key('identifier'): - out = out + oai_error("badArgument", - "Record identifier missing") - - if arguments['verb'] == "GetRecord" and \ - not arguments.has_key('metadataPrefix'): - out = out + oai_error("badArgument", - "Missing metadataPrefix") - -## parameters for ListRecords and ListIdentifiers -# -# - if (arguments['verb'] == "ListRecords" or arguments['verb'] == "ListIdentifiers") and \ - (not arguments.has_key('resumptionToken') and not arguments.has_key('metadataPrefix')): - out = out + oai_error("badArgument", "Missing metadataPrefix") - -## Metadata prefix defined and valid -# -# - if arguments.has_key('metadataPrefix') and \ - not arguments['metadataPrefix'] in params['metadataPrefix']: - out = out + oai_error("cannotDisseminateFormat", "Chosen format is not supported") + errors.append(("badArgument", "From and until have two different formats: %s Vs. 
%s" % (from_length, until_length))) + + if argd.has_key('from') and argd.has_key('until') \ + and argd['from'] > argd['until'] and \ + 'from' in CFG_VERBS.get(argd['verb'], []) and \ + 'until' in CFG_VERBS.get(argd['verb'], []): + errors.append(("badArgument", "from argument comes after until argument: %s > %s" % (argd['from'], argd['until']))) + + ## Identify exclusive + if argd['verb'] == "Identify" and \ + len(argd.keys()) != 1: + if not bad_arguments_error: # Do not repeat this error + errors.append(("badArgument", "The request includes illegal arguments")) + bad_arguments_error = True - return out + ## parameters for GetRecord + if argd['verb'] == "GetRecord" and \ + not argd.has_key('identifier'): + errors.append(("badArgument", "Record identifier missing")) + + if argd['verb'] == "GetRecord" and \ + not argd.has_key('metadataPrefix'): + errors.append(("badArgument", "Missing metadataPrefix")) + + ## parameters for ListRecords and ListIdentifiers + if (argd['verb'] == "ListRecords" or argd['verb'] == "ListIdentifiers") and \ + (not argd.has_key('resumptionToken') and not argd.has_key('metadataPrefix')): + errors.append(("badArgument", "Missing metadataPrefix")) + + ## Metadata prefix defined and valid + if argd.has_key('metadataPrefix') and \ + not argd['metadataPrefix'] in CFG_OAI_METADATA_FORMATS: + errors.append(("cannotDisseminateFormat", "Chosen format is not supported. Valid formats are: %s" % ', '.join(CFG_OAI_METADATA_FORMATS.keys()))) + + return errors def oai_profile(): """ Runs a benchmark """ - oailistrecords('set=&from=&metadataPrefix=oai_dc&verb=ListRecords&resumptionToken=&identifier=&until=') - #oailistrecords('set=&from=&metadataPrefix=marcxml&verb=ListRecords&resumptionToken=&identifier=&until=') - #oailistidentifiers('set=&from=&metadataPrefix=oai_dc&verb=ListIdentifiers&resumptionToken=&identifier=&until=') - + from cStringIO import StringIO + oai_list_records_or_identifiers(StringIO(), argd={"metadataPrefix": "oai_dc", "verb": "ListRecords"}) + oai_list_records_or_identifiers(StringIO(), argd={"metadataPrefix": "marcxml", "verb" :"ListRecords"}) + oai_list_records_or_identifiers(StringIO(), argd={"metadataPrefix": "oai_dc", "verb": "ListIdentifiers"}) return if __name__ == "__main__": import profile import pstats profile.run('oai_profile()', "oai_profile") p = pstats.Stats("oai_profile") p.strip_dirs().sort_stats("cumulative").print_stats() diff --git a/modules/bibharvest/lib/oai_repository_tests.py b/modules/bibharvest/lib/oai_repository_tests.py index 5c34c4523..04eb0ccf7 100644 --- a/modules/bibharvest/lib/oai_repository_tests.py +++ b/modules/bibharvest/lib/oai_repository_tests.py @@ -1,94 +1,87 @@ # -*- coding: utf-8 -*- ## Invenio OAI repository unit tests. ## ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Unit tests for the oai repository.""" __revision__ = "$Id$" import unittest import re +from cStringIO import StringIO + from invenio import oai_repository_server from invenio.testutils import make_test_suite, run_test_suite class TestVerbs(unittest.TestCase): """Test for OAI verb functionality.""" def test_verbs(self): """oairepository - testing verbs""" - self.assertNotEqual(None, re.search("Identify", oai_repository_server.oaiidentify("", None))) - self.assertNotEqual(None, re.search("ListIdentifiers", oai_repository_server.oailistidentifiers(""))) - self.assertNotEqual(None, re.search("ListRecords", oai_repository_server.oailistrecords(""))) - self.assertNotEqual(None, re.search("ListMetadataFormats", oai_repository_server.oailistmetadataformats(""))) - self.assertNotEqual(None, re.search("ListSets", oai_repository_server.oailistsets(""))) - self.assertNotEqual(None, re.search("GetRecord", oai_repository_server.oaigetrecord(""))) + self.assertNotEqual(None, re.search("Identify", oai_repository_server.oai_identify({'verb': 'Identify'}))) + ret = StringIO() + oai_repository_server.oai_list_records_or_identifiers(ret, {'verb': 'ListIdentifiers', 'metadataPrefix': 'marcxml'}) + self.assertNotEqual(None, re.search("ListIdentifiers", ret.getvalue())) + ret = StringIO() + oai_repository_server.oai_list_records_or_identifiers(ret, {'verb': 'ListRecords', 'metadataPrefix': 'marcxml'}) + self.assertNotEqual(None, re.search("ListRecords", ret.getvalue())) + self.assertNotEqual(None, re.search("ListMetadataFormats", oai_repository_server.oai_list_metadata_formats({'verb': 'ListMetadataFormats'}))) + self.assertNotEqual(None, re.search("ListSets", oai_repository_server.oai_list_sets({'verb': 'ListSets'}))) + self.assertNotEqual(None, re.search("GetRecord", oai_repository_server.oai_get_record({'identifier': 'oai:atlantis.cern.ch:1', 'verb': 'GetRecord'}))) class TestErrorCodes(unittest.TestCase): """Test for handling OAI error codes.""" def test_issue_error_identify(self): """oairepository - testing error codes""" - self.assertNotEqual(None, re.search("badVerb", oai_repository_server.check_argd({'verb':"IllegalVerb"}))) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"Identify", - 'test':"test"}))) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"ListIdentifiers", + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"IllegalVerb"}) if code == 'badVerb']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"Identify", + 'test':"test"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListIdentifiers", 'metadataPrefix':"oai_dc", 'from':"some_random_date", - 'until':"some_random_date"}))) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"ListIdentifiers", + 'until':"some_random_date"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListIdentifiers", 'metadataPrefix':"oai_dc", 'from':"2001-01-01", - 'until':"2002-01-01T00:00:00Z"}))) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"ListIdentifiers"}))) - self.assertNotEqual(None, re.search("cannotDisseminateFormat", oai_repository_server.check_argd({'verb':"ListIdentifiers", - 'metadataPrefix':"illegal_mdp"}))) + 
'until':"2002-01-01T00:00:00Z"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListIdentifiers"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListIdentifiers", + 'metadataPrefix':"illegal_mdp"}) if code == 'cannotDisseminateFormat']) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"ListIdentifiers", + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListIdentifiers", 'metadataPrefix':"oai_dc", - 'metadataPrefix':"oai_dc"}))) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"ListRecords", + 'metadataPrefix':"oai_dc"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListRecords", 'metadataPrefix':"oai_dc", 'set':"really_wrong_set", 'from':"some_random_date", - 'until':"some_random_date"}))) - self.assertNotEqual(None, re.search("badArgument", oai_repository_server.check_argd({'verb':"ListRecords"}))) - - self.assertNotEqual(None, re.search("badResumptionToken", oai_repository_server.check_argd({'verb': 'ListRecords', 'resumptionToken': ''}))) - -class TestEncodings(unittest.TestCase): - """Test for OAI response encodings.""" - - def test_encoding(self): - """oairepository - testing encodings""" - - self.assertEqual("<&>", oai_repository_server.encode_for_xml("<&>")) - self.assertEqual("%20", oai_repository_server.escape_space(" ")) - self.assertEqual("%25%20%3F%23%3D%26%2F%3A%3B%2B", oai_repository_server.encode_for_url("% ?#=&/:;+")) - + 'until':"some_random_date"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb':"ListRecords"}) if code == 'badArgument']) + self.assertNotEqual([], [code for (code, dummy_text) in oai_repository_server.check_argd({'verb': 'ListRecords', 'resumptionToken': ''}) if code == 'badResumptionToken']) TEST_SUITE = make_test_suite(TestVerbs, - TestErrorCodes, - TestEncodings,) + TestErrorCodes) if __name__ == "__main__": run_test_suite(TEST_SUITE) diff --git a/modules/bibharvest/lib/oai_repository_updater.py b/modules/bibharvest/lib/oai_repository_updater.py index c3110d7ce..52796dabb 100644 --- a/modules/bibharvest/lib/oai_repository_updater.py +++ b/modules/bibharvest/lib/oai_repository_updater.py @@ -1,543 +1,512 @@ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""OAI Repository administration tool - Updates the metadata of the records to include OAI identifiers and OAI SetSpec according to the settings defined in OAI Repository admin interface """ -__revision__ = "$Id$" - import os import sys import time if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from tempfile import mkstemp +from pprint import pformat from invenio.config import \ CFG_OAI_ID_FIELD, \ CFG_OAI_ID_PREFIX, \ CFG_OAI_SET_FIELD, \ - CFG_BINDIR, \ + CFG_OAI_PREVIOUS_SET_FIELD, \ CFG_SITE_NAME, \ CFG_TMPDIR -from invenio.search_engine import perform_request_search, get_record -from invenio.search_engine_utils import get_fieldvalues -from invenio.intbitset import intbitset as HitSet +from invenio.oai_repository_config import CFG_OAI_REPOSITORY_MARCXML_SIZE, \ + CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC +from invenio.search_engine import perform_request_search, get_record, search_unit_in_bibxxx +from invenio.intbitset import intbitset from invenio.dbquery import run_sql from invenio.bibtask import \ task_get_option, \ task_set_option, \ write_message, \ task_update_progress, \ task_init, \ - task_sleep_now_if_required + task_sleep_now_if_required, \ + task_low_level_submission from invenio.bibrecord import \ - record_delete_subfield, \ - field_xml_output - -DATAFIELD_SET_HEAD = \ - "" % \ - (CFG_OAI_SET_FIELD[0:3], - CFG_OAI_SET_FIELD[3:4].replace('_', ' '), - CFG_OAI_SET_FIELD[4:5].replace('_', ' ')) -DATAFIELD_ID_HEAD = \ - "" % \ - (CFG_OAI_ID_FIELD[0:3], - CFG_OAI_ID_FIELD[3:4].replace('_', ' '), - CFG_OAI_ID_FIELD[4:5].replace('_', ' ')) + record_get_field_value, \ + record_get_field_values, \ + record_add_field, \ + record_xml_output def get_set_definitions(set_spec): """ Retrieve set definitions from oaiREPOSITORY table. The set definitions are the search patterns that define the records which are in the set """ set_definitions = [] query = "select setName, setDefinition from oaiREPOSITORY where setSpec=%s" res = run_sql(query, (set_spec, )) for (set_name, set_definition) in res: params = parse_set_definition(set_definition) params['setSpec'] = set_spec params['setName'] = set_name + set_definitions.append(params) return set_definitions def parse_set_definition(set_definition): """ Returns the parameters for the given set definition. The returned structure is a dictionary with keys being c, p1, f1, m1, p2, f2, m2, p3, f3, m3 and corresponding values @param set_definition: a string as returned by the database for column 'setDefinition' @return: a dictionary """ params = {'c':'', 'p1':'', 'f1':'', 'm1':'', 'p2':'', 'f2':'', 'm2':'', 'p3':'', 'f3':'', 'm3':'', 'op1':'a', 'op2':'a'} definitions = set_definition.split(';') for definition in definitions: arguments = definition.split('=') if len(arguments) == 2: params[arguments[0]] = arguments[1] return params def all_set_specs(): """ Returns the list of (distinct) setSpecs defined in the settings. This also include the "empty" setSpec if any setting uses it. Note: there can be several times the same setSpec in the settings, given that a setSpec might be defined by several search queries. 
Here we return distinct values """ query = "SELECT DISTINCT setSpec FROM oaiREPOSITORY" res = run_sql(query) return [row[0] for row in res] def get_recids_for_set_spec(set_spec): """ - Returns the list (as HitSet) of recids belonging to 'set' + Returns the list (as intbitset) of recids belonging to 'set' Parameters: set_spec - *str* the set_spec for which we would like to get the recids """ - recids = HitSet() + recids = intbitset() for set_def in get_set_definitions(set_spec): new_recids = perform_request_search(c=[coll.strip() \ for coll in set_def['c'].split(',')], p1=set_def['p1'], f1=set_def['f1'], m1=set_def['m1'], op1=set_def['op1'], p2=set_def['p2'], f2=set_def['f2'], m2=set_def['m2'], op2=set_def['op2'], p3=set_def['p3'], f3=set_def['f3'], m3=set_def['m3'], ap=0) - recids = recids.union(HitSet(new_recids)) + recids |= intbitset(new_recids) return recids def get_set_name_for_set_spec(set_spec): """ Returns the OAI setName of a setSpec. Note that the OAI Repository admin lets the user add several set definition with the same setSpec, and possibly with different setNames... -> Returns the first (non empty) one found. Parameters: set_spec - *str* the set_spec for which we would like to get the setName """ query = "select setName from oaiREPOSITORY where setSpec=%s and setName!=''" res = run_sql(query, (set_spec, )) if len(res) > 0: return res[0][0] else: return "" -def print_repository_status(write_message=write_message, +def print_repository_status(local_write_message=write_message, verbose=0): """ Prints the repository status to the standard output. Parameters: write_message - *function* the function used to write the output verbose - *int* the verbosity of the output - 0: print repository size - 1: print quick status of each set (numbers can be wrong if the repository is in some inconsistent state, i.e. a record is in an OAI setSpec but has not OAI ID) - 2: print detailed status of repository, with number of records that needs to be synchronized according to the sets definitions. Precise, but ~slow... """ repository_size_s = "%d" % repository_size() - repository_recids_after_update = HitSet() + repository_recids_after_update = intbitset() - write_message(CFG_SITE_NAME) - write_message(" OAI Repository Status") + local_write_message(CFG_SITE_NAME) + local_write_message(" OAI Repository Status") set_spec_max_length = 19 # How many max char do we display for set_name_max_length = 20 # setName and setSpec? if verbose == 0: # Just print repository size - write_message(" Total(**)" + " " * 29 + + local_write_message(" Total(**)" + " " * 29 + " " * (9 - len(repository_size_s)) + repository_size_s) return elif verbose == 1: # We display few information: show longer set name and spec set_spec_max_length = 30 set_name_max_length = 30 - write_message("=" * 80) + local_write_message("=" * 80) header = " setSpec" + " " * (set_spec_max_length - 7) + \ " setName" + " " * (set_name_max_length - 5) + " Volume" if verbose > 1: header += " " * 5 + "After update(*):" - write_message(header) + local_write_message(header) if verbose > 1: - write_message(" " * 57 + "Additions Deletions") + local_write_message(" " * 57 + "Additions Deletions") - write_message("-" * 80) + local_write_message("-" * 80) for set_spec in all_set_specs(): if verbose <= 1: # Get the records that are in this set. This is an # incomplete check, as it can happen that some records are # in this set (according to the metadata) but have no OAI # ID (so they are not exported). 
This can happen if the # repository has some records coming from external # sources, or if it has never been synchronized with this # tool. - current_recids = perform_request_search(c=CFG_SITE_NAME, - p1=set_spec, - f1=CFG_OAI_SET_FIELD, - m1="e", ap=0) + current_recids = get_recids_for_set_spec(set_spec) nb_current_recids = len(current_recids) else: # Get the records that are *currently* exported for this # setSpec - current_recids = perform_request_search(c=CFG_SITE_NAME, - p1=set_spec, - f1=CFG_OAI_SET_FIELD, - m1="e", ap=0, op1="a", - p2="oai:*", - f2=CFG_OAI_ID_FIELD, - m2="e") + current_recids = search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD, type='e') nb_current_recids = len(current_recids) # Get the records that *should* be in this set according to # the admin defined settings, and compute how many should be # added or removed should_recids = get_recids_for_set_spec(set_spec) - repository_recids_after_update = repository_recids_after_update.union(should_recids) + repository_recids_after_update |= should_recids - nb_add_recids = len(HitSet(should_recids).difference(HitSet(current_recids))) - nb_remove_recids = len(HitSet(current_recids).difference(HitSet(should_recids))) + nb_add_recids = len(should_recids - current_recids) + nb_remove_recids = len(current_recids - should_recids) nb_should_recids = len(should_recids) - nb_recids_after_update = len(repository_recids_after_update) # Adapt setName and setSpec strings lengths set_spec_str = set_spec if len(set_spec_str) > set_spec_max_length : set_spec_str = "%s.." % set_spec_str[:set_spec_max_length] set_name_str = get_set_name_for_set_spec(set_spec) if len(set_name_str) > set_name_max_length : set_name_str = "%s.." % set_name_str[:set_name_max_length] row = " " + set_spec_str + \ " " * ((set_spec_max_length + 2) - len(set_spec_str)) + set_name_str + \ " " * ((set_name_max_length + 2) - len(set_name_str)) + \ " " * (7 - len(str(nb_current_recids))) + str(nb_current_recids) if verbose > 1: row += \ " " * max(9 - len(str(nb_add_recids)), 0) + '+' + str(nb_add_recids) + \ " " * max(7 - len(str(nb_remove_recids)), 0) + '-' + str(nb_remove_recids) + " = " +\ " " * max(7 - len(str(nb_should_recids)), 0) + str(nb_should_recids) - write_message(row) + local_write_message(row) - write_message("=" * 80) + local_write_message("=" * 80) footer = " Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \ " " * (9 - len(repository_size_s)) + repository_size_s if verbose > 1: - footer += ' ' * (28 - len(str(nb_recids_after_update))) + str(nb_recids_after_update) - write_message(footer) + footer += ' ' * (28 - len(str(len(repository_recids_after_update)))) + str(len(repository_recids_after_update)) + local_write_message(footer) if verbose > 1: - write_message(' *The "after update" columns show the repository after you run this tool.') + local_write_message(' *The "after update" columns show the repository after you run this tool.') else: - write_message(' *"Volume" is indicative if repository is out of sync. Use --detailed-report.') - write_message('**The "total" is not the sum of the above numbers, but the union of the records.') + local_write_message(' *"Volume" is indicative if repository is out of sync. 
Use --detailed-report.') + local_write_message('**The "total" is not the sum of the above numbers, but the union of the records.') def repository_size(): - "Read repository size" - return len(perform_request_search(p1="oai:*", - f1=CFG_OAI_ID_FIELD, - m1="e", - ap=0)) + """Read repository size""" + return len(search_unit_in_bibxxx(p="*", f=CFG_OAI_SET_FIELD, type="e")) ### MAIN ### - def oairepositoryupdater_task(): """Main business logic code of oai_archive""" no_upload = task_get_option("no_upload") report = task_get_option("report") if report > 1: print_repository_status(verbose=report) return True + initial_snapshot = {} + for set_spec in all_set_specs(): + initial_snapshot[set_spec] = get_set_definitions(set_spec) + write_message("Initial set snapshot: %s" % pformat(initial_snapshot), verbose=2) + task_update_progress("Fetching records to process") - # Build the list of records to be processed, that is, search for - # the records that match one of the search queries defined in OAI - # Repository admin interface. - recids_for_set = {} # Remember exactly which record belongs to which set - recids = HitSet() # "Flat" set of the recids_for_set values + recids_with_oaiid = search_unit_in_bibxxx(p='*', f=CFG_OAI_ID_FIELD, type='e') + write_message("%s recids have an OAI ID" % len(recids_with_oaiid), verbose=2) + + all_current_recids = search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD, type='e') + no_more_exported_recids = intbitset(all_current_recids) + write_message("%s recids are currently exported" % (len(all_current_recids)), verbose=2) + + all_affected_recids = intbitset() + all_should_recids = intbitset() + recids_for_set = {} for set_spec in all_set_specs(): - task_sleep_now_if_required(can_stop_too=True) - _recids = get_recids_for_set_spec(set_spec) - recids_for_set[set_spec] = _recids - recids = recids.union(_recids) - - # Also get the list of records that are currently exported through - # OAI and that might need to be refreshed - oai_recids = perform_request_search(c=CFG_SITE_NAME, - p1='oai:%s:*' % CFG_OAI_ID_PREFIX, - f1=CFG_OAI_ID_FIELD, - m1="e", ap=0) - recids = recids.union(HitSet(oai_recids)) + if not set_spec: + set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC + should_recids = get_recids_for_set_spec(set_spec) + recids_for_set[set_spec] = should_recids + no_more_exported_recids -= should_recids + all_should_recids |= should_recids + current_recids = search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD, type='e') + write_message("%s recids should be in %s. 
Currently %s are in %s" % (len(should_recids), set_spec, len(current_recids), set_spec), verbose=2)
+        to_add = should_recids - current_recids
+        write_message("%s recids should be added to %s" % (len(to_add), set_spec), verbose=2)
+        to_remove = current_recids - should_recids
+        write_message("%s recids should be removed from %s" % (len(to_remove), set_spec), verbose=2)
+        affected_recids = to_add | to_remove
+        write_message("%s recids should hence be updated for %s" % (len(affected_recids), set_spec), verbose=2)
+        all_affected_recids |= affected_recids
+
+    missing_oaiid = all_should_recids - recids_with_oaiid
+    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
+    write_message("%s recids should no longer be exported" % len(no_more_exported_recids))
+
+    ## Let's add records with missing OAI ID
+    all_affected_recids |= missing_oaiid | no_more_exported_recids
+    write_message("%s recids should be updated" % (len(all_affected_recids)), verbose=2)
+
+    if not all_affected_recids:
+        write_message("Nothing to do!")
+        return True
 
     # Prepare to save results in a tmp file
     (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                              prefix='oairepository_' + \
                              time.strftime("%Y%m%d_%H%M%S_",
                                            time.localtime()))
     oai_out = os.fdopen(fd, "w")
-    oai_out.write('<collection xmlns="http://www.loc.gov/MARC21/slim">')
-    has_updated_records = False
+    oai_out.write("<collection xmlns=\"http://www.loc.gov/MARC21/slim\">")
+
+    tot = 0
 
     # Iterate over the recids
-    i = 0
-    for recid in recids:
-        i += 1
+    for i, recid in enumerate(all_affected_recids):
         task_sleep_now_if_required(can_stop_too=True)
         task_update_progress("Done %s out of %s records." % \
-                             (i, len(recids)))
+                             (i, len(all_affected_recids)))
+
+        write_message("Elaborating recid %s" % recid, verbose=3)
+        record = get_record(recid)
+        if not record:
+            write_message("Record %s seems empty. Let's skip it." % recid, verbose=3)
+            continue
+        new_record = {}
 
         # Check if an OAI identifier is already in the record or
         # not.
- oai_id_entry = "oai:%s:%s\n" % \ - (CFG_OAI_ID_FIELD[5:6], CFG_OAI_ID_PREFIX, recid) - already_has_oai_id = True - oai_ids = [_oai_id for _oai_id in \ - get_fieldvalues(recid, CFG_OAI_ID_FIELD) \ - if _oai_id.strip() != ''] - if len(oai_ids) == 0: - already_has_oai_id = False + assign_oai_id_entry = False + oai_id_entry = record_get_field_value(record, tag=CFG_OAI_ID_FIELD[:3], ind1=CFG_OAI_ID_FIELD[3], ind2=CFG_OAI_ID_FIELD[4], code=CFG_OAI_ID_FIELD[5]) + if not oai_id_entry: + assign_oai_id_entry = True + oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid) + write_message("Setting new oai_id %s for record %s" % (oai_id_entry, recid), verbose=3) + else: + write_message("Already existing oai_id %s for record %s" % (oai_id_entry, recid), verbose=3) # Get the sets to which this record already belongs according # to the metadata - current_oai_sets = set(\ - [_oai_set for _oai_set in \ - get_fieldvalues(recid, CFG_OAI_SET_FIELD) \ - if _oai_set.strip() != '']) + current_oai_sets = set(record_get_field_values(record, tag=CFG_OAI_SET_FIELD[:3], ind1=CFG_OAI_SET_FIELD[3], ind2=CFG_OAI_SET_FIELD[4], code=CFG_OAI_SET_FIELD[5])) + write_message("Record %s currently belongs to these oai_sets: %s" % (recid, ", ".join(current_oai_sets)), verbose=3) + + current_previous_oai_sets = set(record_get_field_values(record, tag=CFG_OAI_PREVIOUS_SET_FIELD[:3], ind1=CFG_OAI_PREVIOUS_SET_FIELD[3], ind2=CFG_OAI_PREVIOUS_SET_FIELD[4], code=CFG_OAI_PREVIOUS_SET_FIELD[5])) + write_message("Record %s currently doesn't belong anymore to these oai_sets: %s" % (recid, ", ".join(current_previous_oai_sets)), verbose=3) # Get the sets that should be in this record according to # settings - updated_oai_sets = set(\ - [_set for _set, _recids in recids_for_set.iteritems() - if recid in _recids if _set]) + updated_oai_sets = set(_set for _set, _recids in recids_for_set.iteritems() + if recid in _recids) + write_message("Record %s now belongs to these oai_sets: %s" % (recid, ", ".join(updated_oai_sets)), verbose=3) + + updated_previous_oai_sets = set(_set for _set in (current_previous_oai_sets - updated_oai_sets) | + (current_oai_sets - updated_oai_sets)) + write_message("Record %s now doesn't belong anymore to these oai_sets: %s" % (recid, ", ".join(updated_previous_oai_sets)), verbose=3) # Ok, we have the old sets and the new sets. If they are equal # and oai ID does not need to be added, then great, nothing to # change . Otherwise apply the new sets. - if current_oai_sets == updated_oai_sets and already_has_oai_id: + if current_oai_sets == updated_oai_sets and not assign_oai_id_entry: + write_message("Nothing has changed for record %s, let's move on!" 
% recid, verbose=3) continue # Jump to next recid - has_updated_records = True - - # Generate the xml sets entry - oai_set_entry = '\n'.join(["%s" % \ - (CFG_OAI_SET_FIELD[5:6], _oai_set) \ - for _oai_set in updated_oai_sets if \ - _oai_set]) + \ - "\n" - - # Also get all the datafields with tag and indicator matching - # CFG_OAI_SET_FIELD[:5] and CFG_OAI_ID_FIELD[:5] but with - # subcode != CFG_OAI_SET_FIELD[5:6] and subcode != - # CFG_OAI_SET_FIELD[5:6], so that we can preserve these values - other_data = marcxml_filter_out_tags(recid, [CFG_OAI_SET_FIELD, - CFG_OAI_ID_FIELD]) - - if CFG_OAI_ID_FIELD[0:5] == CFG_OAI_SET_FIELD[0:5]: - # Put set and OAI ID in the same datafield - oai_out.write("\n") - oai_out.write("%s" - "\n" % recid) - oai_out.write(DATAFIELD_ID_HEAD) - oai_out.write("\n") - #if oai_id_entry: - oai_out.write(oai_id_entry) - #if oai_set_entry: - oai_out.write(oai_set_entry) - oai_out.write("\n") - oai_out.write(other_data) - oai_out.write("\n") - else: - oai_out.write("\n") - oai_out.write("%s" - "\n" % recid) - oai_out.write(DATAFIELD_ID_HEAD) - oai_out.write("\n") - oai_out.write(oai_id_entry) - oai_out.write("\n") - oai_out.write(DATAFIELD_SET_HEAD) - oai_out.write("\n") - oai_out.write(oai_set_entry) - oai_out.write("\n") - oai_out.write(other_data) - oai_out.write("\n") - - oai_out.write('') + write_message("Something has changed for record %s, let's update it!" % recid, verbose=3) + subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)] + for oai_set in updated_oai_sets: + subfields.append((CFG_OAI_SET_FIELD[5], oai_set)) + for oai_set in updated_previous_oai_sets: + subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set)) + + record_add_field(new_record, tag="001", controlfield_value=str(recid)) + record_add_field(new_record, tag=CFG_OAI_ID_FIELD[:3], ind1=CFG_OAI_ID_FIELD[3], ind2=CFG_OAI_ID_FIELD[4], subfields=subfields) + oai_out.write(record_xml_output(new_record)) + tot += 1 + if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE: + oai_out.write("") + oai_out.close() + write_message("Wrote to file %s" % filename) + if not no_upload: + task_low_level_submission('bibupload', 'oairepository', '-c', filename, '-n') + # Prepare to save results in a tmp file + (fd, filename) = mkstemp(dir=CFG_TMPDIR, + prefix='oairepository_' + \ + time.strftime("%Y%m%d_%H%M%S_", + time.localtime())) + oai_out = os.fdopen(fd, "w") + oai_out.write("") + tot = 0 + task_sleep_now_if_required(can_stop_too=True) + + oai_out.write("") oai_out.close() write_message("Wrote to file %s" % filename) if not no_upload: task_sleep_now_if_required(can_stop_too=True) - if has_updated_records: - command = "%s/bibupload -c %s -u oairepository" % (CFG_BINDIR, filename) - os.system(command) + if tot > 0: + task_low_level_submission('bibupload', 'oairepository', '-c', filename, '-n') else: os.remove(filename) return True -def marcxml_filter_out_tags(recid, fields): - """ - Returns the fields of record 'recid' that share the same tag and - indicators as those specified in 'fields', but for which the - subfield is different. This is nice to emulate a bibupload -c that - corrects only specific subfields. - - Parameters: - recid - *int* the id of the record to process - - fields - *list(str)* the list of fields that we want to filter - out. 
Eg ['909COp', '909COo'] """ - out = '' - - record = get_record(recid) - - # Delete subfields that we want to replace - for field in fields: - record_delete_subfield(record, - tag=field[0:3], - ind1=field[3:4], - ind2=field[4:5], - subfield_code=field[5:6]) - - # Select only datafields that share tag + indicators - processed_tags_and_ind = [] - for field in fields: - if not field[0:5] in processed_tags_and_ind: - # Ensure that we do not process twice the same datafields - processed_tags_and_ind.append(field[0:5]) - for datafield in record.get(field[0:3], []): - if datafield[1] == field[3:4].replace('_', ' ') and \ - datafield[2] == field[4:5].replace('_', ' ') and \ - datafield[0]: - out += field_xml_output(datafield, field[0:3]) + '\n' - - return out - ######################### def main(): """Main that constructs all the bibtask.""" # if there is any -r or --report option (or other similar options) # in the arguments, just print the status and exit (do not run # through BibSched...) + if (CFG_OAI_ID_FIELD[:5] != CFG_OAI_SET_FIELD[:5]) or \ + (CFG_OAI_ID_FIELD[:5] != CFG_OAI_PREVIOUS_SET_FIELD[:5]): + print >> sys.stderr, """\ +ERROR: since Invenio 1.0 the OAI ID and the OAI Set must be stored in the same +field. Please revise your configuration for the variables + CFG_OAI_ID_FIELD (currently set to %s) + CFG_OAI_SET_FIELD (currently set to %s) + CFG_OAI_PREVIOUS_SET_FIELD (currently set to %s)""" % ( + CFG_OAI_ID_FIELD, + CFG_OAI_SET_FIELD, + CFG_OAI_PREVIOUS_SET_FIELD + ) + sys.exit(1) mode = -1 if '-d' in sys.argv[1:] or '--detailed-report' in sys.argv[1:]: mode = 2 elif '-r' in sys.argv[1:] or '--report' in sys.argv[1:]: mode = 1 if mode != -1: - def write_message(*args): + def local_write_message(*args): """Overload BibTask function so that it does not need to run in BibSched environment""" sys.stdout.write(args[0] + '\n') - print_repository_status(write_message=write_message, - verbose=mode) + print_repository_status(local_write_message=local_write_message, verbose=mode) return task_init(authorization_action='runoairepository', authorization_msg="OAI Archive Task Submission", description="Examples:\n" " Expose records according to sets defined in OAI Repository admin interface\n" " $ oairepositoryupdater \n" " Expose records according to sets defined in OAI Repository admin interface and update them every day\n" " $ oairepositoryupdater -s24\n" " Print OAI repository status\n" " $ oairepositoryupdater -r\n" " Print OAI repository detailed status\n" " $ oairepositoryupdater -d\n\n", help_specific_usage="Options:\n" " -r --report\t\tOAI repository status\n" " -d --detailed-report\t\tOAI repository detailed status\n" " -n --no-process\tDo not upload the modifications\n", - version=__revision__, specific_params=("rdn", [ "report", "detailed-report", "no-process"]), task_submit_elaborate_specific_parameter_fnc= task_submit_elaborate_specific_parameter, task_run_fnc=oairepositoryupdater_task) -def task_submit_elaborate_specific_parameter(key, value, opts, args): +def task_submit_elaborate_specific_parameter(key, _value, _opts, _args): """Elaborate specific CLI parameters of oairepositoryupdater""" if key in ("-r", "--report"): task_set_option("report", 1) elif key in ("-d", "--detailed-report"): task_set_option("report", 2) elif key in ("-n", "--no-process"): task_set_option("no_upload", 1) else: return False return True ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibharvest/lib/oai_repository_webinterface.py
b/modules/bibharvest/lib/oai_repository_webinterface.py index 936eea99a..f58b578d2 100644 --- a/modules/bibharvest/lib/oai_repository_webinterface.py +++ b/modules/bibharvest/lib/oai_repository_webinterface.py @@ -1,154 +1,148 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Invenio OAI provider interface, compliant with OAI-PMH/2.0""" __revision__ = "$Id$" import os -import urllib import time -from invenio import webinterface_handler_config as apache +import cStringIO +from invenio import webinterface_handler_config as apache from invenio import oai_repository_server -from invenio.config import CFG_CACHEDIR, CFG_OAI_SLEEP +from invenio.errorlib import register_exception +from invenio.config import CFG_CACHEDIR, CFG_OAI_SLEEP, CFG_DEVEL_SITE, \ + CFG_ETCDIR from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory +CFG_VALIDATE_RESPONSES = False +OAI_PMH_VALIDATOR = None + +if CFG_DEVEL_SITE: + try: + from lxml import etree + OAI_PMH_VALIDATOR = etree.XMLSchema(etree.parse(open(os.path.join(CFG_ETCDIR, 'bibharvest', 'OAI-PMH.xsd')))) + CFG_VALIDATE_RESPONSES = True + except ImportError: + pass + class WebInterfaceOAIProviderPages(WebInterfaceDirectory): """Defines the set of /oai2d OAI provider pages.""" _exports = [''] def __call__(self, req, form): - "OAI repository interface" + """OAI repository interface""" # Clean input arguments. The protocol specifies that an error # has to be returned if the same argument is specified several # times. Eg: # oai2d?verb=ListIdentifiers&metadataPrefix=marcxml&metadataPrefix=marcxml # So keep the arguments as list for now so that check_argd can # return an error if needed (check_argd also transforms these # lists into strings) argd = wash_urlargd(form, {'verb': (list, []), 'metadataPrefix': (list, []), 'from': (list, []), 'until': (list, []), 'set': (list, []), 'identifier': (list, []), 'resumptionToken': (list, []), }) + if CFG_VALIDATE_RESPONSES: + req.track_writings = True + ## wash_urlargd(..) function cleaned everything, but also added ## unwanted parameters. Remove them now for param in argd.keys(): if not param in form and param != 'verb': del argd[param] ## wash_urlargd(..) function also removed unknown parameters ## that we would like to keep in order to send back an error ## as required by the protocol. But we do not need that value, ## so set it to empty string. for param in form.keys(): if param not in argd.keys(): argd[param] = '' ## But still remove 'ln' parameter that was automatically added. 
if argd.has_key('ln'): del argd['ln'] ## check request for OAI compliancy ## also transform all the list arguments into string - oai_error = oai_repository_server.check_argd(argd) + oai_errors = oai_repository_server.check_argd(argd) ## check availability (OAI requests for Identify, ListSets and ## ListMetadataFormats are served immediately, otherwise we ## shall wait for CFG_OAI_SLEEP seconds between requests): - if os.path.exists("%s/RTdata/RTdata" % CFG_CACHEDIR) and (argd['verb'] not in ["Identify", "ListMetadataFormats", "ListSets"]): + if os.path.exists("%s/RTdata/RTdata" % CFG_CACHEDIR) and (argd['verb'] not in ["Identify", "ListMetadataFormats", "ListSets"] and not argd.get('resumptionToken')): time_gap = int(time.time() - os.path.getmtime("%s/RTdata/RTdata" % CFG_CACHEDIR)) if(time_gap < CFG_OAI_SLEEP): req.headers_out["Status-Code"] = "503" req.headers_out["Retry-After"] = "%d" % (CFG_OAI_SLEEP - time_gap) req.status = apache.HTTP_SERVICE_UNAVAILABLE return "Retry after %d seconds" % (CFG_OAI_SLEEP - time_gap) command = "touch %s/RTdata/RTdata" % CFG_CACHEDIR os.system(command) - ## construct args (argd string equivalent) for the - ## oai_repository_server business logic (later it may be good if it - ## takes argd directly): - args = urllib.urlencode(argd) ## create OAI response - req.content_type = "text/xml" req.send_http_header() - if oai_error == "": - + if not oai_errors: ## OAI Identify - if argd['verb'] == "Identify": - req.write(oai_repository_server.oaiidentify(args, script_url=req.uri)) - + req.write(oai_repository_server.oai_identify(argd)) ## OAI ListSets - elif argd['verb'] == "ListSets": - req.write(oai_repository_server.oailistsets(args)) - - - ## OAI ListIdentifiers - - elif argd['verb'] == "ListIdentifiers": - req.write(oai_repository_server.oailistidentifiers(args)) - - - ## OAI ListRecords - - elif argd['verb'] == "ListRecords": - req.write(oai_repository_server.oailistrecords(args)) + req.write(oai_repository_server.oai_list_sets(argd)) + ## OAI ListIdentifiers or OAI ListRecords + elif argd['verb'] in ("ListIdentifiers", "ListRecords"): + oai_repository_server.oai_list_records_or_identifiers(req, argd) ## OAI GetRecord - elif argd['verb'] == "GetRecord": - req.write(oai_repository_server.oaigetrecord(args)) - + req.write(oai_repository_server.oai_get_record(argd)) ## OAI ListMetadataFormats - elif argd['verb'] == "ListMetadataFormats": - req.write(oai_repository_server.oailistmetadataformats(args)) - + req.write(oai_repository_server.oai_list_metadata_formats(argd)) ## Unknown verb - else: - req.write(oai_repository_server.oai_error("badVerb","Illegal OAI verb")) - - ## OAI error - else: - req.write(oai_repository_server.oai_header(args,"")) - req.write(oai_error) - req.write(oai_repository_server.oai_footer("")) - + req.write(oai_repository_server.oai_error(argd, oai_errors)) + + if CFG_VALIDATE_RESPONSES: + req.track_writings = False + try: + OAI_PMH_VALIDATOR.assertValid(etree.parse(cStringIO.StringIO(req.what_was_written))) + except etree.DocumentInvalid: + register_exception(req=req, alert_admin=True) + raise return "\n" ## Return the same page wether we ask for /oai2d?verb or /oai2d/?verb index = __call__ diff --git a/modules/bibrank/doc/hacking/bibrank-api.webdoc b/modules/bibrank/doc/hacking/bibrank-api.webdoc index 3fa9a488d..d0757e88b 100644 --- a/modules/bibrank/doc/hacking/bibrank-api.webdoc +++ b/modules/bibrank/doc/hacking/bibrank-api.webdoc @@ -1,137 +1,137 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. 
## Copyright (C) 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
     Invenio BibRank Record Sorter can be called from within your Python
     programs via a high-level API and a mid-level API.
     
     1. High-level API
     
        Description:
     
           The high-level access to the BibRank Record Sorter is provided
           by exactly the same function as called from the web interface
           when users submit their queries, if a rank method has been
           selected. This should guarantee exactly the same behaviour if
           the same parameters are given.
     
           There are three things to note: (i) When a search is done, the
    -      search engine is sending a HitSet containing all the records that
    -      matches the query. Since only records in the HitSet are ranked, a
    -      HitSet must be created containing wished records to rank and be sent
     +      search engine is sending an intbitset containing all the records that
     +      match the query. Since only records in the intbitset are ranked, an
     +      intbitset must be created containing the records to rank and be sent
           as a parameter to the function. (ii) Some rank methods may choose to
    -      ignore the HitSet, like the "Similar Records" function. (iii) In case
    +      ignore the intbitset, like the "Similar Records" function. (iii) In case
           of an error ranking the records, the returned data is different.
     
        Signature:
     
           def rank_records(rank_method_code, rank_limit_relevance,
                            hitset_global, pattern, verbose=0):
            """
            rank_method_code - 'jif', 'wrd' or other methods
            rank_limit_relevance - a number defining the threshold above which
            records are ranked; may be ignored by the rank method.
            hitset_global - search engine hits; if all records should be
    -       ranked, fill the HitSet with ones.
    +       ranked, fill the intbitset with ones.
            pattern - search engine query or record ID; must be a
            list, e.g. ['CERN', 'fermilab'] or ['recid:12345']
            verbose - verbose level (0-9) defining how much debug information
            should be shown
     
            output if successful:
            list of records - [123, 321, 12451, 123, 12, 4]
            list of rank values - ascending, same length as the list of
            records [0, 10, 20, 30, 40, 100]
            prefix - text to show before the rank value, '<--' hides rank
            value, defined in config file.
            postfix - text to show after the rank value, '-!>' hides rank
            value, defined in config file.
            verbose_output - the debug text depending on the verbose level.
     
            output if error:
            list of records - is None
            list of rank values - is None
            prefix - Contains headline of error
            postfix - Error message or error box if exception.
            verbose_output - the debug text depending on the verbose level.
            """
     
     
        Examples:
     
           >>> # import the function:
           >>> from invenio.bibrank_record_sorter import rank_records
           >>> # rank all records with the words 'higgs boson' according to the method "wrd"
           >>> rank_records('wrd', 0, a_hitset, ['higgs', 'boson'], 0)
           >>> # find similar records to the record 12345, hitset is here ignored because of 'similar records'
           >>> rank_records('wrd', 0, a_hitset, ['recid:12345'], 0)
           >>> # rank all records based on jif value
           >>> rank_records('jif', 0, a_hitset, [], 0)
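
           Note: the examples above assume that a_hitset has already been
           built. A minimal sketch of building one (record IDs 1, 2 and 5
           are chosen here purely for illustration):

           >>> from invenio.intbitset import intbitset
           >>> a_hitset = intbitset()
           >>> a_hitset += (1, 2, 5)
           >>> # or, to rank all records, fill the intbitset with ones:
           >>> a_hitset = intbitset(trailing_bits=1)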
     
     2. Mid-level API
     
        Description:
     
           Using the mid-level API, you can directly call the various ranking
           methods. The functions will not return data in a way the search
           engine understands. Neither will they check that the correct
           function is being called, though they do return an error if a wrong
           code/function is used.
     
        Signatures:
           def combine_method(rank_method_code, pattern, hitset, rank_limit_relevance,verbose):
           -This method calls each method mentioned in the config file and adds the results together
     
           def find_similar(rank_method_code, recID, hitset, rank_limit_relevance,verbose):
           -This method finds similar records based on the one given in the recID field. The recID field
            must be an integer value. hitset is ignored. rank_method_code has to be 'wrd'.
     
           def word_frequency(rank_method_code, lwords, hitset, rank_limit_relevance,verbose):
           -This method ranks records based on the list of words in the lwords field. rank_method_code has to be
            'wrd'. Only records in hitset are ranked.
     
           def rank_by_method(rank_method_code, lwords, hitset, rank_limit_relevance,verbose):
           -All other rank methods use this function together with data from the rnkMETHODDATA table
            (a dictionary of {recid: (text, value)}) to rank the data. Only records in the hitset are ranked.
     
           These mid-level API functions demand that the function create_rnkmethod_cache() has been called
           first, since it loads the needed config options (see the sketch after the examples below).
           All the rank methods return the same data:
           ([[recid, value],[recid, value]], prefix, postfix, verbose_output)
     
        Examples:
     
           >>> # import the function:
           >>> from invenio.bibrank_record_sorter import find_similar
           >>> # find records similar to 12345, hitset must be full
           >>> find_similar('wrd', 12345, hitset, 0, 0)
           >>> # rank records according to a method called jif, using the single_tag...based method.
           >>> # the list of words is here ignored, only the records in the hitset are used.
           >>> rank_by_method('jif',['higgs'], hitset, 0, 0)
           >>> # rank records containing ['higgs', 'boson'] using word similarity ('wrd')
           >>> word_similarity('wrd',['higgs', 'boson'], hitset, 0, 0)
           >>> # rank records using various methods, which methods to use is read from the config file.
           >>> combine_method('cmb', ['higgs','boson'], hitset, 0, 0)
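
           Putting it together, a minimal end-to-end sketch for the mid-level
           API. It assumes a configured 'wrd' method, an existing record 12345,
           and that create_rnkmethod_cache() is importable from the same module:

           >>> from invenio.bibrank_record_sorter import create_rnkmethod_cache, find_similar
           >>> from invenio.intbitset import intbitset
           >>> # load the config options needed by the rank methods:
           >>> create_rnkmethod_cache()
           >>> # find_similar wants a full hitset:
           >>> hitset = intbitset(trailing_bits=1)
           >>> results, prefix, postfix, verbose_output = find_similar('wrd', 12345, hitset, 0, 0)
           >>> # results is a list of [recid, value] pairs, ready for display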
     
    diff --git a/modules/bibrank/lib/bibrank_record_sorter_tests.py b/modules/bibrank/lib/bibrank_record_sorter_tests.py index 95e9c694c..e7fc70ec0 100644 --- a/modules/bibrank/lib/bibrank_record_sorter_tests.py +++ b/modules/bibrank/lib/bibrank_record_sorter_tests.py @@ -1,53 +1,53 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Unit tests for the ranking engine.""" __revision__ = "$Id$" import unittest from invenio import bibrank_record_sorter -from invenio.search_engine import HitSet +from invenio.intbitset import intbitset from invenio.testutils import make_test_suite, run_test_suite class TestListSetOperations(unittest.TestCase): """Test list set operations.""" def test_record_sorter(self): """bibrank record sorter - sorting records""" - hitset = HitSet() + hitset = intbitset() hitset += (1,2,5) - hitset2 = HitSet() + hitset2 = intbitset() hitset2.add(5) rec_termcount = {1: 1, 2: 1, 5: 1} (res1, res2) = bibrank_record_sorter.sort_record_relevance({1: 50, 2:30, 3:70,4:10},rec_termcount,hitset, 50,0) self.assertEqual(([(1, 71), (3, 100)], list(hitset2)), (res1, list(res2))) def test_calculate_record_relevance(self): """bibrank record sorter - calculating relevances""" - hitset = HitSet() + hitset = intbitset() hitset += (1,2,5) self.assertEqual(({1: 7, 2: 7, 5: 5}, {1: 1, 2: 1, 5: 1}), bibrank_record_sorter.calculate_record_relevance(("testterm", 2.0), {"Gi":(0, 50.0), 1: (3, 4.0), 2: (4, 5.0), 5: (1, 3.5)}, hitset, {}, {}, 0, None)) TEST_SUITE = make_test_suite(TestListSetOperations,) if __name__ == "__main__": run_test_suite(TEST_SUITE) diff --git a/modules/bibrank/lib/bibrank_tag_based_indexer.py b/modules/bibrank/lib/bibrank_tag_based_indexer.py index 63d2cc12e..e81c22c63 100644 --- a/modules/bibrank/lib/bibrank_tag_based_indexer.py +++ b/modules/bibrank/lib/bibrank_tag_based_indexer.py @@ -1,546 +1,546 @@ # -*- coding: utf-8 -*- ## Ranking of records using different parameters and methods. ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
import os import sys import time import ConfigParser from invenio.config import \ CFG_SITE_LANG, \ CFG_ETCDIR, \ CFG_PREFIX -from invenio.search_engine import perform_request_search, HitSet +from invenio.search_engine import perform_request_search from invenio.bibrank_citation_indexer import get_citation_weight, print_missing, get_cit_dict, insert_into_cit_db from invenio.bibrank_downloads_indexer import * from invenio.dbquery import run_sql, serialize_via_marshal, deserialize_via_marshal, \ wash_table_column_name, get_table_update_time from invenio.errorlib import register_exception from invenio.bibtask import task_get_option, write_message, task_sleep_now_if_required from invenio.bibindex_engine import create_range_list from invenio.intbitset import intbitset options = {} def remove_auto_cites(dic): """Remove auto-cites and dedupe.""" for key in dic.keys(): new_list = dic.fromkeys(dic[key]).keys() try: new_list.remove(key) except ValueError: pass dic[key] = new_list return dic def citation_repair_exec(): """Repair citation ranking method""" ## repair citations for rowname in ["citationdict","reversedict"]: ## get dic dic = get_cit_dict(rowname) ## repair write_message("Repairing %s" % rowname) dic = remove_auto_cites(dic) ## store healthy citation dic insert_into_cit_db(dic, rowname) return def download_weight_filtering_user_repair_exec (): """Repair download weight filtering user ranking method""" write_message("Repairing for this ranking method is not defined. Skipping.") return def download_weight_total_repair_exec(): """Repair download weight total ranking method""" write_message("Repairing for this ranking method is not defined. Skipping.") return def file_similarity_by_times_downloaded_repair_exec(): """Repair file similarity by times downloaded ranking method""" write_message("Repairing for this ranking method is not defined. Skipping.") return def single_tag_rank_method_repair_exec(): """Repair single tag ranking method""" write_message("Repairing for this ranking method is not defined. Skipping.") return def citation_exec(rank_method_code, name, config): """Rank method for citation analysis""" #first check if this is a specific task begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) if task_get_option("cmd") == "print-missing": num = task_get_option("num") print_missing(num) else: dict = get_citation_weight(rank_method_code, config) if dict: intoDB(dict, begin_date, rank_method_code) else: write_message("no need to update the indexes for citations") def download_weight_filtering_user(run): return bibrank_engine(run) def download_weight_total(run): return bibrank_engine(run) def file_similarity_by_times_downloaded(run): return bibrank_engine(run) def download_weight_filtering_user_exec (rank_method_code, name, config): """Ranking by number of downloads per User. 
Only one full-text download is taken into account for one specific user IP address""" begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time1 = time.time() dic = fromDB(rank_method_code) last_updated = get_lastupdated(rank_method_code) keys = new_downloads_to_index(last_updated) filter_downloads_per_hour(keys, last_updated) dic = get_download_weight_filtering_user(dic, keys) intoDB(dic, begin_date, rank_method_code) time2 = time.time() return {"time":time2-time1} def download_weight_total_exec(rank_method_code, name, config): """Ranking by total number of downloads without checking the user IP: if a user downloads the same full-text document 3 times, it is counted as 3 downloads""" begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time1 = time.time() dic = fromDB(rank_method_code) last_updated = get_lastupdated(rank_method_code) keys = new_downloads_to_index(last_updated) filter_downloads_per_hour(keys, last_updated) dic = get_download_weight_total(dic, keys) intoDB(dic, begin_date, rank_method_code) time2 = time.time() return {"time":time2-time1} def file_similarity_by_times_downloaded_exec(rank_method_code, name, config): """Update the dictionary {recid:[(recid, nb page similarity), ()..]}""" begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) time1 = time.time() dic = fromDB(rank_method_code) last_updated = get_lastupdated(rank_method_code) keys = new_downloads_to_index(last_updated) filter_downloads_per_hour(keys, last_updated) dic = get_file_similarity_by_times_downloaded(dic, keys) intoDB(dic, begin_date, rank_method_code) time2 = time.time() return {"time":time2-time1} def single_tag_rank_method_exec(rank_method_code, name, config): """Creating the rank method data""" begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) rnkset = {} rnkset_old = fromDB(rank_method_code) rnkset_new = single_tag_rank(config) rnkset = union_dicts(rnkset_old, rnkset_new) intoDB(rnkset, begin_date, rank_method_code) def single_tag_rank(config): """Connect the given tag with the data from the kb file given""" write_message("Loading knowledgebase file", verbose=9) kb_data = {} records = [] write_message("Reading knowledgebase file: %s" % \ config.get(config.get("rank_method", "function"), "kb_src")) input = open(config.get(config.get("rank_method", "function"), "kb_src"), 'r') data = input.readlines() for line in data: if not line[0:1] == "#": kb_data[string.strip((string.split(string.strip(line), "---"))[0])] = (string.split(string.strip(line), "---"))[1] write_message("Number of lines read from knowledgebase file: %s" % len(kb_data)) tag = config.get(config.get("rank_method", "function"), "tag") tags = config.get(config.get("rank_method", "function"), "check_mandatory_tags").split(", ") if tags == ['']: tags = "" records = [] for (recids, recide) in options["recid_range"]: task_sleep_now_if_required(can_stop_too=True) write_message("......Processing records #%s-%s" % (recids, recide)) recs = run_sql("SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (tag, recids, recide)) - valid = HitSet(trailing_bits=1) + valid = intbitset(trailing_bits=1) valid.discard(0) for key in tags: - newset = HitSet() + newset = intbitset() newset += [recid[0] for recid in (run_sql("SELECT id_bibrec FROM bib%sx, bibrec_bib%sx WHERE id_bibxxx=id AND tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (key, recids, recide)))]
valid.intersection_update(newset) if tags: recs = filter(lambda x: x[0] in valid, recs) records = records + list(recs) write_message("Number of records found with the necessary tags: %s" % len(records)) records = filter(lambda x: x[0] in options["validset"], records) rnkset = {} for key, value in records: if kb_data.has_key(value): if not rnkset.has_key(key): rnkset[key] = float(kb_data[value]) else: if kb_data.has_key(rnkset[key]) and float(kb_data[value]) > float((rnkset[key])[1]): rnkset[key] = float(kb_data[value]) else: rnkset[key] = 0 write_message("Number of records available in rank method: %s" % len(rnkset)) return rnkset def get_lastupdated(rank_method_code): """Get the last time the rank method was updated""" res = run_sql("SELECT rnkMETHOD.last_updated FROM rnkMETHOD WHERE name=%s", (rank_method_code, )) if res: return res[0][0] else: raise Exception("Is this the first run? Please do a complete update.") def intoDB(dict, date, rank_method_code): """Insert the rank method data into the database""" mid = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, )) del_rank_method_codeDATA(rank_method_code) serdata = serialize_via_marshal(dict); midstr = str(mid[0][0]); run_sql("INSERT INTO rnkMETHODDATA(id_rnkMETHOD, relevance_data) VALUES (%s,%s)", (midstr, serdata,)) run_sql("UPDATE rnkMETHOD SET last_updated=%s WHERE name=%s", (date, rank_method_code)) # FIXME: the following is a workaround for the citation indexer # memory troubles, when Apache WSGI daemon processes may end up # doubling the memory after citation dictionary is updated; # therefore let us restart the WSGI daemon application after the # citation indexer finished, which relieves this problem. The # restart is done via touching invenio.wsgi file. The proper fix # for this problem would be strict separation between citation # indexer updating dicts and citation searcher loading dicts. if rank_method_code == 'citation': os.system('touch ' + os.path.join(CFG_PREFIX, 'var', 'www-wsgi', 'invenio.wsgi')) def fromDB(rank_method_code): """Get the data for a rank method""" id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, )) res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], )) if res: return deserialize_via_marshal(res[0][0]) else: return {} def del_rank_method_codeDATA(rank_method_code): """Delete the data for a rank method""" id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, )) run_sql("DELETE FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], )) def del_recids(rank_method_code, range_rec): """Delete some records from the rank method""" id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, )) res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], )) if res: rec_dict = deserialize_via_marshal(res[0][0]) write_message("Old size: %s" % len(rec_dict)) for (recids, recide) in range_rec: for i in range(int(recids), int(recide)): if rec_dict.has_key(i): del rec_dict[i] write_message("New size: %s" % len(rec_dict)) intoDB(rec_dict, begin_date, rank_method_code) else: write_message("Create before deleting!") def union_dicts(dict1, dict2): "Returns union of the two dicts." 
union_dict = {} for (key, value) in dict1.iteritems(): union_dict[key] = value for (key, value) in dict2.iteritems(): union_dict[key] = value return union_dict def rank_method_code_statistics(rank_method_code): """Print statistics""" method = fromDB(rank_method_code) max = ('', -999999) maxcount = 0 min = ('', 999999) mincount = 0 for (recID, value) in method.iteritems(): if value < min and value > 0: min = value if value > max: max = value for (recID, value) in method.iteritems(): if value == min: mincount += 1 if value == max: maxcount += 1 write_message("Showing statistic for selected method") write_message("Method name: %s" % getName(rank_method_code)) write_message("Short name: %s" % rank_method_code) write_message("Last run: %s" % get_lastupdated(rank_method_code)) write_message("Number of records: %s" % len(method)) write_message("Lowest value: %s - Number of records: %s" % (min, mincount)) write_message("Highest value: %s - Number of records: %s" % (max, maxcount)) write_message("Divided into 10 sets:") for i in range(1, 11): setcount = 0 distinct_values = {} lower = -1.0 + ((float(max + 1) / 10)) * (i - 1) upper = -1.0 + ((float(max + 1) / 10)) * i for (recID, value) in method.iteritems(): if value >= lower and value <= upper: setcount += 1 distinct_values[value] = 1 write_message("Set %s (%s-%s) %s Distinct values: %s" % (i, lower, upper, len(distinct_values), setcount)) def check_method(rank_method_code): write_message("Checking rank method...") if len(fromDB(rank_method_code)) == 0: write_message("Rank method not yet executed, please run it to create the necessary data.") else: if len(add_recIDs_by_date(rank_method_code)) > 0: write_message("Records modified, update recommended") else: write_message("No records modified, update not necessary") def bibrank_engine(run): """Run the indexing task. Return 1 in case of success and 0 in case of failure. """ try: import psyco psyco.bind(single_tag_rank) psyco.bind(single_tag_rank_method_exec) psyco.bind(serialize_via_marshal) psyco.bind(deserialize_via_marshal) except StandardError, e: pass startCreate = time.time() try: options["run"] = [] options["run"].append(run) for rank_method_code in options["run"]: task_sleep_now_if_required(can_stop_too=True) cfg_name = getName(rank_method_code) write_message("Running rank method: %s." 
% cfg_name) file = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg" config = ConfigParser.ConfigParser() try: config.readfp(open(file)) except StandardError, e: write_message("Cannot find configurationfile: %s" % file, sys.stderr) raise StandardError cfg_short = rank_method_code cfg_function = config.get("rank_method", "function") + "_exec" cfg_repair_function = config.get("rank_method", "function") + "_repair_exec" cfg_name = getName(cfg_short) options["validset"] = get_valid_range(rank_method_code) if task_get_option("collection"): l_of_colls = string.split(task_get_option("collection"), ", ") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID, recID]) options["recid_range"] = recIDs_range elif task_get_option("id"): options["recid_range"] = task_get_option("id") elif task_get_option("modified"): options["recid_range"] = add_recIDs_by_date(rank_method_code, task_get_option("modified")) elif task_get_option("last_updated"): options["recid_range"] = add_recIDs_by_date(rank_method_code) else: write_message("No records specified, updating all", verbose=2) min_id = run_sql("SELECT min(id) from bibrec")[0][0] max_id = run_sql("SELECT max(id) from bibrec")[0][0] options["recid_range"] = [[min_id, max_id]] if task_get_option("quick") == "no": write_message("Recalculate parameter not used, parameter ignored.", verbose=9) if task_get_option("cmd") == "del": del_recids(cfg_short, options["recid_range"]) elif task_get_option("cmd") == "add": func_object = globals().get(cfg_function) func_object(rank_method_code, cfg_name, config) elif task_get_option("cmd") == "stat": rank_method_code_statistics(rank_method_code) elif task_get_option("cmd") == "check": check_method(rank_method_code) elif task_get_option("cmd") == "print-missing": func_object = globals().get(cfg_function) func_object(rank_method_code, cfg_name, config) elif task_get_option("cmd") == "repair": func_object = globals().get(cfg_repair_function) func_object() else: write_message("Invalid command found processing %s" % rank_method_code, sys.stderr) raise StandardError except StandardError, e: write_message("\nException caught: %s" % e, sys.stderr) register_exception() raise StandardError if task_get_option("verbose"): showtime((time.time() - startCreate)) return 1 def get_valid_range(rank_method_code): """Return a range of records""" write_message("Getting records from collections enabled for rank method.", verbose=9) res = run_sql("SELECT collection.name FROM collection, collection_rnkMETHOD, rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s", (rank_method_code, )) l_of_colls = [] for coll in res: l_of_colls.append(coll[0]) if len(l_of_colls) > 0: recIDs = perform_request_search(c=l_of_colls) else: recIDs = [] - valid = HitSet() + valid = intbitset() valid += recIDs return valid def add_recIDs_by_date(rank_method_code, dates=""): """Return recID range from records modified between DATES[0] and DATES[1]. If DATES is not set, then add records modified since the last run of the ranking method RANK_METHOD_CODE. 
""" if not dates: try: dates = (get_lastupdated(rank_method_code), '') except Exception: dates = ("0000-00-00 00:00:00", '') if dates[0] is None: dates = ("0000-00-00 00:00:00", '') query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s""" if dates[1]: query += " and b.modification_date <= %s" query += " ORDER BY b.id ASC""" if dates[1]: res = run_sql(query, (dates[0], dates[1])) else: res = run_sql(query, (dates[0], )) alist = create_range_list([row[0] for row in res]) if not alist: write_message("No new records added since last time method was run") return alist def getName(rank_method_code, ln=CFG_SITE_LANG, type='ln'): """Returns the name of the method if it exists""" try: rnkid = run_sql("SELECT id FROM rnkMETHOD where name=%s", (rank_method_code, )) if rnkid: rnkid = str(rnkid[0][0]) res = run_sql("SELECT value FROM rnkMETHODNAME where type=%s and ln=%s and id_rnkMETHOD=%s", (type, ln, rnkid)) if not res: res = run_sql("SELECT value FROM rnkMETHODNAME WHERE ln=%s and id_rnkMETHOD=%s and type=%s", (CFG_SITE_LANG, rnkid, type)) if not res: return rank_method_code return res[0][0] else: raise Exception except Exception: write_message("Cannot run rank method, either given code for method is wrong, or it has not been added using the webinterface.") raise Exception def single_tag_rank_method(run): return bibrank_engine(run) def showtime(timeused): """Show time used for method""" write_message("Time used: %d second(s)." % timeused, verbose=9) def citation(run): return bibrank_engine(run) # Hack to put index based sorting here, but this is very similar to tag #based method and should re-use a lot of this code, so better to have here #than separate # def index_term_count_exec(rank_method_code, name, config): """Creating the rank method data""" write_message("Recreating index weighting data") begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # we must recalculate these every time for all records, since the # weighting of a record is determined by the index entries of _other_ # records rnkset = calculate_index_term_count(config) intoDB(rnkset, begin_date, rank_method_code) def calculate_index_term_count(config): """Calculate the weight of a record set based on number of enries of a tag from the record in another index...useful for authority files""" records = [] if config.has_section("index_term_count"): index = config.get("index_term_count","index_table_name") tag = config.get("index_term_count","index_term_value_from_tag") # check against possible SQL injection: dummy = get_table_update_time(index) tag = wash_table_column_name(tag) else: raise Exception("Config file " + config + " does not have index_term_count section") return() task_sleep_now_if_required(can_stop_too=True) write_message("......Processing all records") query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \ (tag[0:2], tag[0:2]) # we checked that tag is safe records = list(run_sql(query, (tag,))) write_message("Number of records found with the necessary tags: %s" % len(records)) rnkset = {} for key, value in records: hits = 0 if len(value): query = "SELECT hitlist from %s where term = %%s" % index # we checked that index is a table row = run_sql(query, (value,)) if row and row[0] and row[0][0]: #has to be prepared for corrupted data! 
try: hits = len(intbitset(row[0][0])) except: hits = 0 rnkset[key] = hits write_message("Number of records available in rank method: %s" % len(rnkset)) return rnkset def index_term_count(run): return bibrank_engine(run) diff --git a/modules/bibupload/lib/bibupload.py b/modules/bibupload/lib/bibupload.py index 26906b70e..adae6d851 100644 --- a/modules/bibupload/lib/bibupload.py +++ b/modules/bibupload/lib/bibupload.py @@ -1,2164 +1,2175 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibUpload: Receive MARC XML file and update the appropriate database tables according to options. """ __revision__ = "$Id$" import os import re import sys import time from zlib import compress import socket import marshal import copy import tempfile import urlparse import urllib2 from invenio.config import CFG_OAI_ID_FIELD, \ CFG_BIBUPLOAD_REFERENCE_TAG, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG, \ CFG_BIBUPLOAD_STRONG_TAGS, \ CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ - CFG_SITE_URL, CFG_SITE_RECORD + CFG_SITE_URL, CFG_SITE_RECORD, \ + CFG_OAI_PROVENANCE_ALTERED_SUBFIELD from invenio.jsonutils import json, CFG_JSON_AVAILABLE from invenio.bibupload_config import CFG_BIBUPLOAD_CONTROLFIELD_TAGS, \ CFG_BIBUPLOAD_SPECIAL_TAGS from invenio.dbquery import run_sql, \ Error from invenio.bibrecord import create_records, \ record_add_field, \ record_delete_field, \ record_xml_output, \ record_get_field_instances, \ record_get_field_values, \ field_get_subfield_values, \ field_get_subfield_instances, \ record_modify_subfield, \ record_delete_subfield_from, \ record_delete_fields, \ record_add_subfield_into, \ record_find_field, \ record_extract_oai_id from invenio.search_engine import get_record from invenio.dateutils import convert_datestruct_to_datetext from invenio.errorlib import register_exception from invenio.intbitset import intbitset from invenio.config import CFG_WEBSUBMIT_FILEDIR from invenio.bibtask import task_init, write_message, \ task_set_option, task_get_option, task_get_task_param, task_update_status, \ task_update_progress, task_sleep_now_if_required, fix_argv_paths from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, \ get_docname_from_url, check_valid_url, download_url, \ KEEP_OLD_VALUE, decompose_bibdocfile_url, InvenioWebSubmitFileError, \ bibdocfile_url_p, CFG_BIBDOCFILE_AVAILABLE_FLAGS, guess_format_from_url from invenio.search_engine import search_pattern #Statistic variables stat = {} stat['nb_records_to_upload'] = 0 stat['nb_records_updated'] = 0 stat['nb_records_inserted'] = 0 stat['nb_errors'] = 0 stat['nb_holdingpen'] = 0 stat['exectime'] = time.localtime() _WRITING_RIGHTS = None ## 
Let's set a reasonable timeout for URL request (e.g. FFT) socket.setdefaulttimeout(40) _re_find_001 = re.compile('\\s*(\\d*)\\s*', re.S) def bibupload_pending_recids(): """This function embed a bit of A.I. and is more a hack than an elegant algorithm. It should be updated in case bibupload/bibsched are modified in incompatible ways. This function return the intbitset of all the records that are being (or are scheduled to be) touched by other bibuploads. """ options = run_sql("""SELECT arguments FROM schTASK WHERE status<>'DONE' AND proc='bibupload' AND (status='RUNNING' OR status='CONTINUING' OR status='WAITING' OR status='SCHEDULED' OR status='ABOUT TO STOP' OR status='ABOUT TO SLEEP')""") ret = intbitset() xmls = [] if options: for arguments in options: arguments = marshal.loads(arguments[0]) for argument in arguments[1:]: if argument.startswith('/'): # XMLs files are recognizable because they're absolute # files... xmls.append(argument) for xmlfile in xmls: # Let's grep for the 001 try: xml = open(xmlfile).read() ret += [int(group[1]) for group in _re_find_001.findall(xml)] except: continue return ret ### bibupload engine functions: def bibupload(record, opt_tag=None, opt_mode=None, opt_stage_to_start_from=1, opt_notimechange=0, oai_rec_id = "", pretend=False): """Main function: process a record and fit it in the tables bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record metadata. Return (error_code, recID) of the processed record. """ assert(opt_mode in ('insert', 'replace', 'replace_or_insert', 'reference', 'correct', 'append', 'format', 'holdingpen', 'delete')) error = None # If there are special tags to proceed check if it exists in the record if opt_tag is not None and not(record.has_key(opt_tag)): msg = " Failed: Tag not found, enter a valid tag to update." write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) # Extraction of the Record Id from 001, SYSNO or OAIID tags: rec_id = retrieve_rec_id(record, opt_mode, pretend=pretend) if rec_id == -1: msg = " Failed: either the record already exists and insert was " \ "requested or the record does not exists and " \ "replace/correct/append has been used" write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) elif rec_id > 0: write_message(" -Retrieve record ID (found %s): DONE." % rec_id, verbose=2) if not record.has_key('001'): # Found record ID by means of SYSNO or OAIID, and the # input MARCXML buffer does not have this 001 tag, so we # should add it now: error = record_add_field(record, '001', controlfield_value=rec_id) if error is None: msg = " Failed: Error during adding the 001 controlfield " \ "to the record" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: error = None write_message(" -Added tag 001: DONE.", verbose=2) write_message(" -Check if the xml marc file is already in the database: DONE" , verbose=2) # Reference mode check if there are reference tag if opt_mode == 'reference': error = extract_tag_from_record(record, CFG_BIBUPLOAD_REFERENCE_TAG) if error is None: msg = " Failed: No reference tags has been found..." 
write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) else: error = None write_message(" -Check if reference tags exist: DONE", verbose=2) record_deleted_p = False if opt_mode == 'insert' or \ (opt_mode == 'replace_or_insert') and rec_id is None: insert_mode_p = True # Insert the record into the bibrec databases to have a recordId rec_id = create_new_record(pretend=pretend) write_message(" -Creation of a new record id (%d): DONE" % rec_id, verbose=2) # we add the record Id control field to the record error = record_add_field(record, '001', controlfield_value=rec_id) if error is None: msg = " Failed: Error during adding the 001 controlfield " \ "to the record" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: error = None elif opt_mode != 'insert' and opt_mode != 'format' and \ opt_stage_to_start_from != 5: insert_mode_p = False # Update Mode # Retrieve the old record to update rec_old = get_record(rec_id) + record_had_altered_bit = record_get_field_values(rec_old, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4], CFG_OAI_PROVENANCE_ALTERED_SUBFIELD) # Also save a copy to restore previous situation in case of errors original_record = get_record(rec_id) if rec_old is None: msg = " Failed during the creation of the old record!" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: write_message(" -Retrieve the old record to update: DONE", verbose=2) # In Replace mode, take over old strong tags if applicable: if opt_mode == 'replace' or \ opt_mode == 'replace_or_insert': copy_strong_tags_from_old_record(record, rec_old) # Delete tags to correct in the record if opt_mode == 'correct' or opt_mode == 'reference': delete_tags_to_correct(record, rec_old, opt_tag) write_message(" -Delete the old tags to correct in the old record: DONE", verbose=2) # Delete tags specified if in delete mode if opt_mode == 'delete': record = delete_tags(record, rec_old) write_message(" -Delete specified tags in the old record: DONE", verbose=2) # Append new tag to the old record and update the new record with the old_record modified if opt_mode == 'append' or opt_mode == 'correct' or \ opt_mode == 'reference': record = append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode) write_message(" -Append new tags to the old record: DONE", verbose=2) + # if record_had_altered_bit, this must be set to true, since the + # record has been altered. 
+ if record_had_altered_bit: + oai_provenance_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) + for oai_provenance_field in oai_provenance_fields: + for i, (code, dummy_value) in enumerate(oai_provenance_field[0]): + if code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD: + oai_provenance_field[0][i] = (code, 'true') + # now we clear all the rows from bibrec_bibxxx from the old # record (they will be populated later (if needed) during # stage 4 below): delete_bibrec_bibxxx(rec_old, rec_id, pretend=pretend) record_deleted_p = True write_message(" -Clean bibrec_bibxxx: DONE", verbose=2) write_message(" -Stage COMPLETED", verbose=2) try: if not record_is_valid(record): msg = "ERROR: record is not valid" write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) # Have a look if we have FMT tags write_message("Stage 1: Start (Insert of FMT tags if exist).", verbose=2) if opt_stage_to_start_from <= 1 and \ extract_tag_from_record(record, 'FMT') is not None: record = insert_fmt_tags(record, rec_id, opt_mode, pretend=pretend) if record is None: msg = " Stage 1 failed: Error while inserting FMT tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) elif record == 0: # Mode format finished stat['nb_records_updated'] += 1 return (0, int(rec_id), "") write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Have a look if we have FFT tags write_message("Stage 2: Start (Process FFT tags if exist).", verbose=2) record_had_FFT = False if opt_stage_to_start_from <= 2 and \ extract_tag_from_record(record, 'FFT') is not None: record_had_FFT = True if not writing_rights_p(): write_message(" Stage 2 failed: Error no rights to write fulltext files", verbose=1, stream=sys.stderr) task_update_status("ERROR") sys.exit(1) try: record = elaborate_fft_tags(record, rec_id, opt_mode, pretend=pretend) except Exception, e: register_exception() msg = " Stage 2 failed: Error while elaborating FFT tags: %s" % e write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if record is None: msg = " Stage 2 failed: Error while elaborating FFT tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Have a look if we have FFT tags write_message("Stage 2B: Start (Synchronize 8564 tags).", verbose=2) has_bibdocs = run_sql("SELECT count(id_bibdoc) FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'", (rec_id, ))[0][0] > 0 if opt_stage_to_start_from <= 2 and (has_bibdocs or record_had_FFT or extract_tag_from_record(record, '856') is not None): try: record = synchronize_8564(rec_id, record, record_had_FFT, pretend=pretend) except Exception, e: register_exception(alert_admin=True) msg = " Stage 2B failed: Error while synchronizing 8564 tags: %s" % e write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if record is None: msg = " Stage 2B failed: Error while synchronizing 8564 tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Update of the BibFmt write_message("Stage 3: Start (Update bibfmt).", verbose=2) if opt_stage_to_start_from <= 3: # format the single record as xml rec_xml_new = record_xml_output(record) # Update 
bibfmt with the format xm of this record if opt_mode != 'format': error = update_bibfmt_format(rec_id, rec_xml_new, 'xm', pretend=pretend) if error == 1: msg = " Failed: error during update_bibfmt_format 'xm'" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: error = update_bibfmt_format(rec_id, marshal.dumps(record), 'recstruct', pretend=pretend) if error == 1: msg = " Failed: error during update_bibfmt_format 'recstruct'" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) # archive MARCXML format of this record for version history purposes: error = archive_marcxml_for_history(rec_id, pretend=pretend) if error == 1: msg = " Failed to archive MARCXML for history" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: write_message(" -Archived MARCXML for history : DONE", verbose=2) write_message(" -Stage COMPLETED", verbose=2) # Update the database MetaData write_message("Stage 4: Start (Update the database with the metadata).", verbose=2) if opt_stage_to_start_from <= 4: if opt_mode in ('insert', 'replace', 'replace_or_insert', 'append', 'correct', 'reference', 'delete'): update_database_with_metadata(record, rec_id, oai_rec_id, pretend=pretend) record_deleted_p = False else: write_message(" -Stage NOT NEEDED in mode %s" % opt_mode, verbose=2) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Finally we update the bibrec table with the current date write_message("Stage 5: Start (Update bibrec table with current date).", verbose=2) if opt_stage_to_start_from <= 5 and \ opt_notimechange == 0 and \ not insert_mode_p: now = convert_datestruct_to_datetext(time.localtime()) write_message(" -Retrieved current localtime: DONE", verbose=2) update_bibrec_modif_date(now, rec_id, pretend=pretend) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Increase statistics if insert_mode_p: stat['nb_records_inserted'] += 1 else: stat['nb_records_updated'] += 1 # Upload of this record finish write_message("Record "+str(rec_id)+" DONE", verbose=1) return (0, int(rec_id), "") finally: if record_deleted_p: ## BibUpload has failed living the record deleted. We should ## back the original record then. update_database_with_metadata(original_record, rec_id, oai_rec_id, pretend=pretend) write_message(" Restored original record", verbose=1, stream=sys.stderr) def record_is_valid(record): """ Check if the record is valid. Currently this simply checks if the record has exactly one rec_id. @param record: the record @type record: recstruct @return: True if the record is valid @rtype: bool """ rec_ids = record_get_field_values(record, tag="001") if len(rec_ids) != 1: write_message(" The record is not valid: it has not a single rec_id: %s" % (rec_ids), stream=sys.stderr) return False return True def find_record_ids_by_oai_id(oaiId): """ A method finding the records identifier provided the oai identifier returns a list of identifiers matching a given oai identifier """ # Is this record already in invenio (matching by oaiid) if oaiId: recids = search_pattern(p=oaiId, f=CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, m='e') # Is this record already in invenio (matching by reportnumber i.e. # particularly 037. 
        # Idea: to avoid double insertions)
        repnumber = oaiId.split(":")[-1]
        if repnumber:
            recids |= search_pattern(p=repnumber,
                                     f="reportnumber",
                                     m='e')
        # Is this record already in invenio (matching by reportnumber i.e.
        # particularly 037. Idea: to avoid double insertions)
        repnumber = "arXiv:" + oaiId.split(":")[-1]
        recids |= search_pattern(p=repnumber,
                                 f="reportnumber",
                                 m='e')
        return recids
    else:
        return intbitset()

def insert_record_into_holding_pen(record, oai_id, pretend=False):
    query = "INSERT INTO bibHOLDINGPEN (oai_id, changeset_date, changeset_xml, id_bibrec) VALUES (%s, NOW(), %s, %s)"
    xml_record = record_xml_output(record)
    # determine the identifier of the record
    bibrec_ids = find_record_ids_by_oai_id(oai_id)
    if len(bibrec_ids) > 0:
        bibrec_id = bibrec_ids.pop()
    else:
        # id not found by using the oai_id, let's use a wider search based
        # on any information we might have.
        bibrec_id = retrieve_rec_id(record, 'holdingpen', pretend=pretend)
        if bibrec_id is None:
            bibrec_id = 0
    if not pretend:
        run_sql(query, (oai_id, xml_record, bibrec_id))
    # record_id is logged as 0! (We are not inserting into the main database.)
    log_record_uploading(oai_id, task_get_task_param('task_id', 0), 0, 'H', pretend=pretend)
    stat['nb_holdingpen'] += 1

def print_out_bibupload_statistics():
    """Print the statistics of the process."""
    out = "Task stats: %(nb_input)d input records, %(nb_updated)d updated, " \
          "%(nb_inserted)d inserted, %(nb_errors)d errors, " \
          "%(nb_holdingpen)d inserted to holding pen. " \
          "Time %(nb_sec).2f sec." % {
              'nb_input': stat['nb_records_to_upload'],
              'nb_updated': stat['nb_records_updated'],
              'nb_inserted': stat['nb_records_inserted'],
              'nb_errors': stat['nb_errors'],
              'nb_holdingpen': stat['nb_holdingpen'],
              'nb_sec': time.time() - time.mktime(stat['exectime'])}
    write_message(out)

def open_marc_file(path):
    """Open a file and return its data."""
    try:
        # open the file containing the marc document
        marc_file = open(path, 'r')
        marc = marc_file.read()
        marc_file.close()
    except IOError, erro:
        write_message("Error: %s" % erro, verbose=1, stream=sys.stderr)
        write_message("Exiting.", stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    return marc

def xml_marc_to_records(xml_marc):
    """Create the records."""
    # Creation of the records from the XML MARC in argument
    recs = create_records(xml_marc, 1, 1)
    if recs == []:
        write_message("Error: Cannot parse MARCXML file.",
                      verbose=1, stream=sys.stderr)
        write_message("Exiting.", stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    elif recs[0][0] is None:
        write_message("Error: MARCXML file has wrong format: %s" % recs,
                      verbose=1, stream=sys.stderr)
        write_message("Exiting.", stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    else:
        recs = map((lambda x: x[0]), recs)
        return recs

def find_record_format(rec_id, format):
    """Look whether record REC_ID is formatted in FORMAT, i.e. whether
    FORMAT exists in the bibfmt table for this record.

    Return the number of times it is formatted: 0 if not, 1 if yes,
    2 if found more than once (should never occur).
    """
    out = 0
    query = """SELECT COUNT(id) FROM bibfmt WHERE id_bibrec=%s AND format=%s"""
    params = (rec_id, format)
    res = []
    try:
        res = run_sql(query, params)
        out = res[0][0]
    except Error, error:
        write_message(" Error during find_record_format() : %s " % error,
                      verbose=1, stream=sys.stderr)
    return out

def find_record_from_recid(rec_id):
    """
    Try to find record in the database from the REC_ID number.
    Return record ID if found, None otherwise.
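
    Illustrative behaviour (a sketch with made-up recids, not a doctest
    from the original module):

        find_record_from_recid(42)     # -> 42, when bibrec row 42 exists
        find_record_from_recid(999999) # -> None, when no such row exists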
""" try: res = run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id,)) except Error, error: write_message(" Error during find_record_bibrec() : %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def find_record_from_sysno(sysno): """ Try to find record in the database from the external SYSNO number. Return record ID if found, None otherwise. """ bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, sysno,)) except Error, error: write_message(" Error during find_record_from_sysno(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def find_records_from_extoaiid(extoaiid, extoaisrc=None): """ Try to find records in the database from the external EXTOAIID number. Return list of record ID if found, None otherwise. """ assert(CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5] == CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[:5]) bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: write_message(' Looking for extoaiid="%s" with extoaisrc="%s"' % (extoaiid, extoaisrc), verbose=9) id_bibrecs = intbitset(run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, extoaiid,))) write_message(' Partially found %s for extoaiid="%s"' % (id_bibrecs, extoaiid), verbose=9) ret = intbitset() for id_bibrec in id_bibrecs: record = get_record(id_bibrec) instances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) write_message(' recid %s -> instances "%s"' % (id_bibrec, instances), verbose=9) for instance in instances: this_extoaisrc = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5]) this_extoaisrc = this_extoaisrc and this_extoaisrc[0] or None this_extoaiid = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]) this_extoaiid = this_extoaiid and this_extoaiid[0] or None write_message(" this_extoaisrc -> %s, this_extoaiid -> %s" % (this_extoaisrc, this_extoaiid), verbose=9) if this_extoaiid == extoaiid: write_message(' recid %s -> provenance "%s"' % (id_bibrec, this_extoaisrc), verbose=9) if this_extoaisrc == extoaisrc: write_message('Found recid %s for extoaiid="%s" with provenance="%s"' % (id_bibrec, extoaiid, extoaisrc), verbose=9) ret.add(id_bibrec) break if this_extoaisrc is None: write_message('WARNING: Found recid %s for extoaiid="%s" that doesn\'t specify any provenance, while input record does.' % (id_bibrec, extoaiid), stream=sys.stderr) if extoaisrc is None: write_message('WARNING: Found recid %s for extoaiid="%s" that specify a provenance (%s), while input record does not have a provenance.' % (id_bibrec, extoaiid, this_extoaisrc), stream=sys.stderr) return ret except Error, error: write_message(" Error during find_records_from_extoaiid(): %s " % error, verbose=1, stream=sys.stderr) raise def find_record_from_oaiid(oaiid): """ Try to find record in the database from the OAI ID number and OAI SRC. Return record ID if found, None otherwise. 
""" bibxxx = 'bib'+CFG_OAI_ID_FIELD[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_OAI_ID_FIELD, oaiid,)) except Error, error: write_message(" Error during find_record_from_oaiid(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def extract_tag_from_record(record, tag_number): """ Extract the tag_number for record.""" # first step verify if the record is not already in the database if record: return record.get(tag_number, None) return None def retrieve_rec_id(record, opt_mode, pretend=False): """Retrieve the record Id from a record by using tag 001 or SYSNO or OAI ID tag. opt_mod is the desired mode.""" rec_id = None # 1st step: we look for the tag 001 tag_001 = extract_tag_from_record(record, '001') if tag_001 is not None: # We extract the record ID from the tag rec_id = tag_001[0][3] # if we are in insert mode => error if opt_mode == 'insert': write_message(" Failed: tag 001 found in the xml" \ " submitted, you should use the option replace," \ " correct or append to replace an existing" \ " record. (-h for help)", verbose=1, stream=sys.stderr) return -1 else: # we found the rec id and we are not in insert mode => continue # we try to match rec_id against the database: if find_record_from_recid(rec_id) is not None: # okay, 001 corresponds to some known record return int(rec_id) elif opt_mode in ('replace', 'replace_or_insert'): if task_get_option('force'): # we found the rec_id but it's not in the system and we are # requested to replace records. Therefore we create on the fly # a empty record allocating the recid. write_message(" Warning: tag 001 found in the xml with" " value %(rec_id)s, but rec_id %(rec_id)s does" " not exist. Since the mode replace was" " requested the rec_id %(rec_id)s is allocated" " on-the-fly." % {"rec_id" : rec_id}, stream=sys.stderr) return create_new_record(rec_id=rec_id, pretend=pretend) else: # Since --force was not used we are going to raise an error write_message(" Failed: tag 001 found in the xml" " submitted with value %(rec_id)s. The" " corresponding record however does not" " exists. If you want to really create" " such record, please use the --force" " parameter when calling bibupload." % { "rec_id": rec_id}, stream=sys.stderr) return -1 else: # The record doesn't exist yet. We shall have try to check # the SYSNO or OAI id later. write_message(" -Tag 001 value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag 001 not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the SYSNO sysnos = record_get_field_values(record, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or "", CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6]) if sysnos: sysno = sysnos[0] # there should be only one external SYSNO write_message(" -Checking if SYSNO " + sysno + \ " exists in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_sysno(sysno) if rec_id is not None: # rec_id found pass else: # The record doesn't exist yet. We will try to check # external and internal OAI ids later. 
write_message(" -Tag SYSNO value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the external OAIID extoai_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or "") if extoai_fields: for field in extoai_fields: extoaiid = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6]) extoaisrc = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6]) if extoaiid: extoaiid = extoaiid[0] if extoaisrc: extoaisrc = extoaisrc[0] else: extoaisrc = None write_message(" -Checking if EXTOAIID %s (%s) exists in the database" % (extoaiid, extoaisrc), verbose=9) # try to find the corresponding rec id from the database try: rec_ids = find_records_from_extoaiid(extoaiid, extoaisrc) except Error, e: write_message(e, verbose=1, stream=sys.stderr) return -1 if rec_ids: # rec_id found rec_id = rec_ids.pop() break else: # The record doesn't exist yet. We will try to check # OAI id later. write_message(" -Tag EXTOAIID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag EXTOAIID not found in the xml marc file.", verbose=9) if rec_id is None: # 4th step we look for the OAI ID oaiidvalues = record_get_field_values(record, CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or "", CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or "", CFG_OAI_ID_FIELD[5:6]) if oaiidvalues: oaiid = oaiidvalues[0] # there should be only one OAI ID write_message(" -Check if local OAI ID " + oaiid + \ " exist in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_oaiid(oaiid) if rec_id is not None: # rec_id found pass else: write_message(" -Tag OAI ID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) # Now we should have detected rec_id from SYSNO or OAIID # tags. (None otherwise.) if rec_id: if opt_mode == 'insert': write_message(" Failed : Record found in the database," \ " you should use the option replace," \ " correct or append to replace an existing" \ " record. (-h for help)", verbose=1, stream=sys.stderr) return -1 else: if opt_mode != 'insert' and \ opt_mode != 'replace_or_insert': write_message(" Failed : Record not found in the database."\ " Please insert the file before updating it."\ " (-h for help)", verbose=1, stream=sys.stderr) return -1 return rec_id and int(rec_id) or None ### Insert functions def create_new_record(rec_id=None, pretend=False): """ Create new record in the database @param rec_id: if specified the new record will have this rec_id. @type rec_id: int @return: the allocated rec_id @rtype: int @note: in case of errors will be returned None """ if rec_id is not None: try: rec_id = int(rec_id) except (ValueError, TypeError), error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None if run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id, )): write_message(" Error during the creation_new_record function : the requested rec_id %s already exists." 
                          % rec_id)
            return None
    if pretend:
        if rec_id:
            return rec_id
        else:
            return run_sql("SELECT max(id)+1 FROM bibrec")[0][0]
    try:
        if rec_id is not None:
            return run_sql("INSERT INTO bibrec (id, creation_date, modification_date) VALUES (%s, NOW(), NOW())",
                           (rec_id, ))
        else:
            return run_sql("INSERT INTO bibrec (creation_date, modification_date) VALUES (NOW(), NOW())")
    except Error, error:
        write_message(" Error during the create_new_record function : %s "
                      % error, verbose=1, stream=sys.stderr)
        return None

def insert_bibfmt(id_bibrec, marc, format,
                  modification_date='1970-01-01 00:00:00', pretend=False):
    """Insert the format in the table bibfmt."""
    # compress the marc value
    pickled_marc = compress(marc)
    try:
        time.strptime(modification_date, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        modification_date = '1970-01-01 00:00:00'
    query = """INSERT INTO bibfmt (id_bibrec, format, last_updated, value)
               VALUES (%s, %s, %s, %s)"""
    try:
        if not pretend:
            row_id = run_sql(query, (id_bibrec, format,
                                     modification_date, pickled_marc))
            return row_id
        else:
            return 1
    except Error, error:
        write_message(" Error during the insert_bibfmt function : %s "
                      % error, verbose=1, stream=sys.stderr)
    return None

def insert_record_bibxxx(tag, value, pretend=False):
    """Insert the record into bibxxx."""
    # determine into which table one should insert the record
    table_name = 'bib' + tag[0:2] + 'x'
    # check if the tag, value combination exists in the table
    query = """SELECT id,value FROM %s """ % table_name
    query += """ WHERE tag=%s AND value=%s"""
    params = (tag, value)
    try:
        res = run_sql(query, params)
    except Error, error:
        write_message(" Error during the insert_record_bibxxx function : %s "
                      % error, verbose=1, stream=sys.stderr)
    # Note: compare now the found values one by one and look for
    # string binary equality (e.g. to respect lowercase/uppercase
    # match), regardless of the charset etc settings.  Ideally we
    # could use a BINARY operator in the above SELECT statement, but
    # we would have to check compatibility on various MySQLdb versions
    # etc; this approach checks all matched values in Python, not in
    # MySQL, which is less cool, but more conservative, so it should
    # work better on most setups.
    for row in res:
        row_id = row[0]
        row_value = row[1]
        if row_value == value:
            return (table_name, row_id)
    # We got here only when the tag,value combination was not found,
    # so it is now necessary to insert the tag,value combination into
    # the bibxxx table as new.
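    # (Illustrative example, not part of the original code: for tag
    # '100__a' the derived table is 'bib10x', so the statement built
    # below amounts to
    #
    #     INSERT INTO bib10x (tag, value) VALUES ('100__a', 'Ellis, J.')
    #
    # where the author value is made up.)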
query = """INSERT INTO %s """ % table_name query += """ (tag, value) values (%s , %s)""" params = (tag, value) try: if not pretend: row_id = run_sql(query, params) else: return (table_name, 1) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) return (table_name, row_id) def insert_record_bibrec_bibxxx(table_name, id_bibxxx, field_number, id_bibrec, pretend=False): """Insert the record into bibrec_bibxxx""" # determine into which table one should insert the record full_table_name = 'bibrec_'+ table_name # insert the proper row into the table query = """INSERT INTO %s """ % full_table_name query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)""" params = (id_bibrec, id_bibxxx, field_number) try: if not pretend: res = run_sql(query, params) else: return 1 except Error, error: write_message(" Error during the insert_record_bibrec_bibxxx" " function 2nd query : %s " % error, verbose=1, stream=sys.stderr) return res def synchronize_8564(rec_id, record, record_had_FFT, pretend=False): """ Synchronize 8564_ tags and BibDocFile tables. This function directly manipulate the record parameter. @type rec_id: positive integer @param rec_id: the record identifier. @param record: the record structure as created by bibrecord.create_record @type record_had_FFT: boolean @param record_had_FFT: True if the incoming bibuploaded-record used FFT @return: the manipulated record (which is also modified as a side effect) """ def merge_marc_into_bibdocfile(field, pretend=False): """ Internal function that reads a single field and store its content in BibDocFile tables. @param field: the 8564_ field containing a BibDocFile URL. """ write_message('Merging field: %s' % (field, ), verbose=9) url = field_get_subfield_values(field, 'u')[:1] or field_get_subfield_values(field, 'q')[:1] description = field_get_subfield_values(field, 'y')[:1] comment = field_get_subfield_values(field, 'z')[:1] if url: recid, docname, format = decompose_bibdocfile_url(url[0]) if recid != rec_id: write_message("INFO: URL %s is not pointing to a fulltext owned by this record (%s)" % (url, recid), stream=sys.stderr) else: try: bibdoc = BibRecDocs(recid).get_bibdoc(docname) if description and not pretend: bibdoc.set_description(description[0], format) if comment and not pretend: bibdoc.set_comment(comment[0], format) except InvenioWebSubmitFileError: ## Apparently the referenced docname doesn't exist anymore. ## Too bad. Let's skip it. write_message("WARNING: docname %s does not seem to exist for record %s. Has it been renamed outside FFT?" % (docname, recid), stream=sys.stderr) def merge_bibdocfile_into_marc(field, subfields): """ Internal function that reads BibDocFile table entries referenced by the URL in the given 8564_ field and integrate the given information directly with the provided subfields. @param field: the 8564_ field containing a BibDocFile URL. @param subfields: the subfields corresponding to the BibDocFile URL generated after BibDocFile tables. """ write_message('Merging subfields %s into field %s' % (subfields, field), verbose=9) subfields = dict(subfields) ## We make a copy not to have side-effects subfield_to_delete = [] for subfield_position, (code, value) in enumerate(field_get_subfield_instances(field)): ## For each subfield instance already existing... 
if code in subfields: ## ...We substitute it with what is in BibDocFile tables record_modify_subfield(record, '856', code, subfields[code], subfield_position, field_position_global=field[4]) del subfields[code] else: ## ...We delete it otherwise subfield_to_delete.append(subfield_position) subfield_to_delete.sort() for counter, position in enumerate(subfield_to_delete): ## FIXME: Very hackish algorithm. Since deleting a subfield ## will alterate the position of following subfields, we ## are taking note of this and adjusting further position ## by using a counter. record_delete_subfield_from(record, '856', position - counter, field_position_global=field[4]) subfields = subfields.items() subfields.sort() for code, value in subfields: ## Let's add non-previously existing subfields record_add_subfield_into(record, '856', code, value, field_position_global=field[4]) def get_bibdocfile_managed_info(): """ Internal function to eturns a dictionary of BibDocFile URL -> wanna-be subfields. @rtype: mapping @return: BibDocFile URL -> wanna-be subfields dictionary """ ret = {} bibrecdocs = BibRecDocs(rec_id) latest_files = bibrecdocs.list_latest_files(list_hidden=False) for afile in latest_files: url = afile.get_url() ret[url] = {'u' : url} description = afile.get_description() comment = afile.get_comment() subformat = afile.get_subformat() if description: ret[url]['y'] = description if comment: ret[url]['z'] = comment if subformat: ret[url]['x'] = subformat return ret write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record), verbose=9) tags856s = record_get_field_instances(record, '856', '%', '%') write_message("Original 856%% instances: %s" % tags856s, verbose=9) tags8564s_to_add = get_bibdocfile_managed_info() write_message("BibDocFile instances: %s" % tags8564s_to_add, verbose=9) positions_tags8564s_to_remove = [] for local_position, field in enumerate(tags856s): if field[1] == '4' and field[2] == ' ': write_message('Analysing %s' % (field, ), verbose=9) for url in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'): if url in tags8564s_to_add: if record_had_FFT: merge_bibdocfile_into_marc(field, tags8564s_to_add[url]) else: merge_marc_into_bibdocfile(field, pretend=pretend) del tags8564s_to_add[url] break elif bibdocfile_url_p(url) and decompose_bibdocfile_url(url)[0] == rec_id: positions_tags8564s_to_remove.append(local_position) write_message("%s to be deleted and re-synchronized" % (field, ), verbose=9) break record_delete_fields(record, '856', positions_tags8564s_to_remove) tags8564s_to_add = tags8564s_to_add.values() tags8564s_to_add.sort() for subfields in tags8564s_to_add: subfields = subfields.items() subfields.sort() record_add_field(record, '856', '4', ' ', subfields=subfields) write_message('Final record: %s' % record, verbose=9) return record def elaborate_fft_tags(record, rec_id, mode, pretend=False): """ Process FFT tags that should contain $a with file pathes or URLs to get the fulltext from. This function enriches record with proper 8564 URL tags, downloads fulltext files and stores them into var/data structure where appropriate. CFG_BIBUPLOAD_WGET_SLEEP_TIME defines time to sleep in seconds in between URL downloads. Note: if an FFT tag contains multiple $a subfields, we upload them into different 856 URL tags in the metadata. See regression test case test_multiple_fft_insert_via_http(). """ # Let's define some handy sub procedure. 
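    # (Illustrative example, not part of the original code: a typical FFT
    # field processed here could look like the following MARCXML, where
    # the path and docname are made up:
    #
    #     <datafield tag="FFT" ind1=" " ind2=" ">
    #       <subfield code="a">/tmp/fulltext.pdf</subfield>
    #       <subfield code="n">fulltext</subfield>
    #       <subfield code="t">Main</subfield>
    #     </datafield>
    #
    # $a carries the location, $n the docname, $t the doctype, exactly as
    # parsed by the loop below.)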
def _add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=False): """Adds a new format for a given bibdoc. Returns True when everything's fine.""" write_message('Add new format to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment, flags), verbose=9) try: if not url: # Not requesting a new url. Just updating comment & description return _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=pretend) try: if not pretend: bibdoc.add_file_new_format(url, description=description, comment=comment, flags=flags) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because format already exists (%s)." % (url, format, docname, doctype, newname, description, comment, flags, e), stream=sys.stderr) raise except Exception, e: write_message("Error in adding '%s' as a new format because of: %s" % (url, e), stream=sys.stderr) raise return True def _add_new_version(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=False): """Adds a new version for a given bibdoc. Returns True when everything's fine.""" write_message('Add new version to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment, flags)) try: if not url: return _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=pretend) try: if not pretend: bibdoc.add_file_new_version(url, description=description, comment=comment, flags=flags) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because '%s'." % (url, format, docname, doctype, newname, description, comment, flags, e), stream=sys.stderr) raise except Exception, e: write_message("Error in adding '%s' as a new version because of: %s" % (url, e), stream=sys.stderr) raise return True def _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=False): """Directly update comments and descriptions.""" write_message('Just updating description and comment for %s with format %s with description %s, comment %s and flags %s' % (docname, format, description, comment, flags), verbose=9) try: if not pretend: bibdoc.set_description(description, format) bibdoc.set_comment(comment, format) for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS: if flag in flags: bibdoc.set_flag(flag, format) else: bibdoc.unset_flag(flag, format) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s') description and comment not updated because '%s'." % (docname, format, description, comment, flags, e)) raise return True if mode == 'delete': raise StandardError('FFT tag specified but bibupload executed in --delete mode') tuple_list = extract_tag_from_record(record, 'FFT') if tuple_list: # FFT Tags analysis write_message("FFTs: "+str(tuple_list), verbose=9) docs = {} # docnames and their data for fft in record_get_field_instances(record, 'FFT', ' ', ' '): # Let's discover the type of the document # This is a legacy field and will not be enforced any particular # check on it. doctype = field_get_subfield_values(fft, 't') if doctype: doctype = doctype[0] else: # Default is Main doctype = 'Main' # Let's discover the url. 
url = field_get_subfield_values(fft, 'a') if url: url = url[0] try: check_valid_url(url) except StandardError, e: raise StandardError, "fft '%s' specifies in $a a location ('%s') with problems: %s" % (fft, url, e) else: url = '' # Let's discover the description description = field_get_subfield_values(fft, 'd') if description != []: description = description[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## If the user require to correct, and do not specify ## a description this means she really want to ## modify the description. description = '' else: description = KEEP_OLD_VALUE # Let's discover the desired docname to be created/altered name = field_get_subfield_values(fft, 'n') if name: ## Let's remove undesired extensions name = file_strip_ext(name[0] + '.pdf') else: if url: name = get_docname_from_url(url) elif mode != 'correct' and doctype != 'FIX-MARC': raise StandardError, "Warning: fft '%s' doesn't specifies either a location in $a or a docname in $n" % str(fft) else: continue # Let's discover the desired new docname in case we want to change it newname = field_get_subfield_values(fft, 'm') if newname: newname = file_strip_ext(newname[0] + '.pdf') else: newname = name # Let's discover the desired format format = field_get_subfield_values(fft, 'f') if format: format = normalize_format(format[0]) else: if url: format = guess_format_from_url(url) else: format = "" # Let's discover the icon icon = field_get_subfield_values(fft, 'x') if icon != []: icon = icon[0] if icon != KEEP_OLD_VALUE: try: check_valid_url(icon) except StandardError, e: raise StandardError, "fft '%s' specifies in $x an icon ('%s') with problems: %s" % (fft, icon, e) else: icon = '' # Let's discover the comment comment = field_get_subfield_values(fft, 'z') if comment != []: comment = comment[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## See comment on description comment = '' else: comment = KEEP_OLD_VALUE # Let's discover the restriction restriction = field_get_subfield_values(fft, 'r') if restriction != []: restriction = restriction[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## See comment on description restriction = '' else: restriction = KEEP_OLD_VALUE version = field_get_subfield_values(fft, 'v') if version: version = version[0] else: version = '' flags = field_get_subfield_values(fft, 'o') for flag in flags: if flag not in CFG_BIBDOCFILE_AVAILABLE_FLAGS: raise StandardError, "fft '%s' specifies a non available flag: %s" % (fft, flag) if docs.has_key(name): # new format considered (doctype2, newname2, restriction2, version2, urls) = docs[name] if doctype2 != doctype: raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name) if newname2 != newname: raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name) if restriction2 != restriction: raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name) if version2 != version: raise StandardError, "fft '%x' specifies a different version than the previous fft with docname '%s'" % (str(fft), name) for (url2, format2, description2, comment2, flags2) in urls: if format == format2: raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, format, name) if url or format: urls.append((url, format, description, comment, flags)) if icon: urls.append((icon, 
icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags)) else: if url or format: docs[name] = (doctype, newname, restriction, version, [(url, format, description, comment, flags)]) if icon: docs[name][4].append((icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags)) elif icon: docs[name] = (doctype, newname, restriction, version, [(icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags)]) else: docs[name] = (doctype, newname, restriction, version, []) write_message('Result of FFT analysis:\n\tDocs: %s' % (docs,), verbose=9) # Let's remove all FFT tags record_delete_field(record, 'FFT', ' ', ' ') # Preprocessed data elaboration bibrecdocs = BibRecDocs(rec_id) ## Let's pre-download all the URLs to see if, in case of mode 'correct' or 'append' ## we can avoid creating a new revision. for docname, (doctype, newname, restriction, version, urls) in docs.items(): downloaded_urls = [] try: bibdoc = bibrecdocs.get_bibdoc(docname) except InvenioWebSubmitFileError: ## A bibdoc with the given docname does not exists. ## So there is no chance we are going to revise an existing ## format with an identical file :-) bibdoc = None new_revision_needed = False for url, format, description, comment, flags in urls: if url: try: downloaded_url = download_url(url, format) write_message("%s saved into %s" % (url, downloaded_url), verbose=9) except Exception, err: write_message("Error in downloading '%s' because of: %s" % (url, err), stream=sys.stderr) raise if mode == 'correct' and bibdoc is not None and not new_revision_needed: downloaded_urls.append((downloaded_url, format, description, comment, flags)) if not bibdoc.check_file_exists(downloaded_url): new_revision_needed = True else: write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr) elif mode == 'append' and bibdoc is not None: if not bibdoc.check_file_exists(downloaded_url): downloaded_urls.append((downloaded_url, format, description, comment, flags)) else: write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr) else: downloaded_urls.append((downloaded_url, format, description, comment, flags)) else: downloaded_urls.append(('', format, description, comment, flags)) if mode == 'correct' and bibdoc is not None and not new_revision_needed: ## Since we don't need a new revision (because all the files ## that are being uploaded are different) ## we can simply remove the urls but keep the other information write_message("No need to add a new revision for docname %s for recid %s" % (docname, rec_id), verbose=2) docs[docname] = (doctype, newname, restriction, version, [('', format, description, comment, flags) for (dummy, format, description, comment, flags) in downloaded_urls]) for downloaded_url, dummy, dummy, dummy, dummy in downloaded_urls: ## Let's free up some space :-) if downloaded_url and os.path.exists(downloaded_url): os.remove(downloaded_url) else: if downloaded_urls or mode != 'append': docs[docname] = (doctype, newname, restriction, version, downloaded_urls) else: ## In case we are in append mode and there are no urls to append ## we discard the whole FFT del docs[docname] if mode == 'replace': # First we erase previous bibdocs if not pretend: for bibdoc in bibrecdocs.list_bibdocs(): bibdoc.delete() bibrecdocs.build_bibdoc_list() for docname, (doctype, newname, restriction, version, urls) in docs.iteritems(): write_message("Elaborating olddocname: '%s', 
newdocname: '%s', doctype: '%s', restriction: '%s', urls: '%s', mode: '%s'" % (docname, newname, doctype, restriction, urls, mode), verbose=9) if mode in ('insert', 'replace'): # new bibdocs, new docnames, new marc if newname in bibrecdocs.get_bibdoc_names(): write_message("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr) raise StandardError try: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) else: bibdoc = None except Exception, e: write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr) raise StandardError for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) elif mode == 'replace_or_insert': # to be thought as correct_or_insert for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'): if newname != docname: try: if not pretend: bibdoc.change_name(newname) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() except StandardError, e: write_message(e, stream=sys.stderr) raise found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == newname: found_bibdoc = True if doctype == 'PURGE': if not pretend: bibdoc.purge() elif doctype == 'DELETE': if not pretend: bibdoc.delete() elif doctype == 'EXPUNGE': if not pretend: bibdoc.expunge() elif doctype == 'FIX-ALL': if not pretend: bibrecdocs.fix(docname) elif doctype == 'FIX-MARC': pass elif doctype == 'DELETE-FILE': if urls: for (url, format, description, comment, flags) in urls: if not pretend: bibdoc.delete_file(format, version) elif doctype == 'REVERT': try: if not pretend: bibdoc.revert(version) except Exception, e: write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr) raise else: if restriction != KEEP_OLD_VALUE: if not pretend: bibdoc.set_status(restriction) # Since the docname already existed we have to first # bump the version by pushing the first new file # then pushing the other files. if urls: (first_url, first_format, first_description, first_comment, first_flags) = urls[0] other_urls = urls[1:] assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, pretend=pretend)) for (url, format, description, comment, flags) in other_urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() if not found_bibdoc: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags)) elif mode == 'correct': for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'): if newname != docname: try: if not pretend: bibdoc.change_name(newname) ## Let's refresh the list of bibdocs. 
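                                ## (Illustrative note, not part of the
                                ## original code: BibRecDocs caches its
                                ## bibdoc list, so after change_name() the
                                ## cache is rebuilt below; otherwise the
                                ## following get_docname() == newname
                                ## comparisons would still see the old name.)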
bibrecdocs.build_bibdoc_list() except StandardError, e: write_message('Error in renaming %s to %s: %s' % (docname, newname, e), stream=sys.stderr) raise found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == newname: found_bibdoc = True if doctype == 'PURGE': if not pretend: bibdoc.purge() elif doctype == 'DELETE': if not pretend: bibdoc.delete() elif doctype == 'EXPUNGE': if not pretend: bibdoc.expunge() elif doctype == 'FIX-ALL': if not pretend: bibrecdocs.fix(newname) elif doctype == 'FIX-MARC': pass elif doctype == 'DELETE-FILE': if urls: for (url, format, description, comment, flags) in urls: if not pretend: bibdoc.delete_file(format, version) elif doctype == 'REVERT': try: if not pretend: bibdoc.revert(version) except Exception, e: write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr) raise else: if restriction != KEEP_OLD_VALUE: if not pretend: bibdoc.set_status(restriction) if urls: (first_url, first_format, first_description, first_comment, first_flags) = urls[0] other_urls = urls[1:] assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, pretend=pretend)) for (url, format, description, comment, flags) in other_urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() if not found_bibdoc: if doctype in ('PURGE', 'DELETE', 'EXPUNGE', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE', 'REVERT'): write_message("('%s', '%s', '%s') not performed because '%s' docname didn't existed." % (doctype, newname, urls, docname), stream=sys.stderr) raise StandardError else: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags)) elif mode == 'append': try: found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: found_bibdoc = True for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) if not found_bibdoc: try: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, docname) bibdoc.set_status(restriction) for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags)) except Exception, e: register_exception() write_message("('%s', '%s', '%s') not appended because: '%s'." 
% (doctype, newname, urls, e), stream=sys.stderr) raise except: register_exception() raise return record def insert_fmt_tags(record, rec_id, opt_mode, pretend=False): """Process and insert FMT tags""" fmt_fields = record_get_field_instances(record, 'FMT') if fmt_fields: for fmt_field in fmt_fields: # Get the d, f, g subfields of the FMT tag try: d_value = field_get_subfield_values(fmt_field, "d")[0] except IndexError: d_value = "" try: f_value = field_get_subfield_values(fmt_field, "f")[0] except IndexError: f_value = "" try: g_value = field_get_subfield_values(fmt_field, "g")[0] except IndexError: g_value = "" # Update the format if not pretend: res = update_bibfmt_format(rec_id, g_value, f_value, d_value, pretend=pretend) if res == 1: write_message(" Failed: Error during update_bibfmt", verbose=1, stream=sys.stderr) # If we are in format mode, we only care about the FMT tag if opt_mode == 'format': return 0 # We delete the FMT Tag of the record record_delete_field(record, 'FMT') write_message(" -Delete field FMT from record : DONE", verbose=2) return record elif opt_mode == 'format': write_message(" Failed: Format updated failed : No tag FMT found", verbose=1, stream=sys.stderr) return None else: return record ### Update functions def update_bibrec_modif_date(now, bibrec_id, pretend=False): """Update the date of the record in bibrec table """ query = """UPDATE bibrec SET modification_date=%s WHERE id=%s""" params = (now, bibrec_id) try: if not pretend: run_sql(query, params) write_message(" -Update record modification date : DONE" , verbose=2) except Error, error: write_message(" Error during update_bibrec_modif_date function : %s" % error, verbose=1, stream=sys.stderr) def update_bibfmt_format(id_bibrec, format_value, format_name, modification_date=None, pretend=False): """Update the format in the table bibfmt""" if modification_date is None: modification_date = time.strftime('%Y-%m-%d %H:%M:%S') else: try: time.strptime(modification_date, "%Y-%m-%d %H:%M:%S") except ValueError: modification_date = '1970-01-01 00:00:00' # We check if the format is already in bibFmt nb_found = find_record_format(id_bibrec, format_name) if nb_found == 1: # we are going to update the format # compress the format_value value pickled_format_value = compress(format_value) # update the format: query = """UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s""" params = (modification_date, pickled_format_value, id_bibrec, format_name) try: if not pretend: row_id = run_sql(query, params) if not pretend and row_id is None: write_message(" Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Update the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 except Error, error: write_message(" Error during the update_bibfmt_format function : %s " % error, verbose=1, stream=sys.stderr) elif nb_found > 1: write_message(" Failed: Same format %s found several time in bibfmt for the same record." 
% format_name, verbose=1, stream=sys.stderr) return 1 else: # Insert the format information in BibFMT res = insert_bibfmt(id_bibrec, format_value, format_name, modification_date, pretend=pretend) if res is None: write_message(" Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Insert the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 def archive_marcxml_for_history(recID, pretend=False): """ Archive current MARCXML format of record RECID from BIBFMT table into hstRECORD table. Useful to keep MARCXML history of records. Return 0 if everything went fine. Return 1 otherwise. """ try: res = run_sql("SELECT id_bibrec, value, last_updated FROM bibfmt WHERE format='xm' AND id_bibrec=%s", (recID,)) if res and not pretend: run_sql("""INSERT INTO hstRECORD (id_bibrec, marcxml, job_id, job_name, job_person, job_date, job_details) VALUES (%s,%s,%s,%s,%s,%s,%s)""", (res[0][0], res[0][1], task_get_task_param('task_id', 0), 'bibupload', task_get_task_param('user','UNKNOWN'), res[0][2], 'mode: ' + task_get_option('mode','UNKNOWN') + '; file: ' + task_get_option('file_path','UNKNOWN') + '.')) except Error, error: write_message(" Error during archive_marcxml_for_history: %s " % error, verbose=1, stream=sys.stderr) return 1 return 0 def update_database_with_metadata(record, rec_id, oai_rec_id = "oai", pretend=False): """Update the database tables with the record and the record id given in parameter""" for tag in record.keys(): # check if tag is not a special one: if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each tag there is a list of tuples representing datafields tuple_list = record[tag] # this list should contain the elements of a full tag [tag, ind1, ind2, subfield_code] tag_list = [] tag_list.append(tag) for single_tuple in tuple_list: # these are the contents of a single tuple subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # append the ind's to the full tag if ind1 == '' or ind1 == ' ': tag_list.append('_') else: tag_list.append(ind1) if ind2 == '' or ind2 == ' ': tag_list.append('_') else: tag_list.append(ind2) datafield_number = single_tuple[4] if tag in CFG_BIBUPLOAD_SPECIAL_TAGS: # nothing to do for special tags (FFT, FMT) pass elif tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001": value = single_tuple[3] # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend) #print 'tname, bibrow', table_name, bibxxx_row_id; if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) else: # get the tag and value from the content of each subfield for subfield in subfield_list: subtag = subfield[0] value = subfield[1] tag_list.append(subtag) # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend) if table_name is None or 
bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) # remove the subtag from the list tag_list.pop() tag_list.pop() tag_list.pop() tag_list.pop() write_message(" -Update the database with metadata : DONE", verbose=2) log_record_uploading(oai_rec_id, task_get_task_param('task_id', 0), rec_id, 'P', pretend=pretend) def append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode): """Append new tags to a old record""" def _append_tag(tag): # Reference mode append only reference tag if opt_mode == 'reference': if tag == CFG_BIBUPLOAD_REFERENCE_TAG: for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # We add the datafield to the old record write_message(" Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9) newfield_number = record_add_field(rec_old, tag, ind1, ind2, subfields=subfield_list) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS: if tag == '001': pass else: # if it is a controlfield,just access the value for single_tuple in record[tag]: controlfield_value = single_tuple[3] # add the field to the old record newfield_number = record_add_field(rec_old, tag, controlfield_value=controlfield_value) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: # For each tag there is a list of tuples representing datafields for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] if '%s%s%s' % (tag, ind1 == ' ' and '_' or ind1, ind2 == ' ' and '_' or ind2) in (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[:5]): ## We don't want to append the external identifier ## if it is already existing. if record_find_field(rec_old, tag, single_tuple)[0] is not None: write_message(" Not adding tag: %s ind1=%s ind2=%s subfields=%s: it's already there" % (tag, ind1, ind2, subfield_list), verbose=9) continue # We add the datafield to the old record write_message(" Adding tag: %s ind1=%s ind2=%s subfields=%s" % (tag, ind1, ind2, subfield_list), verbose=9) newfield_number = record_add_field(rec_old, tag, ind1, ind2, subfields=subfield_list) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) if opt_tag is not None: _append_tag(opt_tag) else: # Go through each tag in the appended record for tag in record: _append_tag(tag) return rec_old def copy_strong_tags_from_old_record(record, rec_old): """ Look for strong tags in RECORD and REC_OLD. If no strong tags are found in RECORD, then copy them over from REC_OLD. This function modifies RECORD structure on the spot. 
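
    Sketch (with a hypothetical configuration, as CFG_BIBUPLOAD_STRONG_TAGS
    is site-specific): if it contains '909C5' and the incoming RECORD has
    no 909C5 field while REC_OLD does, every old 909C5 instance is copied
    back into RECORD, so that a plain replace cannot silently wipe such
    curated fields.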
""" for strong_tag in CFG_BIBUPLOAD_STRONG_TAGS: if not record_get_field_instances(record, strong_tag): strong_tag_old_field_instances = record_get_field_instances(rec_old, strong_tag) if strong_tag_old_field_instances: for strong_tag_old_field_instance in strong_tag_old_field_instances: sf_vals, fi_ind1, fi_ind2, controlfield, dummy = strong_tag_old_field_instance record_add_field(record, strong_tag, fi_ind1, fi_ind2, controlfield, sf_vals) return ### Delete functions def delete_tags(record, rec_old): """ Returns a record structure with all the fields in rec_old minus the fields in record. @param record: The record containing tags to delete. @type record: record structure @param rec_old: The original record. @type rec_old: record structure @return: The modified record. @rtype: record structure """ returned_record = copy.deepcopy(rec_old) for tag, fields in record.iteritems(): if tag in ('001', ): continue for field in fields: local_position = record_find_field(returned_record, tag, field)[1] if local_position is not None: record_delete_field(returned_record, tag, field_position_local=local_position) return returned_record def delete_tags_to_correct(record, rec_old, opt_tag): """ Delete tags from REC_OLD which are also existing in RECORD. When deleting, pay attention not only to tags, but also to indicators, so that fields with the same tags but different indicators are not deleted. """ ## Some fields are controlled via provenance information. ## We should re-add saved fields at the end. fields_to_readd = {} for tag in CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS: if tag[:3] in record: tmp_field_instances = record_get_field_instances(record, tag[:3], tag[3], tag[4]) ## Let's discover the provenance that will be updated provenances_to_update = [] for instance in tmp_field_instances: for code, value in instance[0]: if code == tag[5]: if value not in provenances_to_update: provenances_to_update.append(value) break else: ## The provenance is not specified. ## let's add the special empty provenance. if '' not in provenances_to_update: provenances_to_update.append('') potential_fields_to_readd = record_get_field_instances(rec_old, tag[:3], tag[3], tag[4]) ## Let's take all the field corresponding to tag ## Let's save apart all the fields that should be updated, but ## since they have a different provenance not mentioned in record ## they should be preserved. fields = [] for sf_vals, ind1, ind2, dummy_cf, dummy_line in potential_fields_to_readd: for code, value in sf_vals: if code == tag[5]: if value not in provenances_to_update: fields.append(sf_vals) break else: if '' not in provenances_to_update: ## Empty provenance, let's protect in any case fields.append(sf_vals) fields_to_readd[tag] = fields # browse through all the tags from the MARCXML file: for tag in record: # do we have to delete only a special tag or any tag? if opt_tag is None or opt_tag == tag: # check if the tag exists in the old record too: if tag in rec_old and tag != '001': # the tag does exist, so delete all record's tag+ind1+ind2 combinations from rec_old for dummy_sf_vals, ind1, ind2, dummy_cf, field_number in record[tag]: write_message(" Delete tag: " + tag + " ind1=" + ind1 + " ind2=" + ind2, verbose=9) record_delete_field(rec_old, tag, ind1, ind2) ## Ok, we readd necessary fields! 
for tag, fields in fields_to_readd.iteritems(): for sf_vals in fields: write_message(" Adding tag: " + tag[:3] + " ind1=" + tag[3] + " ind2=" + tag[4] + " code=" + str(sf_vals), verbose=9) record_add_field(rec_old, tag[:3], tag[3], tag[4], subfields=sf_vals) def delete_bibrec_bibxxx(record, id_bibrec, pretend=False): """Delete the database record from the table bibxxx given in parameters""" # we clear all the rows from bibrec_bibxxx from the old record for tag in record.keys(): if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each name construct the bibrec_bibxxx table name table_name = 'bibrec_bib'+tag[0:2]+'x' # delete all the records with proper id_bibrec query = """DELETE FROM `%s` where id_bibrec = %s""" params = (table_name, id_bibrec) if not pretend: try: run_sql(query % params) except Error, error: write_message(" Error during the delete_bibrec_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) def main(): """Main that construct all the bibtask.""" task_init(authorization_action='runbibupload', authorization_msg="BibUpload Task Submission", description="""Receive MARC XML file and update appropriate database tables according to options. Examples: $ bibupload -i input.xml """, help_specific_usage=""" -a, --append\t\tnew fields are appended to the existing record -c, --correct\t\tfields are replaced by the new ones in the existing record -f, --format\t\ttakes only the FMT fields into account. Does not update -i, --insert\t\tinsert the new record in the database -r, --replace\t\tthe existing record is entirely replaced by the new one -z, --reference\tupdate references (update only 999 fields) -d, --delete\t\tspecified fields are deleted in existing record -S, --stage=STAGE\tstage to start from in the algorithm (0: always done; 1: FMT tags; \t\t\t2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange\tdo not change record last modification date when updating -o, --holdingpen\tInsert record into holding pen instead of the normal database --pretend\t\tdo not really insert/append/correct/replace the input file --force\t\twhen --replace, use provided 001 tag values, even if the matching \t\t\trecord does not exist (thus allocating it on-the-fly) --callback-url\tSend via a POST request a JSON-serialized answer (see admin guide), in \t\t\torder to provide a feedback to an external service about the outcome of the operation. """, version=__revision__, specific_params=("ircazdS:fno", [ "insert", "replace", "correct", "append", "reference", "delete", "stage=", "format", "notimechange", "holdingpen", "pretend", "force", "callback-url=" ]), task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. 
eg: if key in ['-n', '--number']: task_get_option(\1) = value return True return False """ # No time change option if key in ("-n", "--notimechange"): task_set_option('notimechange', 1) # Insert mode option elif key in ("-i", "--insert"): if task_get_option('mode') == 'replace': # if also replace found, then set to replace_or_insert task_set_option('mode', 'replace_or_insert') else: task_set_option('mode', 'insert') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Replace mode option elif key in ("-r", "--replace"): if task_get_option('mode') == 'insert': # if also insert found, then set to replace_or_insert task_set_option('mode', 'replace_or_insert') else: task_set_option('mode', 'replace') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Holding pen mode option elif key in ("-o", "--holdingpen"): write_message("Holding pen mode", verbose=3) task_set_option('mode', 'holdingpen') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Correct mode option elif key in ("-c", "--correct"): task_set_option('mode', 'correct') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Append mode option elif key in ("-a", "--append"): task_set_option('mode', 'append') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Reference mode option elif key in ("-z", "--reference"): task_set_option('mode', 'reference') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("-d", "--delete"): task_set_option('mode', 'delete') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Format mode option elif key in ("-f", "--format"): task_set_option('mode', 'format') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("--pretend",): task_set_option('pretend', True) fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("--force",): task_set_option('force', True) fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Stage elif key in ("-S", "--stage"): try: value = int(value) except ValueError: print >> sys.stderr, """The value specified for --stage must be a valid integer, not %s""" % value return False if not (0 <= value <= 5): print >> sys.stderr, """The value specified for --stage must be comprised between 0 and 5""" return False task_set_option('stage_to_start_from', value) elif key in ("--callback-url", ): task_set_option('callback_url', value) else: return False return True def task_submit_check_options(): """ Reimplement this method for having the possibility to check options before submitting the task, in order for example to provide default values. It must return False if there are errors in the options. """ if task_get_option('mode') is None: write_message("Please specify at least one update/insert mode!") return False if task_get_option('file_path') is None: write_message("Missing filename! 
-h for help.") return False return True def writing_rights_p(): """Return True in case bibupload has the proper rights to write in the fulltext file folder.""" global _WRITING_RIGHTS if _WRITING_RIGHTS is not None: return _WRITING_RIGHTS try: if not os.path.exists(CFG_WEBSUBMIT_FILEDIR): os.makedirs(CFG_WEBSUBMIT_FILEDIR) fd, filename = tempfile.mkstemp(suffix='.txt', prefix='test', dir=CFG_WEBSUBMIT_FILEDIR) test = os.fdopen(fd, 'w') test.write('TEST') test.close() if open(filename).read() != 'TEST': raise IOError("Can not successfully write and readback %s" % filename) os.remove(filename) except: register_exception(alert_admin=True) return False return True def post_results_to_callback_url(results, callback_url): if not CFG_JSON_AVAILABLE: from warnings import warn warn("--callback-url used but simplejson/json not available") return json_results = json.dumps(results) ## :///?# scheme, netloc, path, query, fragment = urlparse.urlsplit(callback_url) ## See: http://stackoverflow.com/questions/111945/is-there-any-way-to-do-http-put-in-python if scheme == 'http': opener = urllib2.build_opener(urllib2.HTTPHandler) elif scheme == 'https': opener = urllib2.build_opener(urllib2.HTTPSHandler) else: raise ValueError("Scheme not handled %s for callback_url %s" % (scheme, callback_url)) request = urllib2.Request(callback_url, data=json_results) request.add_header('Content-Type', 'application/json') request.get_method = lambda: 'POST' return opener.open(request) def task_run_core(): """ Reimplement to add the body of the task.""" error = 0 write_message("Input file '%s', input mode '%s'." % (task_get_option('file_path'), task_get_option('mode'))) write_message("STAGE 0:", verbose=2) if task_get_option('file_path') is not None: write_message("start preocessing", verbose=3) task_update_progress("Reading XML input") recs = xml_marc_to_records(open_marc_file(task_get_option('file_path'))) stat['nb_records_to_upload'] = len(recs) write_message(" -Open XML marc: DONE", verbose=2) task_sleep_now_if_required(can_stop_too=True) write_message("Entering records loop", verbose=3) callback_url = task_get_option('callback_url') results_for_callback = {'results': []} if recs is not None: # We proceed each record by record for record in recs: record_id = record_extract_oai_id(record) task_sleep_now_if_required(can_stop_too=True) if task_get_option("mode") == "holdingpen": #inserting into the holding pen write_message("Inserting into holding pen", verbose=3) insert_record_into_holding_pen(record, record_id) else: write_message("Inserting into main database", verbose=3) error = bibupload( record, opt_tag=task_get_option('tag'), opt_mode=task_get_option('mode'), opt_stage_to_start_from=task_get_option('stage_to_start_from'), opt_notimechange=task_get_option('notimechange'), oai_rec_id=record_id, pretend=task_get_option('pretend')) if error[0] == 1: if record: write_message(record_xml_output(record), stream=sys.stderr) else: write_message("Record could not have been parsed", stream=sys.stderr) stat['nb_errors'] += 1 if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) elif error[0] == 2: if record: write_message(record_xml_output(record), stream=sys.stderr) else: write_message("Record could not have been parsed", stream=sys.stderr) if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) elif error[0] == 0: if callback_url: from invenio.search_engine import print_record 
results_for_callback['results'].append({'recid': error[1], 'success': True, "marcxml": print_record(error[1], 'xm'), 'url': "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, error[1])}) else: if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) task_update_progress("Done %d out of %d." % \ (stat['nb_records_inserted'] + \ stat['nb_records_updated'], stat['nb_records_to_upload'])) else: write_message(" Error bibupload failed: No record found", verbose=1, stream=sys.stderr) callback_url = task_get_option("callback_url") if callback_url: post_results_to_callback_url(results_for_callback, callback_url) if task_get_task_param('verbose') >= 1: # Print out the statistics print_out_bibupload_statistics() # Check whether there were errors return not stat['nb_errors'] >= 1 def log_record_uploading(oai_rec_id, task_id, bibrec_id, insertion_db, pretend=False): if oai_rec_id != "" and oai_rec_id != None: query = """UPDATE oaiHARVESTLOG SET date_inserted=NOW(), inserted_to_db=%s, id_bibrec=%s WHERE oai_id = %s AND bibupload_task_id = %s ORDER BY date_harvested LIMIT 1""" try: if not pretend: run_sql(query, (str(insertion_db), str(bibrec_id), str(oai_rec_id), str(task_id), )) except Error, error: write_message(" Error during the log_record_uploading function: %s" % error, verbose=1, stream=sys.stderr) if __name__ == "__main__": main() diff --git a/modules/bibupload/lib/bibupload_config.py b/modules/bibupload/lib/bibupload_config.py index 690e5a2e0..641cf8cef 100644 --- a/modules/bibupload/lib/bibupload_config.py +++ b/modules/bibupload/lib/bibupload_config.py @@ -1,31 +1,30 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibUpload Engine configuration. """ __revision__ = "$Id$" CFG_BIBUPLOAD_CONTROLFIELD_TAGS = ['001', '002', '003', '004', '005', '006', '007', '008'] CFG_BIBUPLOAD_SPECIAL_TAGS = ['FMT', 'FFT'] - diff --git a/modules/miscutil/lib/htmlutils.py b/modules/miscutil/lib/htmlutils.py index 06fece685..1b3a95e25 100644 --- a/modules/miscutil/lib/htmlutils.py +++ b/modules/miscutil/lib/htmlutils.py @@ -1,636 +1,786 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details.
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """HTML utilities.""" __revision__ = "$Id$" from HTMLParser import HTMLParser from invenio.config import CFG_SITE_URL, \ CFG_MATHJAX_HOSTING, \ CFG_SITE_LANG, \ CFG_WEBDIR -from invenio.textutils import indent_text +from invenio.textutils import indent_text, encode_for_xml import re import cgi import os try: from BeautifulSoup import BeautifulSoup CFG_BEAUTIFULSOUP_INSTALLED = True except ImportError: CFG_BEAUTIFULSOUP_INSTALLED = False try: import tidy CFG_TIDY_INSTALLED = True except ImportError: CFG_TIDY_INSTALLED = False # List of allowed tags (tags that won't create any XSS risk) -cfg_html_buffer_allowed_tag_whitelist = ('a', +CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST = ('a', 'p', 'br', 'blockquote', 'strong', 'b', 'u', 'i', 'em', 'ul', 'ol', 'li', 'sub', 'sup', 'div', 'strike') # List of allowed attributes. Be cautious, some attributes may be risky: #
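# (Illustrative, not part of the original comment: attributes that must stay
# outside the whitelist include event handlers such as "onload"/"onclick",
# and "style" values that can invoke scripts, as exercised by the washer
# tests later in this patch.)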

-cfg_html_buffer_allowed_attribute_whitelist = ('href', 'name', 'class') +CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST = ('href', 'name', 'class') ## precompile some often-used regexp for speed reasons: -re_html = re.compile("(?s)<[^>]*>|&#?\w+;") +RE_HTML = re.compile("(?s)<[^>]*>|&#?\w+;") def nmtoken_from_string(text): """ Returns a Nmtoken from a string. It is useful to produce XHTML valid values for the 'name' attribute of an anchor. CAUTION: the function is not injective: two different texts may lead to the same result. This is improbable on a single page. Nmtoken is the type that is a mixture of characters supported in attributes such as 'name' in HTML 'a' tag. For example, should be transformed to using this function. http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken Also note that this function filters more characters than specified by the definition of Nmtoken ('CombiningChar' and 'Extender' charsets are filtered out). """ text = text.replace('-', '--') return ''.join( [( ((not char.isalnum() and not char in ['.', '-', '_', ':']) and str(ord(char))) or char) for char in text] ) def escape_html(text, escape_quotes=False): """Escape all HTML tags, avoiding XSS attacks. < => &lt; > => &gt; & => &amp; @param text: text to be escaped from HTML tags @param escape_quotes: if True, escape any quote mark to its HTML entity: " => &quot; ' => &#34; """ text = text.replace('&', '&amp;') text = text.replace('<', '&lt;') text = text.replace('>', '&gt;') if escape_quotes: text = text.replace('"', '&quot;') text = text.replace("'", '&#34;') return text class HTMLWasher(HTMLParser): """ Creates a washer for HTML, avoiding XSS attacks. See wash function for details on parameters. Usage:: from invenio.htmlutils import HTMLWasher washer = HTMLWasher() escaped_text = washer.wash(unescaped_text) Examples:: a.wash('Spam and <blink>eggs</blink>') => 'Spam and eggs' a.wash('Spam and <blink>eggs</blink>', True) => 'Spam and &lt;blink&gt;eggs&lt;/blink&gt;' a.wash('Spam and eggs') => 'Spam and eggs' a.wash('Spam and eggs') =>'Spam and eggs' a.wash('Spam and poilu') =>'Spam and eggs' """ silent = False def __init__(self): """ Constructor; initializes washer """ HTMLParser.__init__(self) self.result = '' self.nb = 0 self.previous_nbs = [] self.previous_type_lists = [] self.url = '' self.render_unallowed_tags = False self.allowed_tag_whitelist = \ - cfg_html_buffer_allowed_tag_whitelist + CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST self.allowed_attribute_whitelist = \ - cfg_html_buffer_allowed_attribute_whitelist + CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST # javascript: self.re_js = re.compile( ".*(j|j|J)"\ "\s*(a|a|A)"\ "\s*(v|v|V)"\ "\s*(a|a|A)"\ "\s*(s|s|S)"\ "\s*(c|c|C)"\ "\s*(r|r|R)"\ "\s*(i|Ã|I)"\ "\s*(p|p|P)"\ "\s*(t|p|T)"\ "\s*(:|:).*", re.IGNORECASE | re.DOTALL) # vbscript: self.re_vb = re.compile( ".*(v|v|V)"\ "\s*(b|b|B)"\ "\s*(s|s|S)"\ "\s*(c|c|C)"\ "\s*(r|r|R)"\ "\s*(i|Ã|I)"\ "\s*(p|p|P)"\ "\s*(t|p|T)"\ "\s*(:|:).*", re.IGNORECASE | re.DOTALL) def wash(self, html_buffer, render_unallowed_tags=False, - allowed_tag_whitelist=cfg_html_buffer_allowed_tag_whitelist, + allowed_tag_whitelist=CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST, allowed_attribute_whitelist=\ - cfg_html_buffer_allowed_attribute_whitelist + CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST): """ Wash HTML buffer, escaping XSS attacks. @param html_buffer: text to escape @param render_unallowed_tags: if True, print unallowed tags escaping < and >. Else, only print content of unallowed tags.
@param allowed_tag_whitelist: list of allowed tags @param allowed_attribute_whitelist: list of allowed attributes """ self.reset() self.result = '' self.nb = 0 self.previous_nbs = [] self.previous_type_lists = [] self.url = '' self.render_unallowed_tags = render_unallowed_tags self.allowed_tag_whitelist = allowed_tag_whitelist self.allowed_attribute_whitelist = allowed_attribute_whitelist self.feed(html_buffer) self.close() return self.result def handle_starttag(self, tag, attrs): """Function called for new opening tags""" if tag.lower() in self.allowed_tag_whitelist: self.result += '<' + tag for (attr, value) in attrs: if attr.lower() in self.allowed_attribute_whitelist: self.result += ' %s="%s"' % \ (attr, self.handle_attribute_value(value)) self.result += '>' else: if self.render_unallowed_tags: self.result += '&lt;' + cgi.escape(tag) for (attr, value) in attrs: self.result += ' %s="%s"' % \ (attr, cgi.escape(value, True)) self.result += '&gt;' elif tag == 'style' or tag == 'script': # In that case we want to remove content too self.silent = True def handle_data(self, data): """Function called for text nodes""" if not self.silent: # let's check whether data contains a link import string if string.find(str(data),'http://') == -1: self.result += cgi.escape(data, True) else: if self.url: if self.url <> data: self.url = '' self.result += '(' + cgi.escape(data, True) + ')' def handle_endtag(self, tag): """Function called for ending of tags""" if tag.lower() in self.allowed_tag_whitelist: self.result += '</' + tag + '>' else: if self.render_unallowed_tags: self.result += '&lt;/' + cgi.escape(tag) + '&gt;' if tag == 'style' or tag == 'script': self.silent = False def handle_startendtag(self, tag, attrs): """Function called for empty tags (e.g.
    )""" if tag.lower() in self.allowed_tag_whitelist: self.result += '<' + tag for (attr, value) in attrs: if attr.lower() in self.allowed_attribute_whitelist: self.result += ' %s="%s"' % \ (attr, self.handle_attribute_value(value)) self.result += ' />' else: if self.render_unallowed_tags: self.result += '<' + cgi.escape(tag) for (attr, value) in attrs: self.result += ' %s="%s"' % \ (attr, cgi.escape(value, True)) self.result += ' />' def handle_attribute_value(self, value): """Check attribute. Especially designed for avoiding URLs in the form: javascript:myXSSFunction();""" if self.re_js.match(value) or self.re_vb.match(value): return '' return value def handle_charref(self, name): """Process character references of the form "&#ref;". Return it as it is.""" self.result += '&#' + name + ';' def handle_entityref(self, name): """Process a general entity reference of the form "&name;". Return it as it is.""" self.result += '&' + name + ';' def tidy_html(html_buffer, cleaning_lib='utidylib'): """ Tidy up the input HTML using one of the installed cleaning libraries. @param html_buffer: the input HTML to clean up @type html_buffer: string @param cleaning_lib: chose the preferred library to clean the HTML. One of: - utidylib - beautifulsoup @return: a cleaned version of the input HTML @note: requires uTidylib or BeautifulSoup to be installed. If the chosen library is missing, the input X{html_buffer} is returned I{as is}. """ if CFG_TIDY_INSTALLED and cleaning_lib == 'utidylib': options = dict(output_xhtml=1, show_body_only=1, merge_divs=0, wrap=0) try: output = str(tidy.parseString(html_buffer, **options)) except: output = html_buffer elif CFG_BEAUTIFULSOUP_INSTALLED and cleaning_lib == 'beautifulsoup': try: output = str(BeautifulSoup(html_buffer).prettify()) except: output = html_buffer else: output = html_buffer return output def get_mathjax_header(https=False): """ Return the snippet of HTML code to put in HTML HEAD tag, in order to enable MathJax support. @param https: when using the CDN, whether to use the HTTPS URL rather than the HTTP one. @type https: bool @note: with new releases of MathJax, update this function toghether with $MJV variable in the root Makefile.am """ if CFG_MATHJAX_HOSTING.lower() == 'cdn': if https: mathjax_path = "https://d3eoax9i5htok0.cloudfront.net/mathjax/1.1-latest" else: mathjax_path = "http://cdn.mathjax.org/mathjax/1.1-latest" else: mathjax_path = "/MathJax" return """ """ % { 'mathjax_path': mathjax_path } def is_html_text_editor_installed(): """ Returns True if the wysiwyg editor (CKeditor) is installed """ return os.path.exists(os.path.join(CFG_WEBDIR, 'ckeditor', 'ckeditor.js')) ckeditor_available = is_html_text_editor_installed() def get_html_text_editor(name, id=None, content='', textual_content=None, width='300px', height='200px', enabled=True, file_upload_url=None, toolbar_set="Basic", custom_configurations_path='/ckeditor/invenio-ckeditor-config.js', ln=CFG_SITE_LANG): """ Returns a wysiwyg editor (CKEditor) to embed in html pages. Fall back to a simple textarea when the library is not installed, or when the user's browser is not compatible with the editor, or when 'enable' is False, or when javascript is not enabled. NOTE that the output also contains a hidden field named 'editor_type' that contains the kind of editor used, 'textarea' or 'ckeditor'. Based on 'editor_type' you might want to take different actions, like replace CRLF with
when editor_type equals 'textarea', but not when editor_type equals 'ckeditor'. @param name: *str* the name attribute of the returned editor @param id: *str* the id attribute of the returned editor (when applicable) @param content: *str* the default content of the editor. @param textual_content: *str* a content formatted for the case where the wysiwyg editor is not available for the user. When not specified, use value of 'content' @param width: *str* width of the editor in an html compatible unit: Eg: '400px', '50%'. @param height: *str* height of the editor in an html compatible unit: Eg: '400px', '50%'. @param enabled: *bool* if the wysiwyg editor is returned (True) or if a simple textarea is returned (False) @param file_upload_url: *str* the URL used to upload new files via the editor upload panel. You have to implement the handler for your own use. The URL handler will get form variables 'File' as POST for the uploaded file, and 'Type' as GET for the type of file ('file', 'image', 'flash', 'media') When value is not given, the file upload is disabled. @param toolbar_set: *str* the name of the toolbar layout to use. CKeditor comes by default with 'Basic' and 'Default'. To define other sets, customize the config file in /opt/cds-invenio/var/www/ckeditor/invenio-ckconfig.js @param custom_configurations_path: *str* value for the CKeditor config variable 'CustomConfigurationsPath', which allows one to specify the path of a file that contains a custom configuration for the editor. The path is relative to /opt/invenio/var/www/ @return: the HTML markup of the editor """ if textual_content is None: textual_content = content editor = '' if enabled and ckeditor_available: # Prepare upload path settings file_upload_script = '' if file_upload_url is not None: file_upload_script = ''', filebrowserLinkUploadUrl: '%(file_upload_url)s', filebrowserImageUploadUrl: '%(file_upload_url)s?type=Image', filebrowserFlashUploadUrl: '%(file_upload_url)s?type=Flash' ''' % {'file_upload_url': file_upload_url} # Prepare code to instantiate an editor editor += ''' + ''' % \ {'textual_content': cgi.escape(textual_content), 'html_content': content, 'width': width, 'height': height, 'name': name, 'id': id or name, 'custom_configurations_path': custom_configurations_path, 'toolbar': toolbar_set, 'file_upload_script': file_upload_script, 'CFG_SITE_URL': CFG_SITE_URL, 'ln': ln} else: # CKeditor is not installed textarea = '' \ % {'content': cgi.escape(textual_content), 'width': width, 'height': height, 'name': name, 'id': id and ('id="%s"' % id) or ''} editor += textarea editor += '' return editor def remove_html_markup(text, replacechar=' '): """ Remove HTML markup from text. @param text: Input text. @type text: string. @param replacechar: By which character should we replace HTML markup. Usually, a single space or an empty string are nice values. @type replacechar: string @return: Input text with HTML markup removed. @rtype: string """ - return re_html.sub(replacechar, text) + return RE_HTML.sub(replacechar, text) + + +class EscapedString(str): + """ + This class is a stub used by the MLClass machinery in order + to distinguish native strings from strings that do not need to be + escaped. + """ + pass + +class EscapedHTMLString(EscapedString): + """ + This class automatically escapes a non-escaped string used to initialize + it, using the HTML escaping method (i.e. cgi.escape).
+ """ + def __new__(cls, original_string='', escape_quotes=False): + if isinstance(original_string, EscapedString): + escaped_string = str(original_string) + else: + if original_string and not str(original_string).strip(): + escaped_string = ' ' + else: + escaped_string = cgi.escape(str(original_string), escape_quotes) + obj = str.__new__(cls, escaped_string) + obj.original_string = original_string + obj.escape_quotes = escape_quotes + return obj + + def __repr__(self): + return 'EscapedHTMLString(%s, %s)' % (repr(self.original_string), repr(self.escape_quotes)) + + def __add__(self, rhs): + return EscapedHTMLString(EscapedString(str(self) + str(rhs))) + +class EscapedXMLString(EscapedString): + """ + This class automatically escape a non-escaped string used to initialize + it, using the XML escaping method (i.e. encode_for_xml). + """ + def __new__(cls, original_string='', escape_quotes=False): + if isinstance(original_string, EscapedString): + escaped_string = str(original_string) + else: + if original_string and not str(original_string).strip(): + escaped_string = ' ' + else: + escaped_string = encode_for_xml(str(original_string), wash=True, quote=escape_quotes) + obj = str.__new__(cls, escaped_string) + obj.original_string = original_string + obj.escape_quotes = escape_quotes + return obj + + def __repr__(self): + return 'EscapedXMLString(%s, %s)' % (repr(self.original_string), repr(self.escape_quotes)) + + def __add__(self, rhs): + return EscapedXMLString(EscapedString(str(self) + str(rhs))) -def create_html_tag(tag, body=None, escape_body=False, escape_attr=True, indent=0, attrs=None, **other_attrs): +def create_tag(tag, escaper=EscapedHTMLString, opening_only=False, body=None, escape_body=False, escape_attr=True, indent=0, attrs=None, **other_attrs): """ - Create an HTML tag. + Create an XML/HTML tag. - This function create a full HTML tag, putting toghether an + This function create a full XML/HTML tag, putting toghether an optional inner body and a dictionary of attributes. >>> print create_html_tag ("select", create_html_tag("h1", ... "hello", other_attrs={'class': "foo"})) @param tag: the tag (e.g. "select", "body", "h1"...). @type tag: string @param body: some text/HTML to put in the body of the tag (this body will be indented WRT the tag). @type body: string @param escape_body: wether the body (if any) must be escaped. @type escape_body: boolean @param escape_attr: wether the attribute values (if any) must be escaped. @type escape_attr: boolean @param indent: number of level of indentation for the tag. @type indent: integer @param attrs: map of attributes to add to the tag. @type attrs: dict @return: the HTML tag. 
@rtype: string """ if attrs is None: attrs = {} - attrs.update(other_attrs) + for key, value in other_attrs.iteritems(): + if value is not None: + if key.endswith('_'): + attrs[key[:-1]] = value + else: + attrs[key] = value out = "<%s" % tag for key, value in attrs.iteritems(): if escape_attr: - value = escape_html(value, escape_quotes=True) + value = escaper(value, escape_quotes=True) out += ' %s="%s"' % (key, value) - if body: - out += ">\n" - if escape_body: - body = escape_html(body) - out += indent_text(body, 1) - out += "" % tag - else: + if body is not None: + if callable(body) and body.__name__ == 'handle_body': + body = body() + out += ">" + if escape_body and not isinstance(body, EscapedString): + body = escaper(body) + out += body + if not opening_only: + out += "" % tag + elif not opening_only: out += " />" - out = indent_text(out, indent) - out = out[:-1] # Let's remove trailing new line - return out + if indent: + out = indent_text(out, indent)[:-1] + return EscapedString(out) -def create_html_select(options, selected=None, attrs=None, **other_attrs): +class MLClass(object): + """ + Swiss army knife to generate XML or HTML strings a la carte. + + >>> from invenio.htmlutils import X, H + >>> X.foo()() + ... '' + >>> X.foo(bar='baz')() + ... '' + >>> X.foo(bar='baz&pi')() + ... '' + >>> X.foo("", bar='baz') + ... '' + >>> X.foo(bar='baz')(X.body()) + ... '' + >>> X.foo(bar='baz')("") ## automatic escaping + ... '<body />' + >>> X.foo()(X.p(), X.p()) ## magic concatenation + ... '

' + >>> X.foo(class_='bar')() ## protected keywords... + ... '' + >>> X["xml-bar"]()() + ... '' + """ + + def __init__(self, escaper): + self.escaper = escaper + + def __getattr__(self, tag): + def tag_creator(body=None, opening_only=False, escape_body=False, escape_attr=True, indent=0, attrs=None, **other_attrs): + if body: + return create_tag(tag, body=body, opening_only=opening_only, escape_body=escape_body, escape_attr=escape_attr, indent=indent, attrs=attrs, **other_attrs) + else: + def handle_body(*other_bodies): + full_body = None + if other_bodies: + full_body = "" + for body in other_bodies: + if callable(body) and body.__name__ == 'handle_body': + full_body += body() + elif isinstance(body, EscapedString): + full_body += body + else: + full_body += self.escaper(str(body)) + return create_tag(tag, body=full_body, opening_only=opening_only, escape_body=escape_body, escape_attr=escape_attr, indent=indent, attrs=attrs, **other_attrs) + return handle_body + return tag_creator + + __getitem__ = __getattr__ + + +H = MLClass(EscapedHTMLString) +X = MLClass(EscapedXMLString) + +def create_html_select(options, name=None, selected=None, disabled=None, multiple=False, attrs=None, **other_attrs): """ Create an HTML select box. >>> print create_html_select(["foo", "bar"], selected="bar", name="baz") - >>> print create_html_select({"foo": "oof", "bar": "rab"}, selected="bar", name="baz") + >>> print create_html_select([("foo", "oof"), ("bar", "rab")], selected="bar", name="baz") - @param options: this can either be a sequence of strings or a map of - C{key->value}. In the former case, the C{select} tag will contain - a list of C{option} tags (in alphabetical order), where the - C{value} attribute is set to C{value}. In the latter case, the - C{value} attribute will be set to the C{key}, while the body + @param options: this can either be a sequence of strings, or a sequence + of (key, value) pairs or a map of C{key->value}. In the former case, the C{select} + tag will contain a list of C{option} tags (in alphabetical order), + where the C{value} attribute is not specified. In the latter case, + the C{value} attribute will be set to the C{key}, while the body of the C{option} will be set to C{value}. @type options: sequence or map - @param selected: optional key/value to select by default. In case - a map has been used for options, C{selected} must be set to an - existing C{key}, otherwise it must be set to an existing - C{value}. - @type selected: string + @param name: the name of the form element. + @type name: string + @param selected: optional key(s)/value(s) to select by default. In case + a map has been used for options. + @type selected: string (or list of string) + @param disabled: optional key(s)/value(s) to disable. + @type disabled: string (or list of string) + @param multiple: whether a multiple select box must be created. + @type multiple: bool @param attrs: optional attributes to create the select tag. @type attrs: dict @param other_attrs: other optional attributes. @return: the HTML output. @rtype: string @note: the values and keys will be escaped for HTML. @note: it is important that parameter C{value} is always specified, in case some browser plugins play with the markup, e.g. when translating the page.
""" body = [] - try: + if selected is None: + selected = [] + elif isinstance(selected, (str, unicode)): + selected = [selected] + if disabled is None: + disabled = [] + elif isinstance(disabled, (str, unicode)): + disabled = [disabled] + if name is not None and multiple and not name.endswith('[]'): + name += "[]" + if isinstance(options, dict): items = options.items() items.sort(lambda item1, item2: cmp(item1[1], item2[1])) - for key, value in items: - option_attrs = key == selected and {"selected": "selected"} or {} - body.append(create_html_tag("option", body=value, escape_body=True, value=key, attrs=option_attrs)) - except AttributeError: - options.sort() - for value in options: - option_attrs = value == selected and {"selected": "selected"} or {} - body.append(create_html_tag("option", body=value, escape_body=True, value=value, attrs=option_attrs)) - return create_html_tag("select", body='\n'.join(body), attrs=attrs, **other_attrs) + elif isinstance(options, (list, tuple)): + options = list(options) + items = [] + for item in options: + if isinstance(item, (str, unicode)): + items.append((item, item)) + elif isinstance(item, (tuple, list)) and len(item) == 2: + items.append(tuple(item)) + else: + raise ValueError('Item "%s" of incompatible type: %s' % (item, type(item))) + else: + raise ValueError('Options of incompatible type: %s' % type(options)) + for key, value in items: + option_attrs = {} + if key in selected: + option_attrs['selected'] = 'selected' + if key in disabled: + option_attrs['disabled'] = 'disabled' + body.append(create_tag("option", body=value, escape_body=True, value=key, attrs=option_attrs)) + if attrs is None: + attrs = {} + if name is not None: + attrs['name'] = name + if multiple: + attrs['multiple'] = 'multiple' + return create_tag("select", body='\n'.join(body), attrs=attrs, **other_attrs) class _LinkGetter(HTMLParser): """ Hidden class that, by deriving from HTMLParser, will intercept all tags and retrieve the corresponding href attribute. All URLs are available in the urls attribute of the class. """ def __init__(self): HTMLParser.__init__(self) self.urls = set() def handle_starttag(self, tag, attrs): if tag == 'a': for (name, value) in attrs: if name == 'href': self.urls.add(value) def get_links_in_html_page(html): """ @param html: the HTML text to parse @type html: str @return: the list of URLs that were referenced via tags. @rtype: set of str """ parser = _LinkGetter() parser.feed(html) return parser.urls diff --git a/modules/miscutil/lib/htmlutils_tests.py b/modules/miscutil/lib/htmlutils_tests.py index eeee301c2..fa51720e3 100644 --- a/modules/miscutil/lib/htmlutils_tests.py +++ b/modules/miscutil/lib/htmlutils_tests.py @@ -1,243 +1,241 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Unit tests for htmlutils library.""" __revision__ = "$Id$" import unittest from invenio.htmlutils import HTMLWasher, nmtoken_from_string, \ remove_html_markup, create_html_select, \ CFG_TIDY_INSTALLED, \ CFG_BEAUTIFULSOUP_INSTALLED, tidy_html from invenio.testutils import make_test_suite, run_test_suite class XSSEscapingTest(unittest.TestCase): """Test functions related to the prevention of XSS attacks.""" def __init__(self, methodName='test'): self.washer = HTMLWasher() unittest.TestCase.__init__(self, methodName) def test_forbidden_formatting_tags(self): """htmlutils - washing of tags altering formatting of a page (e.g. )""" test_str = """""" self.assertEqual(self.washer.wash(html_buffer=test_str), '') self.assertEqual(self.washer.wash(html_buffer=test_str, render_unallowed_tags=True), '</html></body></pre>') def test_forbidden_script_tags(self): """htmlutils - washing of tags defining scripts (e.g. """ self.assertEqual(self.washer.wash(html_buffer=test_str), '') self.assertEqual(self.washer.wash(html_buffer=test_str, render_unallowed_tags=True), '<script>malicious_function();</script>') def test_forbidden_attributes(self): """htmlutils - washing of forbidden attributes in allowed tags (e.g. onLoad)""" # onload test_str = """

    """ self.assertEqual(self.washer.wash(html_buffer=test_str), '

    ') # tricky: css calling a javascript test_str = """

    """ self.assertEqual(self.washer.wash(html_buffer=test_str), '

    ') def test_fake_url(self): """htmlutils - washing of fake URLs which execute scripts""" test_str = """link""" self.assertEqual(self.washer.wash(html_buffer=test_str), 'link') # Pirates could encode ascii values, or use uppercase letters... test_str = """link""" self.assertEqual(self.washer.wash(html_buffer=test_str), 'link') # MSIE treats 'java\ns\ncript:' the same way as 'javascript:' # Here we test with: # j # avas # crIPt : test_str = """link""" self.assertEqual(self.washer.wash(html_buffer=test_str), 'link') class CharactersEscapingTest(unittest.TestCase): """Test functions related to escaping reserved or forbidden characters """ def test_convert_string_to_nmtoken(self): """htmlutils - converting string to Nmtoken""" # TODO: possibly extend this test to include 'extenders' and # 'combining characters' as defined in # http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken ascii_str = "".join([chr(i) for i in range(0, 256)]) nmtoken = nmtoken_from_string(ascii_str) for char in nmtoken: self.assert_(char in ['.', '-', '_', ':'] or char.isalnum()) class HTMLWashingTest(unittest.TestCase): """Test functions related to general washing of HTML source""" def __init__(self, methodName='test'): self.washer = HTMLWasher() unittest.TestCase.__init__(self, methodName) def test_wash_html(self): """htmlutils - washing HTML tags""" # Simple test case test_str = 'Spam and eggs' self.assertEqual(self.washer.wash(html_buffer=test_str), 'Spam and eggs') # Show 'escaped' tags test_str = 'Spam and eggs' self.assertEqual(self.washer.wash(html_buffer=test_str, render_unallowed_tags=True), 'Spam and <blink>eggs</blink>') # Keep entity and character references test_str = ' a < b > c ÷' self.assertEqual(self.washer.wash(html_buffer=test_str), ' a < b > c ÷') # Remove content of bar' self.assertEqual(self.washer.wash(html_buffer=test_str), 'bar') test_str = 'bar' self.assertEqual(self.washer.wash(html_buffer=test_str), 'bar') # Remove content of styled text' self.assertEqual(self.washer.wash(html_buffer=test_str), 'styled text') test_str = 'styled text' self.assertEqual(self.washer.wash(html_buffer=test_str), 'styled text') class HTMLTidyingTest(unittest.TestCase): """Test functions related to tidying up HTML source""" html_buffer_1 = 'test' html_buffer_2 = '

    test
    test2' html_buffer_3 = '''
    • Merge adjacent lists
      ''' # Input test 427841 from Tidy def test_tidy_html_with_utidylib(self): """htmlutils - Tidying up HTML with µTidylib """ res1 = tidy_html(self.html_buffer_1, 'utidylib') res2 = tidy_html(self.html_buffer_2, 'utidylib') res3 = tidy_html(self.html_buffer_3, 'utidylib') if CFG_TIDY_INSTALLED: self.assertEqual(res1.replace('\n', '').replace(' ', ''), 'test') self.assertEqual(res2.replace('\n', '').replace(' ', ''), '
      test
      test2
      ') self.assertEqual(res3.replace('\n', '').replace(' ', ''), '
        • Next
        • Back
        • NewStuff
      • Mergeadjacentlists
      • One
      • Two
      • Three
    ') else: self.assertEqual(res1, res1) self.assertEqual(res2, res2) self.assertEqual(res3, res3) def test_tidy_html_with_beautifulsoup(self): """htmlutils - Tidying up HTML with BeautifulSoup""" res1 = tidy_html(self.html_buffer_1, 'beautifulsoup') res2 = tidy_html(self.html_buffer_2, 'beautifulsoup') res3 = tidy_html(self.html_buffer_3, 'beautifulsoup') if CFG_TIDY_INSTALLED: self.assertEqual(res1.replace('\n', '').replace(' ', ''), 'test') self.assertEqual(res2.replace('\n', '').replace(' ', ''), '
    test
    test2
    ') self.assertEqual(res3.replace('\n', '').replace(' ', ''), '
      • Next
      • Back
      • NewStuff
    • Mergeadjacentlists
      • One
      • Two
      • Three
    ') else: self.assertEqual(res1, res1) self.assertEqual(res2, res2) self.assertEqual(res3, res3) def test_tidy_html_with_unknown_lib(self): """htmlutils - Tidying up HTML with non existing library""" res = tidy_html(self.html_buffer_1, 'foo') self.assertEqual(res.replace('\n', '').replace(' ', ''), self.html_buffer_1.replace('\n', '').replace(' ', '')) class HTMLMarkupRemovalTest(unittest.TestCase): """Test functions related to removing HTML markup.""" def test_remove_html_markup_empty(self): """htmlutils - remove HTML markup, empty replacement""" test_input = 'This is test.' test_expected = 'This is test.' self.assertEqual(remove_html_markup(test_input, ''), test_expected) def test_remove_html_markup_replacement(self): """htmlutils - remove HTML markup, some replacement""" test_input = 'This is test.' test_expected = 'This is XtestX.' self.assertEqual(remove_html_markup(test_input, 'X'), test_expected) class HTMLCreation(unittest.TestCase): """Test functions related to creation of HTML markup.""" def test_create_html_select(self): """htmlutils - create HTML \n \n \n') + self.assertEqual(create_html_select(["foo", "bar"], selected="bar", name="baz"), + '') TEST_SUITE = make_test_suite(XSSEscapingTest, CharactersEscapingTest, HTMLWashingTest, HTMLMarkupRemovalTest, HTMLTidyingTest, HTMLCreation) if __name__ == "__main__": run_test_suite(TEST_SUITE) - - diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py index 3e56bb34e..df73a7373 100644 --- a/modules/miscutil/lib/inveniocfg.py +++ b/modules/miscutil/lib/inveniocfg.py @@ -1,1321 +1,1326 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio configuration and administration CLI tool. 
Usage: inveniocfg [options] General options: -h, --help print this help -V, --version print version number Options to finish your installation: --create-apache-conf create Apache configuration files --create-tables create DB tables for Invenio --load-webstat-conf load the WebStat configuration --drop-tables drop DB tables of Invenio --check-openoffice check for a correctly set up OpenOffice temporary directory Options to set up and test a demo site: --create-demo-site create demo site --load-demo-records load demo records --remove-demo-records remove demo records, keeping demo site --drop-demo-site drop demo site configurations too --run-unit-tests run unit test suite (needs demo site) --run-regression-tests run regression test suite (needs demo site) --run-web-tests run web tests in a browser (needs demo site, Firefox, Selenium IDE) Options to update config files in situ: --update-all perform all the update options --update-config-py update config.py file from invenio.conf file --update-dbquery-py update dbquery.py with DB credentials from invenio.conf --update-dbexec update dbexec with DB credentials from invenio.conf --update-bibconvert-tpl update bibconvert templates with CFG_SITE_URL from invenio.conf --update-web-tests update web test cases with CFG_SITE_URL from invenio.conf Options to update DB tables: --reset-all perform all the reset options --reset-sitename reset tables to take account of new CFG_SITE_NAME* --reset-siteadminemail reset tables to take account of new CFG_SITE_ADMIN_EMAIL --reset-fieldnames reset tables to take account of new I18N names from PO files --reset-recstruct-cache reset record structure cache according to CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE Options to help the work: --list print names and values of all options from conf files --get get value of a given option from conf files --conf-dir path to directory where invenio*.conf files are [optional] --detect-system-details print system details such as Apache/Python/MySQL versions """ __revision__ = "$Id$" from ConfigParser import ConfigParser import os import re import shutil import socket import sys def print_usage(): """Print help.""" print __doc__ def print_version(): """Print version information.""" print __revision__ def convert_conf_option(option_name, option_value): """ Convert conf option into Python config.py line, converting values to ints or strings as appropriate.
""" ## 1) convert option name to uppercase: option_name = option_name.upper() ## 2) convert option value to int or string: if option_name in ['CFG_BIBUPLOAD_REFERENCE_TAG', 'CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS',]: # some options are supposed be string even when they look like # numeric option_value = '"' + option_value + '"' else: try: option_value = int(option_value) except ValueError: option_value = '"' + option_value + '"' ## 3a) special cases: chars regexps if option_name in ['CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS', 'CFG_BIBINDEX_CHARS_PUNCTUATION']: option_value = 'r"[' + option_value[1:-1] + ']"' ## 3abis) special cases: real regexps if option_name in ['CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES']: option_value = 'r"' + option_value[1:-1] + '"' ## 3b) special cases: True, False, None if option_value in ['"True"', '"False"', '"None"']: option_value = option_value[1:-1] ## 3c) special cases: dicts if option_name in ['CFG_WEBSEARCH_FIELDS_CONVERT', 'CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS', 'CFG_SITE_EMERGENCY_EMAIL_ADDRESSES', 'CFG_BIBMATCH_FUZZY_WORDLIMITS', 'CFG_BIBMATCH_QUERY_TEMPLATES', 'CFG_WEBSEARCH_SYNONYM_KBRS', 'CFG_BIBINDEX_SYNONYM_KBRS', 'CFG_WEBCOMMENT_EMAIL_REPLIES_TO', 'CFG_WEBCOMMENT_RESTRICTION_DATAFIELD', 'CFG_WEBCOMMENT_ROUND_DATAFIELD', 'CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS', 'CFG_BIBSCHED_NODE_TASKS', - 'CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE']: + 'CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE', + 'CFG_OAI_METADATA_FORMATS']: option_value = option_value[1:-1] ## 3cbis) very special cases: dicts with backward compatible string if option_name in ['CFG_BIBINDEX_SPLASH_PAGES']: if option_value.startswith('"{') and option_value.endswith('}"'): option_value = option_value[1:-1] else: option_value = """{%s: ".*"}""" % option_value ## 3d) special cases: comma-separated lists if option_name in ['CFG_SITE_LANGS', 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', 'CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS', 'CFG_BIBSCHED_GC_TASKS_TO_REMOVE', 'CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE', 'CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS', 'CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS', 'CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES', 'CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST', 'CFG_WEBSEARCH_RSS_I18N_COLLECTIONS', 'CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY', 'CFG_BATCHUPLOADER_WEB_ROBOT_AGENT', 'CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY', - 'CFG_PLOTEXTRACTOR_DISALLOWED_TEX']: + 'CFG_PLOTEXTRACTOR_DISALLOWED_TEX', + 'CFG_OAI_FRIENDS']: out = "[" for elem in option_value[1:-1].split(","): if elem: if option_name in ['CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES']: # 3d1) integer values out += "%i, " % int(elem) else: # 3d2) string values out += "'%s', " % elem out += "]" option_value = out ## 3e) special cases: multiline if option_name == 'CFG_OAI_IDENTIFY_DESCRIPTION': # make triple quotes option_value = '""' + option_value + '""' ## 3f) ignore some options: if option_name.startswith('CFG_SITE_NAME_INTL'): # treated elsewhere return ## 3g) special cases: float if option_name in ['CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY', 'CFG_BIBMATCH_LOCAL_SLEEPTIME', 'CFG_BIBMATCH_REMOTE_SLEEPTIME', 'CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA', 'CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA', 'CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH', 'CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N', 
'CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT']: option_value = float(option_value[1:-1]) ## 4) finally, return output line: return '%s = %s' % (option_name, option_value) def cli_cmd_update_config_py(conf): """ Update new config.py from conf options, keeping previous config.py in a backup copy. """ print ">>> Going to update config.py..." ## location where config.py is: configpyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'config.py' ## backup current config.py file: if os.path.exists(configpyfile): shutil.copy(configpyfile, configpyfile + '.OLD') ## here we go: fdesc = open(configpyfile, 'w') ## generate preamble: fdesc.write("# -*- coding: utf-8 -*-\n") fdesc.write("# DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED\n") fdesc.write("# FROM INVENIO.CONF BY EXECUTING:\n") fdesc.write("# " + " ".join(sys.argv) + "\n") ## special treatment for CFG_SITE_NAME_INTL options: fdesc.write("CFG_SITE_NAME_INTL = {}\n") for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): fdesc.write("CFG_SITE_NAME_INTL['%s'] = \"%s\"\n" % (lang, conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang))) ## special treatment for CFG_SITE_SECURE_URL that may be empty, in ## which case it should be put equal to CFG_SITE_URL: if not conf.get("Invenio", "CFG_SITE_SECURE_URL"): conf.set("Invenio", "CFG_SITE_SECURE_URL", conf.get("Invenio", "CFG_SITE_URL")) ## process all the options normally: sections = conf.sections() sections.sort() for section in sections: options = conf.options(section) options.sort() for option in options: if not option.startswith('CFG_DATABASE_'): # put all options except for db credentials into config.py line_out = convert_conf_option(option, conf.get(section, option)) if line_out: fdesc.write(line_out + "\n") ## FIXME: special treatment for experimental variables ## CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES and CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE ## (not offering them in invenio.conf since they will be refactored) fdesc.write("CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE = 0\n") fdesc.write("CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = [0, 1,]\n") ## generate postamble: fdesc.write("") fdesc.write("# END OF GENERATED FILE") ## we are done: fdesc.close() print "You may want to restart Apache now." print ">>> config.py updated successfully." def cli_cmd_update_dbquery_py(conf): """ Update lib/dbquery.py file with DB parameters read from conf file. Note: this edits dbquery.py in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbquery.py..." ## location where dbquery.py is: dbquerypyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'dbquery.py' ## backup current dbquery.py file: if os.path.exists(dbquerypyfile): shutil.copy(dbquerypyfile, dbquerypyfile + '.OLD') ## replace db parameters: out = '' for line in open(dbquerypyfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get('Invenio', dbparam)) else: out += line fdesc = open(dbquerypyfile, 'w') fdesc.write(out) fdesc.close() print "You may want to restart Apache now." print ">>> dbquery.py updated successfully." def cli_cmd_update_dbexec(conf): """ Update bin/dbexec file with DB parameters read from conf file. Note: this edits dbexec in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbexec..." 
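# (Illustrative, not part of the original code.) The loop below rewrites
# credential lines matched by
#     r"^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS)(\s*=\s*)'.*'$"
# replacing the quoted value with the one read from the conf file, e.g.
# turning
#     CFG_DATABASE_HOST = 'localhost'
# into (hypothetical value)
#     CFG_DATABASE_HOST = 'db.example.org'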
## location where dbexec is: dbexecfile = conf.get("Invenio", "CFG_BINDIR") + \ os.sep + 'dbexec' ## backup current dbexec file: if os.path.exists(dbexecfile): shutil.copy(dbexecfile, dbexecfile + '.OLD') ## replace db parameters: out = '' for line in open(dbexecfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get("Invenio", dbparam)) else: out += line fdesc = open(dbexecfile, 'w') fdesc.write(out) fdesc.close() print ">>> dbexec updated successfully." def cli_cmd_update_bibconvert_tpl(conf): """ Update bibconvert/config/*.tpl files looking for 856 http://.../CFG_SITE_RECORD lines, replacing URL with CFG_SITE_URL taken from conf file. Note: this edits tpl files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update bibconvert templates..." ## location where bibconvert/config/*.tpl are: tpldir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'bibconvert' + os.sep + 'config' ## find all *.tpl files: for tplfilename in os.listdir(tpldir): if tplfilename.endswith(".tpl"): ## change tpl file: tplfile = tpldir + os.sep + tplfilename shutil.copy(tplfile, tplfile + '.OLD') out = '' for line in open(tplfile, 'r').readlines(): match = re.search(r'^(.*)http://.*?/%s/(.*)$' % conf.get("Invenio", 'CFG_SITE_RECORD'), line) if match: out += "%s%s/%s/%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), conf.get("Invenio", 'CFG_SITE_RECORD'), match.group(2)) else: out += line fdesc = open(tplfile, 'w') fdesc.write(out) fdesc.close() print ">>> bibconvert templates updated successfully." def cli_cmd_update_web_tests(conf): """ Update web test cases lib/webtest/test_*.html, replacing URL strings with CFG_SITE_URL and /opt/invenio prefixes with CFG_PREFIX taken from conf file. Note: this edits test files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update web tests..." ## location where test_*.html files are: testdir = conf.get("Invenio", 'CFG_PREFIX') + os.sep + \ 'lib' + os.sep + 'webtest' + os.sep + 'invenio' ## find all test_*.html files: for testfilename in os.listdir(testdir): if testfilename.startswith("test_") and \ testfilename.endswith(".html"): ## change test file: testfile = testdir + os.sep + testfilename shutil.copy(testfile, testfile + '.OLD') out = '' for line in open(testfile, 'r').readlines(): match = re.search(r'^(.*)http://.+?([<"].*)$', line) if match: out += "%s%s%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), match.group(2)) else: match = re.search(r'^(.*)/opt/invenio(.*)$', line) if match: out += "%s%s%s\n" % (match.group(1), conf.get("Invenio", 'CFG_PREFIX'), match.group(2)) else: out += line fdesc = open(testfile, 'w') fdesc.write(out) fdesc.close() print ">>> web tests updated successfully." def cli_cmd_reset_sitename(conf): """ Reset collection-related tables with new CFG_SITE_NAME and CFG_SITE_NAME_INTL* read from conf files. """ print ">>> Going to reset CFG_SITE_NAME and CFG_SITE_NAME_INTL..."
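# (Illustrative, not part of the original code.) The try/except blocks below
# implement a simple upsert; for CFG_SITE_NAME the effect is:
#     INSERT INTO collection (id, name, dbquery, reclist) VALUES (1, <sitename>, NULL, NULL)
#     -- or, if row id=1 already exists (IntegrityError):
#     UPDATE collection SET name=<sitename> WHERE id=1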
from invenio.dbquery import run_sql, IntegrityError # reset CFG_SITE_NAME: sitename = conf.get("Invenio", "CFG_SITE_NAME") try: run_sql("""INSERT INTO collection (id, name, dbquery, reclist) VALUES (1,%s,NULL,NULL)""", (sitename,)) except IntegrityError: run_sql("""UPDATE collection SET name=%s WHERE id=1""", (sitename,)) # reset CFG_SITE_NAME_INTL: for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): sitename_lang = conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang) try: run_sql("""INSERT INTO collectionname (id_collection, ln, type, value) VALUES (%s,%s,%s,%s)""", (1, lang, 'ln', sitename_lang)) except IntegrityError: run_sql("""UPDATE collectionname SET value=%s WHERE ln=%s AND id_collection=1 AND type='ln'""", (sitename_lang, lang)) print "You may want to restart Apache now." print ">>> CFG_SITE_NAME and CFG_SITE_NAME_INTL* reset successfully." def cli_cmd_reset_recstruct_cache(conf): """If CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE is changed, this function will adapt the database to either store or not store the recstruct format.""" from invenio.intbitset import intbitset from invenio.dbquery import run_sql, serialize_via_marshal from invenio.search_engine import get_record from invenio.bibsched import server_pid, pidfile enable_recstruct_cache = conf.get("Invenio", "CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE") enable_recstruct_cache = enable_recstruct_cache in ('True', '1') pid = server_pid(ping_the_process=False) if pid: print >> sys.stderr, "ERROR: bibsched seems to run with pid %d, according to %s." % (pid, pidfile) print >> sys.stderr, " Please stop bibsched before running this procedure." sys.exit(1) if enable_recstruct_cache: print ">>> Searching records which need recstruct cache resetting; this may take a while..." all_recids = intbitset(run_sql("SELECT id FROM bibrec")) good_recids = intbitset(run_sql("SELECT bibrec.id FROM bibrec JOIN bibfmt ON bibrec.id = bibfmt.id_bibrec WHERE format='recstruct' AND modification_date < last_updated")) recids = all_recids - good_recids print ">>> Generating recstruct cache..." tot = len(recids) count = 0 for recid in recids: value = serialize_via_marshal(get_record(recid)) run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s AND format='recstruct'", (recid, )) run_sql("INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, 'recstruct', NOW(), %s)", (recid, value)) count += 1 if count % 1000 == 0: print " ... done records %s/%s" % (count, tot) if count % 1000 != 0: print " ... done records %s/%s" % (count, tot) print ">>> recstruct cache generated successfully." else: print ">>> Cleaning recstruct cache..." run_sql("DELETE FROM bibfmt WHERE format='recstruct'") def cli_cmd_reset_siteadminemail(conf): """ Reset user-related tables with new CFG_SITE_ADMIN_EMAIL read from conf files. """ print ">>> Going to reset CFG_SITE_ADMIN_EMAIL..." from invenio.dbquery import run_sql siteadminemail = conf.get("Invenio", "CFG_SITE_ADMIN_EMAIL") run_sql("DELETE FROM user WHERE id=1") run_sql("""INSERT INTO user (id, email, password, note, nickname) VALUES (1, %s, AES_ENCRYPT(email, ''), 1, 'admin')""", (siteadminemail,)) print "You may want to restart Apache now." print ">>> CFG_SITE_ADMIN_EMAIL reset successfully." def cli_cmd_reset_fieldnames(conf): """ Reset I18N field names such as author, title, etc and other I18N ranking method names such as word similarity. Their translations are taken from the PO files. """ print ">>> Going to reset I18N field names..." 
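# (Illustrative, not part of the original code.) For every language and every
# known field, the code below upserts one row of the fieldname table, e.g.
# (hypothetical values) id_field=2, ln='fr', type='ln', value='titre', so
# that the displayed I18N field names follow the PO translations.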
from invenio.messages import gettext_set_language, language_list_long from invenio.dbquery import run_sql, IntegrityError ## get field id and name list: field_id_name_list = run_sql("SELECT id, name FROM field") ## get rankmethod id and name list: rankmethod_id_name_list = run_sql("SELECT id, name FROM rnkMETHOD") ## update names for every language: for lang, dummy in language_list_long(): _ = gettext_set_language(lang) ## this list is put here in order for PO system to pick names ## suitable for translation field_name_names = {"any field": _("any field"), "title": _("title"), "author": _("author"), "abstract": _("abstract"), "keyword": _("keyword"), "report number": _("report number"), "subject": _("subject"), "reference": _("reference"), "fulltext": _("fulltext"), "collection": _("collection"), "division": _("division"), "year": _("year"), "journal": _("journal"), "experiment": _("experiment"), "record ID": _("record ID")} ## update I18N names for every language: for (field_id, field_name) in field_id_name_list: if field_name_names.has_key(field_name): try: run_sql("""INSERT INTO fieldname (id_field,ln,type,value) VALUES (%s,%s,%s,%s)""", (field_id, lang, 'ln', field_name_names[field_name])) except IntegrityError: run_sql("""UPDATE fieldname SET value=%s WHERE id_field=%s AND ln=%s AND type=%s""", (field_name_names[field_name], field_id, lang, 'ln',)) ## ditto for rank methods: rankmethod_name_names = {"wrd": _("word similarity"), "demo_jif": _("journal impact factor"), "citation": _("times cited"), "citerank_citation_t": _("time-decay cite count"), "citerank_pagerank_c": _("all-time-best cite rank"), "citerank_pagerank_t": _("time-decay cite rank"),} for (rankmethod_id, rankmethod_name) in rankmethod_id_name_list: if rankmethod_name_names.has_key(rankmethod_name): try: run_sql("""INSERT INTO rnkMETHODNAME (id_rnkMETHOD,ln,type,value) VALUES (%s,%s,%s,%s)""", (rankmethod_id, lang, 'ln', rankmethod_name_names[rankmethod_name])) except IntegrityError: run_sql("""UPDATE rnkMETHODNAME SET value=%s WHERE id_rnkMETHOD=%s AND ln=%s AND type=%s""", (rankmethod_name_names[rankmethod_name], rankmethod_id, lang, 'ln',)) print ">>> I18N field names reset successfully." def cli_check_openoffice(conf): """ If OpenOffice.org integration is enabled, checks whether the system is properly configured. """ from invenio.bibtask import check_running_process_user from invenio.websubmit_file_converter import can_unoconv, get_file_converter_logger logger = get_file_converter_logger() for handler in logger.handlers: logger.removeHandler(handler) check_running_process_user() print ">>> Checking if Libre/OpenOffice.org is correctly integrated...", sys.stdout.flush() if can_unoconv(True): print "ok" else: sys.exit(1) def test_db_connection(): """ Test DB connection, and if fails, advise user how to set it up. Useful to be called during table creation. """ print "Testing DB connection...", from invenio.textutils import wrap_text_in_a_box from invenio.dbquery import run_sql, Error ## first, test connection to the DB server: try: run_sql("SHOW TABLES") except Error, err: from invenio.dbquery import CFG_DATABASE_HOST, CFG_DATABASE_PORT, \ CFG_DATABASE_NAME, CFG_DATABASE_USER, CFG_DATABASE_PASS print wrap_text_in_a_box("""\ DATABASE CONNECTIVITY ERROR %(errno)d: %(errmsg)s.\n Perhaps you need to set up database and connection rights? 

def cli_cmd_create_tables(conf):
    """Create and fill Invenio DB tables.  Useful for the installation process."""
    print ">>> Going to create and fill tables..."
    from invenio.config import CFG_PREFIX
    test_db_connection()
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabcreate.sql" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/dbexec < %s/lib/sql/invenio/tabfill.sql" % (CFG_PREFIX, CFG_PREFIX)]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    cli_cmd_reset_sitename(conf)
    cli_cmd_reset_siteadminemail(conf)
    cli_cmd_reset_fieldnames(conf)
    for cmd in ["%s/bin/webaccessadmin -u admin -c -a" % CFG_PREFIX]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Tables created and filled successfully."

def cli_cmd_load_webstat_conf(conf):
    print ">>> Going to load WebStat config..."
    from invenio.config import CFG_PREFIX
    cmd = "%s/bin/webstatadmin --load-config" % CFG_PREFIX
    if os.system(cmd):
        print "ERROR: failed execution of", cmd
        sys.exit(1)
    print ">>> WebStat config loaded successfully."
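
## cli_cmd_create_tables() above simply pipes the SQL definitions through
## the dbexec helper.  The equivalent manual invocation, assuming the
## common CFG_PREFIX of /opt/invenio (illustrative path only):
##
##   $ /opt/invenio/bin/dbexec < /opt/invenio/lib/sql/invenio/tabcreate.sql
##   $ /opt/invenio/bin/dbexec < /opt/invenio/lib/sql/invenio/tabfill.sql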

def cli_cmd_drop_tables(conf):
    """Drop Invenio DB tables.  Useful for the uninstallation process."""
    print ">>> Going to drop tables..."
    from invenio.config import CFG_PREFIX
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables!"""))
    cmd = "%s/bin/dbexec < %s/lib/sql/invenio/tabdrop.sql" % (CFG_PREFIX, CFG_PREFIX)
    if os.system(cmd):
        print "ERROR: failed execution of", cmd
        sys.exit(1)
    print ">>> Tables dropped successfully."

def cli_cmd_create_demo_site(conf):
    """Create demo site.  Useful for testing purposes."""
    print ">>> Going to create demo site..."
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    run_sql("TRUNCATE schTASK")
    run_sql("TRUNCATE session")
    run_sql("DELETE FROM user WHERE email=''")
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/democfgdata.sql" % \
                (CFG_PREFIX, CFG_PREFIX),]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    cli_cmd_reset_fieldnames(conf) # needed for I18N demo ranking method names
    for cmd in ["%s/bin/webaccessadmin -u admin -c -r -D" % CFG_PREFIX,
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 1" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo site created successfully."

def cli_cmd_load_demo_records(conf):
    """Load demo records.  Useful for testing purposes."""
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    print ">>> Going to load demo records..."
    run_sql("TRUNCATE schTASK")
    for cmd in ["%s/bin/bibupload -u admin -i %s/var/tmp/demobibdata.xml" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/bibupload 1" % CFG_PREFIX,
                "%s/bin/bibdocfile --textify --with-ocr --recid 97" % CFG_PREFIX,
                "%s/bin/bibdocfile --textify --all" % CFG_PREFIX,
                "%s/bin/bibindex -u admin" % CFG_PREFIX,
                "%s/bin/bibindex 2" % CFG_PREFIX,
                "%s/bin/bibreformat -u admin -o HB" % CFG_PREFIX,
                "%s/bin/bibreformat 3" % CFG_PREFIX,
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 4" % CFG_PREFIX,
                "%s/bin/bibrank -u admin" % CFG_PREFIX,
-                "%s/bin/bibrank 5" % CFG_PREFIX,]:
+                "%s/bin/bibrank 5" % CFG_PREFIX,
+                "%s/bin/oairepositoryupdater -u admin" % CFG_PREFIX,
+                "%s/bin/oairepositoryupdater 6" % CFG_PREFIX,
+                "%s/bin/bibupload 7" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo records loaded successfully."

def cli_cmd_remove_demo_records(conf):
    """Remove demo records.  Useful when you are finished testing."""
    print ">>> Going to remove demo records..."
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your records and documents!"""))
    if os.path.exists(CFG_PREFIX + os.sep + 'var' + os.sep + 'data'):
        shutil.rmtree(CFG_PREFIX + os.sep + 'var' + os.sep + 'data')
    run_sql("TRUNCATE schTASK")
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabbibclean.sql" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 1" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo records removed successfully."

def cli_cmd_drop_demo_site(conf):
    """Drop demo site completely.  Useful when you are finished testing."""
    print ">>> Going to drop demo site..."
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your site and documents!"""))
    cli_cmd_drop_tables(conf)
    cli_cmd_create_tables(conf)
    cli_cmd_remove_demo_records(conf)
    print ">>> Demo site dropped successfully."

def cli_cmd_run_unit_tests(conf):
    """Run unit tests, usually on the working demo site."""
    from invenio.testutils import build_and_run_unit_test_suite
    build_and_run_unit_test_suite()

def cli_cmd_run_regression_tests(conf):
    """Run regression tests, usually on the working demo site."""
    from invenio.testutils import build_and_run_regression_test_suite
    build_and_run_regression_test_suite()

def cli_cmd_run_web_tests(conf):
    """Run web tests in a browser.  Requires Firefox with Selenium."""
    from invenio.testutils import build_and_run_web_test_suite
    build_and_run_web_test_suite()

def _detect_ip_address():
    """Detect IP address of this computer.  Useful for creating Apache
    vhost conf snippet on RHEL-like machines.

    @return: IP address, or '*' if it cannot be detected
    @rtype: string

    @note: creates a socket for real in order to detect the real IP
        address, not the loopback one.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('invenio-software.org', 0))
        return s.getsockname()[0]
    except:
        return '*'
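
## Note on the trick above: connect() on a SOCK_DGRAM (UDP) socket does
## not send any packet; it merely makes the kernel choose a route, so
## getsockname() then reports the address of the outgoing network
## interface instead of 127.0.0.1.  Standalone sketch (the destination
## host and port are arbitrary, since no traffic is actually generated):
#
#   s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
#   s.connect(('invenio-software.org', 80))
#   print s.getsockname()[0]    # e.g. '192.0.2.15', never the loopback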

def cli_cmd_create_apache_conf(conf):
    """
    Create Apache conf files for this site, keeping previous
    files in a backup copy.
    """
    print ">>> Going to create Apache conf files..."
    from invenio.textutils import wrap_text_in_a_box
    from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO
    apache_conf_dir = conf.get("Invenio", 'CFG_ETCDIR') + \
                      os.sep + 'apache'

    ## Preparation of XSendFile directive
    xsendfile_directive_needed = int(conf.get("Invenio", 'CFG_BIBDOCFILE_USE_XSENDFILE')) != 0
    if xsendfile_directive_needed:
        xsendfile_directive = "XSendFile On\n"
    else:
        xsendfile_directive = "#XSendFile On\n"
    for path in (conf.get('Invenio', 'CFG_WEBSUBMIT_FILEDIR'), # BibDocFile
                 conf.get('Invenio', 'CFG_WEBDIR'),
                 conf.get('Invenio', 'CFG_WEBSUBMIT_STORAGEDIR'), # WebSubmit
                 conf.get('Invenio', 'CFG_TMPDIR'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'tmp', 'attachfile'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'comments'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'baskets', 'comments'),
                 '/tmp'): # BibExport
        if xsendfile_directive_needed:
            xsendfile_directive += '        XSendFilePath %s\n' % path
        else:
            xsendfile_directive += '        #XSendFilePath %s\n' % path
    xsendfile_directive = xsendfile_directive.strip()

    ## Preparation of deflate directive
    deflate_directive_needed = int(conf.get("Invenio", 'CFG_WEBSTYLE_HTTP_USE_COMPRESSION')) != 0
    if deflate_directive_needed:
        deflate_directive = r"""
        ## Configuration snippet taken from:
        ## <http://httpd.apache.org/docs/2.2/mod/mod_deflate.html>
        SetOutputFilter DEFLATE

        # Netscape 4.x has some problems...
        BrowserMatch ^Mozilla/4 gzip-only-text/html

        # Netscape 4.06-4.08 have some more problems
        BrowserMatch ^Mozilla/4\.0[678] no-gzip

        # MSIE masquerades as Netscape, but it is fine
        # BrowserMatch \bMSIE !no-gzip !gzip-only-text/html

        # NOTE: Due to a bug in mod_setenvif up to Apache 2.0.48
        # the above regex won't work.  You can use the following
        # workaround to get the desired effect:
        BrowserMatch \bMSI[E] !no-gzip !gzip-only-text/html

        # Don't compress images
        SetEnvIfNoCase Request_URI \
            \.(?:gif|jpe?g|png)$ no-gzip dont-vary

        # Make sure proxies don't deliver the wrong content
        Header append Vary User-Agent env=!dont-vary
"""
    else:
        deflate_directive = ""

    if CFG_EXTERNAL_AUTH_USING_SSO:
        shibboleth_directive = r"""
        SSLRequireSSL            # The modules only work using HTTPS
        AuthType shibboleth
        ShibRequireSession On
        ShibRequireAll On
        ShibExportAssertion Off
        require valid-user
"""
    else:
        shibboleth_directive = ""

    ## Apache vhost conf file is distro specific, so analyze needs:
    # Gentoo (and generic defaults):
    listen_directive_needed = True
    ssl_pem_directive_needed = False
    ssl_pem_path = '/etc/apache2/ssl/apache.pem'
    ssl_crt_path = '/etc/apache2/ssl/server.crt'
    ssl_key_path = '/etc/apache2/ssl/server.key'
    vhost_ip_address_needed = False
    wsgi_socket_directive_needed = False
    # Debian:
    if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'debian_version'):
        listen_directive_needed = False
        ssl_pem_directive_needed = True
    # RHEL/SLC:
    if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'redhat-release'):
        listen_directive_needed = False
        ssl_crt_path = '/etc/pki/tls/certs/localhost.crt'
        ssl_key_path = '/etc/pki/tls/private/localhost.key'
        vhost_ip_address_needed = True
        wsgi_socket_directive_needed = True
    # maybe we are using non-standard ports?
    vhost_site_url = conf.get('Invenio', 'CFG_SITE_URL').replace("http://", "")
    if vhost_site_url.startswith("https://"):
        ## The installation is configured to require HTTPS for any connection
        vhost_site_url = vhost_site_url.replace("https://", "")
    vhost_site_url_port = '80'
    vhost_site_secure_url = conf.get('Invenio', 'CFG_SITE_SECURE_URL').replace("https://", "")
    vhost_site_secure_url_port = '443'
    if ':' in vhost_site_url:
        vhost_site_url, vhost_site_url_port = vhost_site_url.split(':', 1)
    if ':' in vhost_site_secure_url:
        vhost_site_secure_url, vhost_site_secure_url_port = vhost_site_secure_url.split(':', 1)
    if vhost_site_url_port != '80' or vhost_site_secure_url_port != '443':
        listen_directive_needed = True

    ## OK, let's create Apache vhost files:
    if not os.path.exists(apache_conf_dir):
        os.mkdir(apache_conf_dir)
    apache_vhost_file = apache_conf_dir + os.sep + \
                        'invenio-apache-vhost.conf'
    apache_vhost_ssl_file = apache_conf_dir + os.sep + \
                            'invenio-apache-vhost-ssl.conf'
    apache_vhost_body = """\
AddDefaultCharset UTF-8
ServerSignature Off
ServerTokens Prod
NameVirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s
%(listen_directive)s
%(wsgi_socket_directive)s
WSGIRestrictStdout Off
<Files *.pyc>
   deny from all
</Files>
<Files *~>
   deny from all
</Files>
<VirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s>
        ServerName %(servername)s
        ServerAlias %(serveralias)s
        ServerAdmin %(serveradmin)s
        DocumentRoot %(webdir)s
        <Directory %(webdir)s>
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        ErrorLog %(logdir)s/apache.err
        LogLevel warn
        CustomLog %(logdir)s/apache.log combined
        DirectoryIndex index.en.html index.html
        Alias /img/ %(webdir)s/img/
        Alias /js/ %(webdir)s/js/
        Alias /flash/ %(webdir)s/flash/
        Alias /css/ %(webdir)s/css/
        Alias /export/ %(webdir)s/export/
        Alias /MathJax/ %(webdir)s/MathJax/
        Alias /jsCalendar/ %(webdir)s/jsCalendar/
        Alias /ckeditor/ %(webdir)s/ckeditor/
        Alias /mediaelement/ %(webdir)s/mediaelement/
        AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1
        Alias /robots.txt %(webdir)s/robots.txt
        Alias /favicon.ico %(webdir)s/favicon.ico
        WSGIDaemonProcess invenio processes=5 threads=1 display-name=%%{GROUP} inactivity-timeout=3600 maximum-requests=10000
        WSGIImportScript %(wsgidir)s/invenio.wsgi process-group=invenio application-group=%%{GLOBAL}
        WSGIScriptAlias / %(wsgidir)s/invenio.wsgi
        WSGIPassAuthorization On
        %(xsendfile_directive)s
        <Directory %(wsgidir)s>
           WSGIProcessGroup invenio
           WSGIApplicationGroup %%{GLOBAL}
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        %(deflate_directive)s
</VirtualHost>
""" % {'vhost_site_url_port': vhost_site_url_port,
       'servername': vhost_site_url,
       'serveralias': vhost_site_url.split('.')[0],
       'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'),
       'webdir': conf.get('Invenio', 'CFG_WEBDIR'),
       'logdir': conf.get('Invenio', 'CFG_LOGDIR'),
       'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'),
       'wsgidir': os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'),
       'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*',
       'listen_directive': listen_directive_needed and 'Listen ' + vhost_site_url_port or \
                           '#Listen ' + vhost_site_url_port,
       'wsgi_socket_directive': (wsgi_socket_directive_needed and \
                                 'WSGISocketPrefix ' or '#WSGISocketPrefix ') + \
                                conf.get('Invenio', 'CFG_PREFIX') + os.sep + 'var' + os.sep + 'run',
       'xsendfile_directive' : xsendfile_directive,
       'deflate_directive': deflate_directive,
      }
    apache_vhost_ssl_body = """\
ServerSignature Off
ServerTokens Prod
%(listen_directive)s
NameVirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s
%(ssl_pem_directive)s
%(ssl_crt_directive)s
%(ssl_key_directive)s
WSGIRestrictStdout Off
<Files *.pyc>
   deny from all
</Files>
<Files *~>
   deny from all
</Files>
<VirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s>
        ServerName %(servername)s
        ServerAlias %(serveralias)s
        ServerAdmin %(serveradmin)s
        SSLEngine on
        DocumentRoot %(webdir)s
        <Directory %(webdir)s>
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        ErrorLog %(logdir)s/apache-ssl.err
        LogLevel warn
        CustomLog %(logdir)s/apache-ssl.log combined
        DirectoryIndex index.en.html index.html
        Alias /img/ %(webdir)s/img/
        Alias /js/ %(webdir)s/js/
        Alias /flash/ %(webdir)s/flash/
        Alias /css/ %(webdir)s/css/
        Alias /export/ %(webdir)s/export/
        Alias /MathJax/ %(webdir)s/MathJax/
        Alias /jsCalendar/ %(webdir)s/jsCalendar/
        Alias /ckeditor/ %(webdir)s/ckeditor/
        Alias /mediaelement/ %(webdir)s/mediaelement/
        AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1
        Alias /robots.txt %(webdir)s/robots.txt
        Alias /favicon.ico %(webdir)s/favicon.ico
        RedirectMatch /sslredirect/(.*) http://$1
        WSGIScriptAlias / %(wsgidir)s/invenio.wsgi
        WSGIPassAuthorization On
        %(xsendfile_directive)s
        <Directory %(wsgidir)s>
           WSGIProcessGroup invenio
           WSGIApplicationGroup %%{GLOBAL}
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        %(deflate_directive)s
        %(shibboleth_directive)s
</VirtualHost>
""" % {'vhost_site_secure_url_port': vhost_site_secure_url_port,
       'servername': vhost_site_secure_url,
       'serveralias': vhost_site_secure_url.split('.')[0],
       'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'),
       'webdir': conf.get('Invenio', 'CFG_WEBDIR'),
       'logdir': conf.get('Invenio', 'CFG_LOGDIR'),
       'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'),
       'wsgidir' : os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'),
       'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*',
       'listen_directive' : listen_directive_needed and 'Listen ' + vhost_site_secure_url_port or \
                            '#Listen ' + vhost_site_secure_url_port,
       'ssl_pem_directive': ssl_pem_directive_needed and \
                            'SSLCertificateFile %s' % ssl_pem_path or \
                            '#SSLCertificateFile %s' % ssl_pem_path,
       'ssl_crt_directive': ssl_pem_directive_needed and \
                            '#SSLCertificateFile %s' % ssl_crt_path or \
                            'SSLCertificateFile %s' % ssl_crt_path,
       'ssl_key_directive': ssl_pem_directive_needed and \
                            '#SSLCertificateKeyFile %s' % ssl_key_path or \
                            'SSLCertificateKeyFile %s' % ssl_key_path,
       'xsendfile_directive' : xsendfile_directive,
       'deflate_directive': deflate_directive,
       'shibboleth_directive': shibboleth_directive,
      }

    # write HTTP vhost snippet:
    if os.path.exists(apache_vhost_file):
        shutil.copy(apache_vhost_file,
                    apache_vhost_file + '.OLD')
    fdesc = open(apache_vhost_file, 'w')
    fdesc.write(apache_vhost_body)
    fdesc.close()
    print
    print "Created file", apache_vhost_file

    # write HTTPS vhost snippet:
    vhost_ssl_created = False
    if conf.get('Invenio', 'CFG_SITE_SECURE_URL').startswith("https://"):
        if os.path.exists(apache_vhost_ssl_file):
            shutil.copy(apache_vhost_ssl_file,
                        apache_vhost_ssl_file + '.OLD')
        fdesc = open(apache_vhost_ssl_file, 'w')
        fdesc.write(apache_vhost_ssl_body)
        fdesc.close()
        vhost_ssl_created = True
        print "Created file", apache_vhost_ssl_file

    print wrap_text_in_a_box("""\
Apache virtual host configuration file(s) for your Invenio site
was(were) created.  Please check created file(s) and activate virtual
host(s).  For example, you can put the following include statements in
your httpd.conf:\n

Include %s

%s

Please see the INSTALL file for more details.
    """ % (apache_vhost_file,
           (vhost_ssl_created and 'Include ' or '#Include ') + apache_vhost_ssl_file))
    print ">>> Apache conf files created."

def cli_cmd_get(conf, varname):
    """
    Return value of VARNAME read from CONF files.  Useful for
    third-party programs to access values of conf options such as
    CFG_PREFIX.  Return None if VARNAME is not found.
    """
    # do not pay attention to upper/lower case:
    varname = varname.lower()
    # do not pay attention to section names yet:
    all_options = {}
    for section in conf.sections():
        for option in conf.options(section):
            all_options[option] = conf.get(section, option)
    return all_options.get(varname, None)

def cli_cmd_list(conf):
    """
    Print a list of all conf options and values from CONF.
    """
    sections = conf.sections()
    sections.sort()
    for section in sections:
        options = conf.options(section)
        options.sort()
        for option in options:
            print option.upper(), '=', conf.get(section, option)
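
## --get and --list are meant for wrapping Invenio in shell scripts.
## Note that cli_cmd_get() flattens all conf sections into a single
## case-insensitive namespace before the lookup.  Illustrative shell
## usage (the printed value depends on the local installation):
##
##   $ inveniocfg --get CFG_PREFIX
##   /opt/invenio
##   $ inveniocfg --list | grep '^CFG_SITE_LANG'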
""" import MySQLdb print ">>> Going to detect system details..." print "* Hostname: " + socket.gethostname() print "* Invenio version: " + conf.get("Invenio", "CFG_VERSION") print "* Python version: " + sys.version.replace("\n", " ") print "* Apache version: " + ";\n ".join(detect_apache_version()) print "* MySQLdb version: " + MySQLdb.__version__ try: from invenio.dbquery import run_sql print "* MySQL version:" for key, val in run_sql("SHOW VARIABLES LIKE 'version%'") + \ run_sql("SHOW VARIABLES LIKE 'charact%'") + \ run_sql("SHOW VARIABLES LIKE 'collat%'"): if False: print " - %s: %s" % (key, val) elif key in ['version', 'character_set_client', 'character_set_connection', 'character_set_database', 'character_set_results', 'character_set_server', 'character_set_system', 'collation_connection', 'collation_database', 'collation_server']: print " - %s: %s" % (key, val) except ImportError: print "* ERROR: cannot import dbquery" print ">>> System details detected successfully." def main(): """Main entry point.""" conf = ConfigParser() if '--help' in sys.argv or \ '-h' in sys.argv: print_usage() elif '--version' in sys.argv or \ '-V' in sys.argv: print_version() else: confdir = None if '--conf-dir' in sys.argv: try: confdir = sys.argv[sys.argv.index('--conf-dir') + 1] except IndexError: pass # missing --conf-dir argument value if not os.path.exists(confdir): print "ERROR: bad or missing --conf-dir option value." sys.exit(1) else: ## try to detect path to conf dir (relative to this bin dir): confdir = re.sub(r'/bin$', '/etc', sys.path[0]) ## read conf files: for conffile in [confdir + os.sep + 'invenio.conf', confdir + os.sep + 'invenio-autotools.conf', confdir + os.sep + 'invenio-local.conf',]: if os.path.exists(conffile): conf.read(conffile) else: if not conffile.endswith("invenio-local.conf"): # invenio-local.conf is optional, otherwise stop print "ERROR: Badly guessed conf file location", conffile print "(Please use --conf-dir option.)" sys.exit(1) ## decide what to do: done = False for opt_idx in range(0, len(sys.argv)): opt = sys.argv[opt_idx] if opt == '--conf-dir': # already treated before, so skip silently: pass elif opt == '--get': try: varname = sys.argv[opt_idx + 1] except IndexError: print "ERROR: bad or missing --get option value." sys.exit(1) if varname.startswith('-'): print "ERROR: bad or missing --get option value." 

def main():
    """Main entry point."""
    conf = ConfigParser()
    if '--help' in sys.argv or \
       '-h' in sys.argv:
        print_usage()
    elif '--version' in sys.argv or \
         '-V' in sys.argv:
        print_version()
    else:
        confdir = None
        if '--conf-dir' in sys.argv:
            try:
                confdir = sys.argv[sys.argv.index('--conf-dir') + 1]
            except IndexError:
                pass # missing --conf-dir argument value
            if not os.path.exists(confdir):
                print "ERROR: bad or missing --conf-dir option value."
                sys.exit(1)
        else:
            ## try to detect path to conf dir (relative to this bin dir):
            confdir = re.sub(r'/bin$', '/etc', sys.path[0])
        ## read conf files:
        for conffile in [confdir + os.sep + 'invenio.conf',
                         confdir + os.sep + 'invenio-autotools.conf',
                         confdir + os.sep + 'invenio-local.conf',]:
            if os.path.exists(conffile):
                conf.read(conffile)
            else:
                if not conffile.endswith("invenio-local.conf"):
                    # invenio-local.conf is optional, otherwise stop:
                    print "ERROR: Badly guessed conf file location", conffile
                    print "(Please use --conf-dir option.)"
                    sys.exit(1)
        ## decide what to do:
        done = False
        for opt_idx in range(0, len(sys.argv)):
            opt = sys.argv[opt_idx]
            if opt == '--conf-dir':
                # already treated before, so skip silently:
                pass
            elif opt == '--get':
                try:
                    varname = sys.argv[opt_idx + 1]
                except IndexError:
                    print "ERROR: bad or missing --get option value."
                    sys.exit(1)
                if varname.startswith('-'):
                    print "ERROR: bad or missing --get option value."
                    sys.exit(1)
                varvalue = cli_cmd_get(conf, varname)
                if varvalue is not None:
                    print varvalue
                else:
                    sys.exit(1)
                done = True
            elif opt == '--list':
                cli_cmd_list(conf)
                done = True
            elif opt == '--detect-system-details':
                cli_cmd_detect_system_details(conf)
                done = True
            elif opt == '--create-tables':
                cli_cmd_create_tables(conf)
                done = True
            elif opt == '--load-webstat-conf':
                cli_cmd_load_webstat_conf(conf)
                done = True
            elif opt == '--drop-tables':
                cli_cmd_drop_tables(conf)
                done = True
            elif opt == '--check-openoffice':
                cli_check_openoffice(conf)
                done = True
            elif opt == '--create-demo-site':
                cli_cmd_create_demo_site(conf)
                done = True
            elif opt == '--load-demo-records':
                cli_cmd_load_demo_records(conf)
                done = True
            elif opt == '--remove-demo-records':
                cli_cmd_remove_demo_records(conf)
                done = True
            elif opt == '--drop-demo-site':
                cli_cmd_drop_demo_site(conf)
                done = True
            elif opt == '--run-unit-tests':
                cli_cmd_run_unit_tests(conf)
                done = True
            elif opt == '--run-regression-tests':
                cli_cmd_run_regression_tests(conf)
                done = True
            elif opt == '--run-web-tests':
                cli_cmd_run_web_tests(conf)
                done = True
            elif opt == '--update-all':
                cli_cmd_update_config_py(conf)
                cli_cmd_update_dbquery_py(conf)
                cli_cmd_update_dbexec(conf)
                cli_cmd_update_bibconvert_tpl(conf)
                cli_cmd_update_web_tests(conf)
                done = True
            elif opt == '--update-config-py':
                cli_cmd_update_config_py(conf)
                done = True
            elif opt == '--update-dbquery-py':
                cli_cmd_update_dbquery_py(conf)
                done = True
            elif opt == '--update-dbexec':
                cli_cmd_update_dbexec(conf)
                done = True
            elif opt == '--update-bibconvert-tpl':
                cli_cmd_update_bibconvert_tpl(conf)
                done = True
            elif opt == '--update-web-tests':
                cli_cmd_update_web_tests(conf)
                done = True
            elif opt == '--reset-all':
                cli_cmd_reset_sitename(conf)
                cli_cmd_reset_siteadminemail(conf)
                cli_cmd_reset_fieldnames(conf)
                cli_cmd_reset_recstruct_cache(conf)
                done = True
            elif opt == '--reset-sitename':
                cli_cmd_reset_sitename(conf)
                done = True
            elif opt == '--reset-siteadminemail':
                cli_cmd_reset_siteadminemail(conf)
                done = True
            elif opt == '--reset-fieldnames':
                cli_cmd_reset_fieldnames(conf)
                done = True
            elif opt == '--reset-recstruct-cache':
                cli_cmd_reset_recstruct_cache(conf)
                done = True
            elif opt == '--create-apache-conf':
                cli_cmd_create_apache_conf(conf)
                done = True
            elif opt.startswith("-") and opt != '--yes-i-know':
                print "ERROR: unknown option", opt
                sys.exit(1)
        if not done:
            print """ERROR: Please specify a command.  Please see '--help'."""
            sys.exit(1)

if __name__ == '__main__':
    main()
diff --git a/modules/miscutil/sql/tabcreate.sql b/modules/miscutil/sql/tabcreate.sql
index eb01eadc9..16319220c 100644
--- a/modules/miscutil/sql/tabcreate.sql
+++ b/modules/miscutil/sql/tabcreate.sql
@@ -1,4009 +1,4009 @@
-- This file is part of Invenio.
-- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
--
-- Invenio is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License as
-- published by the Free Software Foundation; either version 2 of the
-- License, or (at your option) any later version.
--
-- Invenio is distributed in the hope that it will be useful, but
-- WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-- General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with Invenio; if not, write to the Free Software Foundation, Inc.,
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
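-- The tables below implement Invenio's MARC storage model: each bibNNx
-- table holds the distinct (tag, value) pairs for MARC tags NN0 to NN9,
-- and each bibrec_bibNNx table links records (id_bibrec) to those
-- values (id_bibxxx), ordered by field_number.  Illustrative query
-- only, kept commented out because at this point of the file the
-- tables do not exist yet; fetching the MARC 245 (title) values of
-- record 97 would read:
--
--   SELECT b.value
--     FROM bib24x AS b
--     JOIN bibrec_bib24x AS r ON r.id_bibxxx = b.id
--    WHERE r.id_bibrec = 97 AND b.tag LIKE '245%';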
-- tables for bibliographic records:

CREATE TABLE IF NOT EXISTS bibrec (
  id mediumint(8) unsigned NOT NULL auto_increment,
  creation_date datetime NOT NULL default '0000-00-00',
  modification_date datetime NOT NULL default '0000-00-00',
  PRIMARY KEY (id),
  KEY creation_date (creation_date),
  KEY modification_date (modification_date)
) ENGINE=MyISAM;

CREATE TABLE IF NOT EXISTS bib00x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib01x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib02x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib03x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib04x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib05x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib06x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib07x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib08x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib09x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib10x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib11x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib12x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib13x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib14x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib15x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib16x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib17x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib18x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib19x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib20x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib21x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib22x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib23x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib24x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib25x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib26x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib27x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib28x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib29x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib30x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib31x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib32x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib33x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib34x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib35x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib36x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib37x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib38x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib39x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib40x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib41x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib42x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib43x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib44x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib45x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib46x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib47x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib48x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib49x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib50x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib51x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib52x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib53x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib54x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib55x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib56x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib57x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib58x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib59x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib60x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib61x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib62x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib63x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib64x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib65x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib66x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib67x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib68x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib69x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib70x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib71x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib72x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib73x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib74x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib75x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib76x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib77x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib78x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib79x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib80x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib81x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib82x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib83x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib84x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib85x (
  id mediumint(8) unsigned NOT NULL auto_increment,
  tag varchar(6) NOT NULL default '',
  value text NOT NULL,
  PRIMARY KEY (id),
  KEY kt (tag),
  KEY kv (value(100)) -- URLs need usually a larger index for speedy lookups
) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib86x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib87x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib88x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib89x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib90x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib91x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib92x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib93x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib94x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib95x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib96x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib97x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib98x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib99x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib00x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib01x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib02x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib03x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib04x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib05x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib06x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib07x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib08x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib09x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib10x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib11x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib12x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib13x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib14x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib15x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib16x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib17x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib18x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib19x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib20x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib21x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib22x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib23x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib24x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib25x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib26x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib27x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib28x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib29x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib30x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib31x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib32x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib33x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib34x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib35x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib36x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib37x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib38x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib39x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib40x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib41x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib42x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib43x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib44x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib45x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib46x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib47x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib48x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib49x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib50x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib51x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib52x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib53x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib54x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib55x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib56x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib57x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib58x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib59x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib60x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib61x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib62x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib63x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib64x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib65x ( id_bibrec mediumint(8) unsigned NOT NULL
default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib66x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib67x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib68x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib69x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib70x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib71x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib72x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib73x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib74x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib75x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib76x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib77x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib78x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL 
default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib79x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib80x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib81x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib82x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib83x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib84x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib85x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib86x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib87x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib88x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib89x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib90x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib91x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default 
NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib92x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib93x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib94x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib95x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib96x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib97x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib98x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bib99x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; -- tables for bibliographic records formatted: CREATE TABLE IF NOT EXISTS bibfmt ( id mediumint(8) unsigned NOT NULL auto_increment, id_bibrec int(8) unsigned NOT NULL default '0', format varchar(10) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00', value longblob, PRIMARY KEY (id), KEY id_bibrec (id_bibrec), KEY format (format) ) ENGINE=MyISAM; -- tables for index files: CREATE TABLE IF NOT EXISTS idxINDEX ( id mediumint(9) unsigned NOT NULL, name varchar(50) NOT NULL default '', description varchar(255) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', stemming_language varchar(10) NOT NULL default '', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS idxINDEXNAME ( id_idxINDEX mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_idxINDEX,ln,type) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS idxINDEX_field ( id_idxINDEX mediumint(9) unsigned NOT NULL, id_field mediumint(9) unsigned NOT NULL, regexp_punctuation varchar(255) NOT NULL default "[\.\,\:\;\?\!\"]", regexp_alphanumeric_separators varchar(255) NOT NULL default "[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]", PRIMARY KEY (id_idxINDEX,id_field) ) 
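-- Illustrative sketch (not part of the schema): each bibrec_bibNNx link
-- table above joins records to MARC values stored in the companion bibNNx
-- table defined earlier in this file (assumed here to carry id, tag and
-- value columns). E.g., reading the 245 field values of record 42:
--   SELECT b.value
--     FROM bib24x AS b
--     JOIN bibrec_bib24x AS bb ON bb.id_bibxxx = b.id
--    WHERE bb.id_bibrec = 42 AND b.tag LIKE '245%'
--    ORDER BY bb.field_number;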
-- this comment line here is just to fix the SQL display mode in Emacs '
CREATE TABLE IF NOT EXISTS idxWORD01F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD02F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD02R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD03F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD03R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD04F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD04R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD05F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD05R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD06F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD06R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD07F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD07R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD08F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD08R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD09F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD09R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD10F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD10R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD11F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD11R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD12F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD12R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD13F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD13R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD14F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD14R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD15F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD15R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD16F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD16R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD17F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD17R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
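-- Illustrative sketch: each F/R pair above forms a forward and a reverse
-- index for one logical index number. A term lookup reads the forward
-- table and deserializes the hitlist blob application-side:
--   SELECT hitlist FROM idxWORD01F WHERE term = 'ellis';
-- The reverse table keeps each record's termlist in 'CURRENT', 'FUTURE'
-- and 'TEMPORARY' states, which lets the indexer diff old and new
-- termlists during reindexing (an interpretation of the enum, not stated
-- in this file).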
CREATE TABLE IF NOT EXISTS idxPAIR01F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR02F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR02R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR03F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR03R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR04F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR04R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR05F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR05R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR06F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR06R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR07F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR07R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR08F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR08R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR09F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR09R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR10F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR10R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR11F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR11R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR12F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR12R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR13F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR13R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR14F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR14R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR15F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR15R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR16F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR16R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR17F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR17R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
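-- Illustrative sketch (assumption, not stated in this file): the pair
-- tables index two adjacent words per term, hence term varchar(100)
-- rather than the word tables' varchar(50), e.g.:
--   SELECT hitlist FROM idxPAIR01F WHERE term = 'higgs boson';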
CREATE TABLE IF NOT EXISTS idxPHRASE01F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE02F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE02R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE03F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE03R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE04F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE04R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE05F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE05R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE06F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE06R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE07F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE07R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE08F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE08R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE09F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE09R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE10F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE10R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE11F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE11R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE12F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE12R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE13F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE13R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE14F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE14R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE15F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE15R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE16F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE16R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE17F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE17R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
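-- Illustrative sketch: phrase terms are full text values, so the F
-- tables above use a non-unique 50-character prefix KEY rather than a
-- UNIQUE KEY; a lookup is still an exact-term match, e.g.:
--   SELECT hitlist FROM idxPHRASE01F WHERE term = 'standard model';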
-- tables for ranking:
CREATE TABLE IF NOT EXISTS rnkMETHOD ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(20) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkMETHODNAME ( id_rnkMETHOD mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_rnkMETHOD,ln,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkMETHODDATA ( id_rnkMETHOD mediumint(9) unsigned NOT NULL, relevance_data longblob, PRIMARY KEY (id_rnkMETHOD) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS collection_rnkMETHOD ( id_collection mediumint(9) unsigned NOT NULL, id_rnkMETHOD mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_rnkMETHOD) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkWORD01F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkWORD01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkAUTHORDATA ( aterm varchar(50) default NULL, hitlist longblob, UNIQUE KEY aterm (aterm) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkPAGEVIEWS ( id_bibrec mediumint(8) unsigned default NULL, id_user int(15) unsigned default '0', client_host int(10) unsigned default NULL, view_time datetime default '0000-00-00 00:00:00', KEY view_time (view_time), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkDOWNLOADS ( id_bibrec mediumint(8) unsigned default NULL, download_time datetime default '0000-00-00 00:00:00', client_host int(10) unsigned default NULL, id_user int(15) unsigned default NULL, id_bibdoc mediumint(9) unsigned default NULL, file_version smallint(2) unsigned default NULL, file_format varchar(10) NULL default NULL, KEY download_time (download_time), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
-- a table for citations: record-cites-record
CREATE TABLE IF NOT EXISTS rnkCITATIONDATA ( id mediumint(8) unsigned NOT NULL auto_increment, object_name varchar(255) NOT NULL, object_value longblob, last_updated datetime NOT NULL default '0000-00-00', PRIMARY KEY id (id), UNIQUE KEY object_name (object_name) ) ENGINE=MyISAM;
-- a table for missing citations. This should be scanned by a program
-- occasionally to check if some publication has been cited more than
-- 50 times (or such), and alert cataloguers to create a record for that
-- external citation
--
-- id_bibrec is the id of the record; extcitepubinfo is publication info
-- that generally looks like hep-th/0112088
CREATE TABLE IF NOT EXISTS rnkCITATIONDATAEXT ( id_bibrec int(8) unsigned, extcitepubinfo varchar(255) NOT NULL, PRIMARY KEY (id_bibrec, extcitepubinfo), KEY extcitepubinfo (extcitepubinfo) ) ENGINE=MyISAM;
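-- Illustrative sketch of the periodic scan described above (the 50-cite
-- threshold follows the comment; the actual program is not part of this
-- file):
--   SELECT extcitepubinfo, COUNT(*) AS cites
--     FROM rnkCITATIONDATAEXT
--    GROUP BY extcitepubinfo
--    HAVING COUNT(*) > 50
--    ORDER BY cites DESC;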
-- tables for collections and collection tree:
CREATE TABLE IF NOT EXISTS collection ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, dbquery text, nbrecs int(10) unsigned default '0', reclist longblob, PRIMARY KEY (id), UNIQUE KEY name (name), KEY dbquery (dbquery(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS collectionname ( id_collection mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_collection,ln,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS collection_collection ( id_dad mediumint(9) unsigned NOT NULL, id_son mediumint(9) unsigned NOT NULL, type char(1) NOT NULL default 'r', score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_dad,id_son) ) ENGINE=MyISAM;
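-- Illustrative sketch: the collection tree is walked by following
-- dad->son links, e.g. the regular ('r') children of collection 1, in
-- score order:
--   SELECT c.name
--     FROM collection AS c
--     JOIN collection_collection AS cc ON cc.id_son = c.id
--    WHERE cc.id_dad = 1 AND cc.type = 'r'
--    ORDER BY cc.score DESC;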
-- tables for OAI sets:
CREATE TABLE IF NOT EXISTS oaiREPOSITORY (
  id mediumint(9) unsigned NOT NULL auto_increment,
  setName varchar(255) NOT NULL default '',
- setSpec varchar(255) NOT NULL default '',
+ setSpec varchar(255) NOT NULL default 'GLOBAL_SET',
  setCollection varchar(255) NOT NULL default '',
  setDescription text NOT NULL default '',
  setDefinition text NOT NULL default '',
  setRecList longblob,
  p1 text NOT NULL default '',
  f1 text NOT NULL default '',
  m1 text NOT NULL default '',
  p2 text NOT NULL default '',
  f2 text NOT NULL default '',
  m2 text NOT NULL default '',
  p3 text NOT NULL default '',
  f3 text NOT NULL default '',
  m3 text NOT NULL default '',
  PRIMARY KEY (id)
) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS oaiHARVEST ( id mediumint(9) unsigned NOT NULL auto_increment, baseurl varchar(255) NOT NULL default '', metadataprefix varchar(255) NOT NULL default 'oai_dc', arguments text, comment text, bibconvertcfgfile varchar(255), name varchar(255) NOT NULL, lastrun datetime, frequency mediumint(12) NOT NULL default '0', postprocess varchar(20) NOT NULL default 'h', bibfilterprogram varchar(255) NOT NULL default '', setspecs text NOT NULL default '', PRIMARY KEY (id) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS oaiHARVESTLOG (
  id_oaiHARVEST mediumint(9) unsigned NOT NULL REFERENCES oaiHARVEST, -- source we harvest from
  id_bibrec mediumint(8) unsigned NOT NULL default '0', -- internal record id (filled by bibupload)
  bibupload_task_id int NOT NULL default 0, -- bibupload task number
  oai_id varchar(40) NOT NULL default "", -- OAI record identifier we harvested
  date_harvested datetime NOT NULL default '0000-00-00', -- when we harvested
  date_inserted datetime NOT NULL default '0000-00-00', -- when it was inserted
  inserted_to_db char(1) NOT NULL default 'P', -- where it was inserted (P=prod, H=holding-pen, etc)
  PRIMARY KEY (bibupload_task_id, oai_id, date_harvested)
) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibHOLDINGPEN (
  changeset_id INT NOT NULL AUTO_INCREMENT, -- the identifier of the changeset stored in the holding pen
  changeset_date datetime NOT NULL DEFAULT '0000-00-00 00:00:00', -- when the changeset was inserted
  changeset_xml TEXT NOT NULL DEFAULT '',
  oai_id varchar(40) NOT NULL DEFAULT '', -- OAI identifier of concerned record
  id_bibrec mediumint(8) unsigned NOT NULL default '0', -- record ID of concerned record (filled by bibupload)
  PRIMARY KEY (changeset_id),
  KEY changeset_date (changeset_date),
  KEY id_bibrec (id_bibrec)
) ENGINE=MyISAM;
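-- Illustrative sketch: since bibupload fills id_bibrec after insertion,
-- the harvesting history of one OAI record can be inspected with (the
-- oai_id value is hypothetical):
--   SELECT date_harvested, date_inserted, inserted_to_db, id_bibrec
--     FROM oaiHARVESTLOG
--    WHERE oai_id = 'oai:arXiv.org:hep-th/0112088'
--    ORDER BY date_harvested;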
-- tables for portal elements:
CREATE TABLE IF NOT EXISTS collection_portalbox ( id_collection mediumint(9) unsigned NOT NULL, id_portalbox mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', position char(3) NOT NULL default 'top', score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_portalbox,ln) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS portalbox ( id mediumint(9) unsigned NOT NULL auto_increment, title text NOT NULL, body text NOT NULL, UNIQUE KEY id (id) ) ENGINE=MyISAM;
-- tables for search examples:
CREATE TABLE IF NOT EXISTS collection_example ( id_collection mediumint(9) unsigned NOT NULL, id_example mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_example) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS example ( id mediumint(9) unsigned NOT NULL auto_increment, type text NOT NULL default '', body text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM;
-- tables for collection formats:
CREATE TABLE IF NOT EXISTS collection_format ( id_collection mediumint(9) unsigned NOT NULL, id_format mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_format) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS format ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, code varchar(6) NOT NULL, description varchar(255) default '', content_type varchar(255) default '', visibility tinyint NOT NULL default '1', PRIMARY KEY (id), UNIQUE KEY code (code) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS formatname ( id_format mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_format,ln,type) ) ENGINE=MyISAM;
-- tables for collection detailed page options
CREATE TABLE IF NOT EXISTS collectiondetailedrecordpagetabs ( id_collection mediumint(9) unsigned NOT NULL, tabs varchar(255) NOT NULL default '', PRIMARY KEY (id_collection) ) ENGINE=MyISAM;
-- tables for search options and MARC tags:
CREATE TABLE IF NOT EXISTS collection_field_fieldvalue ( id_collection mediumint(9) unsigned NOT NULL, id_field mediumint(9) unsigned NOT NULL, id_fieldvalue mediumint(9) unsigned, type char(3) NOT NULL default 'src', score tinyint(4) unsigned NOT NULL default '0', score_fieldvalue tinyint(4) unsigned NOT NULL default '0', KEY id_collection (id_collection), KEY id_field (id_field), KEY id_fieldvalue (id_fieldvalue) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS field ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, code varchar(255) NOT NULL, PRIMARY KEY (id), UNIQUE KEY code (code) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS fieldname ( id_field mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_field,ln,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS fieldvalue ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, value text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS field_tag ( id_field mediumint(9) unsigned NOT NULL, id_tag mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_field,id_tag) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS tag ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, value char(6) NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM;
-- tables for file management
CREATE TABLE IF NOT EXISTS bibdoc ( id mediumint(9) unsigned NOT NULL auto_increment, status text NOT NULL default '', docname varchar(250) COLLATE utf8_bin NOT NULL default 'file', creation_date datetime NOT NULL default '0000-00-00', modification_date datetime NOT NULL default '0000-00-00', text_extraction_date datetime NOT NULL default '0000-00-00', more_info mediumblob NULL default NULL, PRIMARY KEY (id), KEY docname (docname), KEY creation_date (creation_date), KEY modification_date (modification_date) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bibdoc ( id_bibrec mediumint(9) unsigned NOT NULL default '0', id_bibdoc mediumint(9) unsigned NOT NULL default '0', type varchar(255), KEY (id_bibrec), KEY (id_bibdoc) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibdoc_bibdoc ( id_bibdoc1 mediumint(9) unsigned NOT NULL, id_bibdoc2 mediumint(9) unsigned NOT NULL, type varchar(255), KEY (id_bibdoc1), KEY (id_bibdoc2) ) ENGINE=MyISAM;
-- tables for publication requests:
CREATE TABLE IF NOT EXISTS publreq ( id int(11) NOT NULL auto_increment, host varchar(255) NOT NULL default '', date varchar(255) NOT NULL default '', name varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', address text NOT NULL, publication text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM;
-- tables for sessions and users:
CREATE TABLE IF NOT EXISTS session ( session_key varchar(32) NOT NULL default '', session_expiry int(11) unsigned NOT NULL default '0', session_object longblob, uid int(15) unsigned NOT NULL, UNIQUE KEY session_key (session_key), KEY uid (uid) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS user ( id int(15) unsigned NOT NULL auto_increment, email varchar(255) NOT NULL default '', password blob NOT NULL, note varchar(255) default NULL, settings blob default NULL, nickname varchar(255) NOT NULL default '', last_login datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY id (id), KEY email (email), KEY nickname (nickname) ) ENGINE=MyISAM;
-- tables for usergroups
CREATE TABLE IF NOT EXISTS usergroup ( id int(15) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL default '', description text default '', join_policy char(2) NOT NULL default '', login_method varchar(255) NOT NULL default 'INTERNAL', PRIMARY KEY (id), UNIQUE KEY login_method_name (login_method(70), name), KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS user_usergroup ( id_user int(15) unsigned NOT NULL default '0', id_usergroup int(15) unsigned NOT NULL default '0', user_status char(1) NOT NULL default '', user_status_date datetime NOT NULL default '0000-00-00 00:00:00', KEY id_user (id_user), KEY id_usergroup (id_usergroup) ) ENGINE=MyISAM;
-- tables for access control engine
CREATE TABLE IF NOT EXISTS accROLE ( id int(15) unsigned NOT NULL auto_increment, name varchar(32), description varchar(255), firerole_def_ser blob NULL, firerole_def_src text NULL, PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS user_accROLE ( id_user int(15) unsigned NOT NULL, id_accROLE int(15) unsigned NOT NULL, expiration datetime NOT NULL default '9999-12-31 23:59:59', PRIMARY KEY (id_user, id_accROLE) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS accMAILCOOKIE ( id int(15) unsigned NOT NULL auto_increment, data blob NOT NULL, expiration datetime NOT NULL default '9999-12-31 23:59:59', kind varchar(32) NOT NULL, onetime boolean NOT NULL default 0, status char(1) NOT NULL default 'W', PRIMARY KEY (id), KEY expiration (expiration) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS accACTION ( id int(15) unsigned NOT NULL auto_increment, name varchar(32), description varchar(255), allowedkeywords varchar(255), optional ENUM ('yes', 'no') NOT NULL default 'no', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS accARGUMENT ( id int(15) unsigned NOT NULL auto_increment, keyword varchar(32), value varchar(255), PRIMARY KEY (id), KEY KEYVAL (keyword, value) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS accROLE_accACTION_accARGUMENT ( id_accROLE int(15), id_accACTION int(15), id_accARGUMENT int(15), argumentlistid mediumint(8), KEY id_accROLE (id_accROLE), KEY id_accACTION (id_accACTION), KEY id_accARGUMENT (id_accARGUMENT) ) ENGINE=MyISAM;
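-- Illustrative sketch: "may user U perform action A?" reduces to a join
-- across the access control tables above (the action name is
-- hypothetical, chosen for illustration):
--   SELECT COUNT(*)
--     FROM user_accROLE AS ur
--     JOIN accROLE_accACTION_accARGUMENT AS raa ON raa.id_accROLE = ur.id_accROLE
--     JOIN accACTION AS act ON act.id = raa.id_accACTION
--    WHERE ur.id_user = 1
--      AND act.name = 'cfgwebsearch'
--      AND ur.expiration >= NOW();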
-- tables for personal/collaborative features (baskets, alerts, searches, messages, usergroups):
CREATE TABLE IF NOT EXISTS user_query ( id_user int(15) unsigned NOT NULL default '0', id_query int(15) unsigned NOT NULL default '0', hostname varchar(50) default 'unknown host', date datetime default NULL, KEY id_user (id_user,id_query) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS query ( id int(15) unsigned NOT NULL auto_increment, type char(1) NOT NULL default 'r', urlargs text NOT NULL, PRIMARY KEY (id), KEY urlargs (urlargs(100)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS user_query_basket ( id_user int(15) unsigned NOT NULL default '0', id_query int(15) unsigned NOT NULL default '0', id_basket int(15) unsigned NOT NULL default '0', frequency varchar(5) NOT NULL default '', date_creation date default NULL, date_lastrun date default '0000-00-00', alert_name varchar(30) NOT NULL default '', notification char(1) NOT NULL default 'y', PRIMARY KEY (id_user,id_query,frequency,id_basket), KEY alert_name (alert_name) ) ENGINE=MyISAM;
-- baskets
CREATE TABLE IF NOT EXISTS bskBASKET ( id int(15) unsigned NOT NULL auto_increment, id_owner int(15) unsigned NOT NULL default '0', name varchar(50) NOT NULL default '', date_modification datetime NOT NULL default '0000-00-00 00:00:00', nb_views int(15) NOT NULL default '0', PRIMARY KEY (id), KEY id_owner (id_owner), KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bskREC ( id_bibrec_or_bskEXTREC int(16) NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', id_user_who_added_item int(15) NOT NULL default '0', score int(15) NOT NULL default '0', date_added datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id_bibrec_or_bskEXTREC,id_bskBASKET), KEY id_bibrec_or_bskEXTREC (id_bibrec_or_bskEXTREC), KEY id_bskBASKET (id_bskBASKET), KEY score (score), KEY date_added (date_added) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bskEXTREC ( id int(15) unsigned NOT NULL auto_increment, external_id int(15) NOT NULL default '0', collection_id int(15) unsigned NOT NULL default '0', original_url text, creation_date datetime NOT NULL default '0000-00-00 00:00:00', modification_date datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bskEXTFMT ( id int(15) unsigned NOT NULL auto_increment, id_bskEXTREC int(15) unsigned NOT NULL default '0', format varchar(10) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', value longblob, PRIMARY KEY (id), KEY id_bskEXTREC (id_bskEXTREC), KEY format (format) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS user_bskBASKET ( id_user int(15) unsigned NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', topic varchar(50) NOT NULL default '', PRIMARY KEY (id_user,id_bskBASKET), KEY id_user (id_user), KEY id_bskBASKET (id_bskBASKET) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS usergroup_bskBASKET ( id_usergroup int(15) unsigned NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', topic varchar(50) NOT NULL default '', date_shared datetime NOT NULL default '0000-00-00 00:00:00', share_level char(2) NOT NULL default '', PRIMARY KEY (id_usergroup,id_bskBASKET), KEY id_usergroup (id_usergroup), KEY id_bskBASKET (id_bskBASKET) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bskRECORDCOMMENT ( id int(15) unsigned NOT NULL auto_increment, id_bibrec_or_bskEXTREC int(16) NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', id_user int(15) unsigned NOT NULL default '0', title varchar(255) NOT NULL default '', body text NOT NULL, date_creation datetime NOT NULL default '0000-00-00 00:00:00', priority int(15) NOT NULL default '0', in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0', reply_order_cached_data blob NULL default NULL, PRIMARY KEY (id), KEY id_bskBASKET (id_bskBASKET), KEY id_bibrec_or_bskEXTREC (id_bibrec_or_bskEXTREC), KEY date_creation (date_creation), KEY in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT), INDEX (reply_order_cached_data(40)) ) ENGINE=MyISAM;
-- tables for messaging system
CREATE TABLE IF NOT EXISTS msgMESSAGE ( id int(15) unsigned NOT NULL auto_increment, id_user_from int(15) unsigned NOT NULL default '0', sent_to_user_nicks text NOT NULL default '', sent_to_group_names text NOT NULL default '', subject text NOT NULL default '', body text default NULL, sent_date datetime NOT NULL default '0000-00-00 00:00:00', received_date datetime NULL default '0000-00-00 00:00:00', PRIMARY KEY id (id), KEY id_user_from (id_user_from) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS user_msgMESSAGE ( id_user_to int(15) unsigned NOT NULL default '0', id_msgMESSAGE int(15) unsigned NOT NULL default '0', status char(1) NOT NULL default 'N', PRIMARY KEY id (id_user_to, id_msgMESSAGE), KEY id_user_to (id_user_to), KEY id_msgMESSAGE (id_msgMESSAGE) ) ENGINE=MyISAM;
-- tables for WebComment
CREATE TABLE IF NOT EXISTS cmtRECORDCOMMENT ( id int(15) unsigned NOT NULL auto_increment, id_bibrec int(15) unsigned NOT NULL default '0', id_user int(15) unsigned NOT NULL default '0', title varchar(255) NOT NULL default '', body text NOT NULL default '', date_creation datetime NOT NULL default '0000-00-00 00:00:00', star_score tinyint(5) unsigned NOT NULL default '0', nb_votes_yes int(10) NOT NULL default '0', nb_votes_total int(10) unsigned NOT NULL default '0', nb_abuse_reports int(10) NOT NULL default '0', status char(2) NOT NULL default 'ok', round_name varchar(255) NOT NULL default '', restriction varchar(50) NOT NULL default '', in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL default '0', reply_order_cached_data blob NULL default NULL, PRIMARY KEY (id), KEY id_bibrec (id_bibrec), KEY id_user (id_user), KEY status (status), KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT), INDEX (reply_order_cached_data(40)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS cmtACTIONHISTORY ( id_cmtRECORDCOMMENT int(15) unsigned NULL, id_bibrec int(15) unsigned NULL, id_user int(15) unsigned NULL default NULL, client_host int(10) unsigned default NULL, action_time datetime NOT NULL default '0000-00-00 00:00:00', action_code char(1) NOT NULL, KEY id_cmtRECORDCOMMENT (id_cmtRECORDCOMMENT), KEY client_host (client_host), KEY id_user (id_user), KEY action_code (action_code) ) ENGINE=MyISAM;
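-- Illustrative sketch: comment threading hangs on
-- in_reply_to_id_cmtRECORDCOMMENT (0 marks a top-level comment), with
-- reply_order_cached_data as a display-order cache. Top-level comments
-- of record 42, newest first:
--   SELECT id, title, date_creation
--     FROM cmtRECORDCOMMENT
--    WHERE id_bibrec = 42 AND status = 'ok'
--      AND in_reply_to_id_cmtRECORDCOMMENT = 0
--    ORDER BY date_creation DESC;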
cmtSUBSCRIPTION ( id_bibrec mediumint(8) unsigned NOT NULL, id_user int(15) unsigned NOT NULL, creation_time datetime NOT NULL default '0000-00-00 00:00:00', KEY id_user (id_bibrec, id_user) ) ENGINE=MyISAM; -- tables for BibKnowledge: CREATE TABLE IF NOT EXISTS knwKB ( id mediumint(8) unsigned NOT NULL auto_increment, name varchar(255) default '', description text default '', kbtype char default NULL, PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS knwKBRVAL ( id mediumint(8) unsigned NOT NULL auto_increment, m_key varchar(255) NOT NULL default '', m_value text NOT NULL default '', id_knwKB mediumint(8) NOT NULL default '0', PRIMARY KEY (id), KEY id_knwKB (id_knwKB), KEY m_key (m_key(30)), KEY m_value (m_value(30)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS knwKBDDEF ( id_knwKB mediumint(8) unsigned NOT NULL, id_collection mediumint(9), output_tag text default '', search_expression text default '', PRIMARY KEY (id_knwKB) ) ENGINE=MyISAM; -- tables for WebSubmit: CREATE TABLE IF NOT EXISTS sbmACTION ( lactname text, sactname char(3) NOT NULL default '', dir text, cd date default NULL, md date default NULL, actionbutton text, statustext text, PRIMARY KEY (sactname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmALLFUNCDESCR ( function varchar(40) NOT NULL default '', description tinytext, PRIMARY KEY (function) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmAPPROVAL ( doctype varchar(10) NOT NULL default '', categ varchar(50) NOT NULL default '', rn varchar(50) NOT NULL default '', status varchar(10) NOT NULL default '', dFirstReq datetime NOT NULL default '0000-00-00 00:00:00', dLastReq datetime NOT NULL default '0000-00-00 00:00:00', dAction datetime NOT NULL default '0000-00-00 00:00:00', access varchar(20) NOT NULL default '0', note text NOT NULL default '', PRIMARY KEY (rn) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCPLXAPPROVAL ( doctype varchar(10) NOT NULL default '', categ varchar(50) NOT NULL default '', rn varchar(50) NOT NULL default '', type varchar(10) NOT NULL, status varchar(10) NOT NULL, id_group int(15) unsigned NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', id_EdBoardGroup int(15) unsigned NOT NULL default '0', dFirstReq datetime NOT NULL default '0000-00-00 00:00:00', dLastReq datetime NOT NULL default '0000-00-00 00:00:00', dEdBoardSel datetime NOT NULL default '0000-00-00 00:00:00', dRefereeSel datetime NOT NULL default '0000-00-00 00:00:00', dRefereeRecom datetime NOT NULL default '0000-00-00 00:00:00', dEdBoardRecom datetime NOT NULL default '0000-00-00 00:00:00', dPubComRecom datetime NOT NULL default '0000-00-00 00:00:00', dProjectLeaderAction datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (rn, type) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOLLECTION ( id int(11) NOT NULL auto_increment, name varchar(100) NOT NULL default '', PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOLLECTION_sbmCOLLECTION ( id_father int(11) NOT NULL default '0', id_son int(11) NOT NULL default '0', catalogue_order int(11) NOT NULL default '0' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOLLECTION_sbmDOCTYPE ( id_father int(11) NOT NULL default '0', id_son char(10) NOT NULL default '0', catalogue_order int(11) NOT NULL default '0' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCATEGORIES ( doctype varchar(10) NOT NULL default '', sname varchar(75) NOT NULL default '', lname varchar(75) NOT NULL default '', score tinyint unsigned NOT NULL default 0, PRIMARY KEY (doctype, sname), KEY 
doctype (doctype), KEY sname (sname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCHECKS ( chname varchar(15) NOT NULL default '', chdesc text, cd date default NULL, md date default NULL, chefi1 text, chefi2 text, PRIMARY KEY (chname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmDOCTYPE ( ldocname text, sdocname varchar(10) default NULL, cd date default NULL, md date default NULL, description text ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFIELD ( subname varchar(13) default NULL, pagenb int(11) default NULL, fieldnb int(11) default NULL, fidesc varchar(15) default NULL, fitext text, level char(1) default NULL, sdesc text, checkn text, cd date default NULL, md date default NULL, fiefi1 text, fiefi2 text ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFIELDDESC ( name varchar(15) NOT NULL default '', alephcode varchar(50) default NULL, marccode varchar(50) NOT NULL default '', type char(1) default NULL, size int(11) default NULL, rows int(11) default NULL, cols int(11) default NULL, maxlength int(11) default NULL, val text, fidesc text, cd date default NULL, md date default NULL, modifytext text, fddfi2 text, cookie int(11) default '0', PRIMARY KEY (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFORMATEXTENSION ( FILE_FORMAT text NOT NULL, FILE_EXTENSION text NOT NULL ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFUNCTIONS ( action varchar(10) NOT NULL default '', doctype varchar(10) NOT NULL default '', function varchar(40) NOT NULL default '', score int(11) NOT NULL default '0', step tinyint(4) NOT NULL default '1' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFUNDESC ( function varchar(40) NOT NULL default '', param varchar(40) default NULL ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmGFILERESULT ( FORMAT text NOT NULL, RESULT text NOT NULL ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmIMPLEMENT ( docname varchar(10) default NULL, actname char(3) default NULL, displayed char(1) default NULL, subname varchar(13) default NULL, nbpg int(11) default NULL, cd date default NULL, md date default NULL, buttonorder int(11) default NULL, statustext text, level char(1) NOT NULL default '', score int(11) NOT NULL default '0', stpage int(11) NOT NULL default '0', endtxt varchar(100) NOT NULL default '' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPARAMETERS ( doctype varchar(10) NOT NULL default '', name varchar(40) NOT NULL default '', value text NOT NULL default '', PRIMARY KEY (doctype,name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPUBLICATION ( doctype varchar(10) NOT NULL default '', categ varchar(50) NOT NULL default '', rn varchar(50) NOT NULL default '', status varchar(10) NOT NULL default '', dFirstReq datetime NOT NULL default '0000-00-00 00:00:00', dLastReq datetime NOT NULL default '0000-00-00 00:00:00', dAction datetime NOT NULL default '0000-00-00 00:00:00', accessref varchar(20) NOT NULL default '', accessedi varchar(20) NOT NULL default '', access varchar(20) NOT NULL default '', referees varchar(50) NOT NULL default '', authoremail varchar(50) NOT NULL default '', dRefSelection datetime NOT NULL default '0000-00-00 00:00:00', dRefRec datetime NOT NULL default '0000-00-00 00:00:00', dEdiRec datetime NOT NULL default '0000-00-00 00:00:00', accessspo varchar(20) NOT NULL default '', journal varchar(100) default NULL, PRIMARY KEY (doctype,categ,rn) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPUBLICATIONCOMM ( id int(11) NOT NULL auto_increment, id_parent int(11) default '0', rn varchar(100) NOT NULL default '', firstname varchar(100) default NULL, secondname 
varchar(100) default NULL, email varchar(100) default NULL, date varchar(40) NOT NULL default '', synopsis varchar(255) NOT NULL default '', commentfulltext text, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPUBLICATIONDATA ( doctype varchar(10) NOT NULL default '', editoboard varchar(250) NOT NULL default '', base varchar(10) NOT NULL default '', logicalbase varchar(10) NOT NULL default '', spokesperson varchar(50) NOT NULL default '', PRIMARY KEY (doctype) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmREFEREES ( doctype varchar(10) NOT NULL default '', categ varchar(10) NOT NULL default '', name varchar(50) NOT NULL default '', address varchar(50) NOT NULL default '', rid int(11) NOT NULL auto_increment, PRIMARY KEY (rid) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmSUBMISSIONS ( email varchar(50) NOT NULL default '', doctype varchar(10) NOT NULL default '', action varchar(10) NOT NULL default '', status varchar(10) NOT NULL default '', id varchar(30) NOT NULL default '', reference varchar(40) NOT NULL default '', cd datetime NOT NULL default '0000-00-00 00:00:00', md datetime NOT NULL default '0000-00-00 00:00:00' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOOKIES ( id int(15) unsigned NOT NULL auto_increment, name varchar(100) NOT NULL, value text, uid int(15) NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- Scheduler tables CREATE TABLE IF NOT EXISTS schTASK ( id int(15) unsigned NOT NULL auto_increment, proc varchar(255) NOT NULL, host varchar(255) NOT NULL default '', user varchar(50) NOT NULL, runtime datetime NOT NULL, sleeptime varchar(20), arguments mediumblob, status varchar(50), progress varchar(255), priority tinyint(4) NOT NULL default 0, PRIMARY KEY (id), KEY status (status), KEY runtime (runtime), KEY priority (priority) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS hstTASK ( id int(15) unsigned NOT NULL, proc varchar(20) NOT NULL, host varchar(255) NOT NULL default '', user varchar(50) NOT NULL, runtime datetime NOT NULL, sleeptime varchar(20), arguments mediumblob, status varchar(50), progress varchar(255), priority tinyint(4) NOT NULL default 0, PRIMARY KEY (id), KEY status (status), KEY runtime (runtime), KEY priority (priority) ) ENGINE=MyISAM; -- Batch Upload History CREATE TABLE IF NOT EXISTS hstBATCHUPLOAD ( id int(15) unsigned NOT NULL auto_increment, user varchar(50) NOT NULL, submitdate datetime NOT NULL, filename varchar(255) NOT NULL, execdate datetime NOT NULL, id_schTASK int(15) unsigned NOT NULL, batch_mode varchar(15) NOT NULL, PRIMARY KEY (id), KEY user (user) ) ENGINE=MyISAM; -- External collections CREATE TABLE IF NOT EXISTS collection_externalcollection ( id_collection mediumint(9) unsigned NOT NULL default '0', id_externalcollection mediumint(9) unsigned NOT NULL default '0', type tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection, id_externalcollection) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS externalcollection ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL default '', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; -- WebStat tables: CREATE TABLE IF NOT EXISTS staEVENT ( id varchar(255) NOT NULL, number smallint(2) unsigned ZEROFILL NOT NULL auto_increment, name varchar(255), creation_time TIMESTAMP DEFAULT NOW(), cols varchar(255), PRIMARY KEY (id), UNIQUE KEY number (number) ) ENGINE=MyISAM; -- BibClassify tables: CREATE TABLE IF NOT EXISTS clsMETHOD ( id mediumint(9) unsigned NOT NULL, name varchar(50) NOT NULL default '', location varchar(255) NOT NULL default 
'', description varchar(255) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS collection_clsMETHOD ( id_collection mediumint(9) unsigned NOT NULL, id_clsMETHOD mediumint(9) unsigned NOT NULL, PRIMARY KEY (id_collection, id_clsMETHOD) ) ENGINE=MyISAM; -- WebJournal tables: CREATE TABLE IF NOT EXISTS jrnJOURNAL ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(50) NOT NULL default '', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS jrnISSUE ( id_jrnJOURNAL mediumint(9) unsigned NOT NULL, issue_number varchar(50) NOT NULL default '', issue_display varchar(50) NOT NULL default '', date_released datetime NOT NULL default '0000-00-00 00:00:00', date_announced datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id_jrnJOURNAL,issue_number) ) ENGINE=MyISAM; -- tables recording history of record's metadata and fulltext documents: CREATE TABLE IF NOT EXISTS hstRECORD ( id_bibrec mediumint(8) unsigned NOT NULL, marcxml blob NOT NULL, job_id mediumint(15) unsigned NOT NULL, job_name varchar(255) NOT NULL, job_person varchar(255) NOT NULL, job_date datetime NOT NULL, job_details blob NOT NULL, KEY (id_bibrec), KEY (job_id), KEY (job_name), KEY (job_person), KEY (job_date) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS hstDOCUMENT ( id_bibdoc mediumint(9) unsigned NOT NULL, docname varchar(250) NOT NULL, docformat varchar(50) NOT NULL, docversion tinyint(4) unsigned NOT NULL, docsize bigint(15) unsigned NOT NULL, docchecksum char(32) NOT NULL, doctimestamp datetime NOT NULL, action varchar(50) NOT NULL, job_id mediumint(15) unsigned NULL default NULL, job_name varchar(255) NULL default NULL, job_person varchar(255) NULL default NULL, job_date datetime NULL default NULL, job_details blob NULL default NULL, KEY (action), KEY (id_bibdoc), KEY (docname), KEY (docformat), KEY (doctimestamp), KEY (job_id), KEY (job_name), KEY (job_person), KEY (job_date) ) ENGINE=MyISAM; -- BibCirculation tables: CREATE TABLE IF NOT EXISTS crcBORROWER ( id int(15) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', phone varchar(60) default NULL, address varchar(60) default NULL, mailbox varchar(30) default NULL, borrower_since datetime NOT NULL default '0000-00-00 00:00:00', borrower_until datetime NOT NULL default '0000-00-00 00:00:00', notes text, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcILLREQUEST ( id int(15) unsigned NOT NULL auto_increment, id_crcBORROWER int(15) unsigned NOT NULL default '0', barcode varchar(30) NOT NULL default '', period_of_interest_from datetime NOT NULL default '0000-00-00 00:00:00', period_of_interest_to datetime NOT NULL default '0000-00-00 00:00:00', id_crcLIBRARY int(15) unsigned NOT NULL default '0', request_date datetime NOT NULL default '0000-00-00 00:00:00', expected_date datetime NOT NULL default '0000-00-00 00:00:00', arrival_date datetime NOT NULL default '0000-00-00 00:00:00', due_date datetime NOT NULL default '0000-00-00 00:00:00', return_date datetime NOT NULL default '0000-00-00 00:00:00', status varchar(20) NOT NULL default '', cost varchar(30) NOT NULL default '', item_info text, request_type text, borrower_comments text, only_this_edition varchar(10) NOT NULL default '', library_notes text, PRIMARY KEY (id), KEY id_crcborrower (id_crcBORROWER), KEY id_crclibrary (id_crcLIBRARY) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS 
crcITEM ( barcode varchar(30) NOT NULL default '', id_bibrec int(15) unsigned NOT NULL default '0', id_crcLIBRARY int(15) unsigned NOT NULL default '0', collection varchar(60) default NULL, location varchar(60) default NULL, description varchar(60) default NULL, loan_period varchar(30) NOT NULL default '', status varchar(20) NOT NULL default '', creation_date datetime NOT NULL default '0000-00-00 00:00:00', modification_date datetime NOT NULL default '0000-00-00 00:00:00', number_of_requests int(3) unsigned NOT NULL default '0', PRIMARY KEY (barcode), KEY id_bibrec (id_bibrec), KEY id_crclibrary (id_crcLIBRARY) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcLIBRARY ( id int(15) unsigned NOT NULL auto_increment, name varchar(80) NOT NULL default '', address varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', phone varchar(30) NOT NULL default '', type varchar(30) default NULL, notes text, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcLOAN ( id int(15) unsigned NOT NULL auto_increment, id_crcBORROWER int(15) unsigned NOT NULL default '0', id_bibrec int(15) unsigned NOT NULL default '0', barcode varchar(30) NOT NULL default '', loaned_on datetime NOT NULL default '0000-00-00 00:00:00', returned_on date NOT NULL default '0000-00-00', due_date datetime NOT NULL default '0000-00-00 00:00:00', number_of_renewals int(3) unsigned NOT NULL default '0', overdue_letter_number int(3) unsigned NOT NULL default '0', overdue_letter_date datetime NOT NULL default '0000-00-00 00:00:00', status varchar(20) NOT NULL default '', type varchar(20) NOT NULL default '', notes text, PRIMARY KEY (id), KEY id_crcborrower (id_crcBORROWER), KEY id_bibrec (id_bibrec), KEY barcode (barcode) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcLOANREQUEST ( id int(15) unsigned NOT NULL auto_increment, id_crcBORROWER int(15) unsigned NOT NULL default '0', id_bibrec int(15) unsigned NOT NULL default '0', barcode varchar(30) NOT NULL default '', period_of_interest_from datetime NOT NULL default '0000-00-00 00:00:00', period_of_interest_to datetime NOT NULL default '0000-00-00 00:00:00', status varchar(20) NOT NULL default '', notes text, request_date datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id), KEY id_crcborrower (id_crcBORROWER), KEY id_bibrec (id_bibrec), KEY barcode (barcode) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcPURCHASE ( id int(15) unsigned NOT NULL auto_increment, id_bibrec int(15) unsigned NOT NULL default '0', id_crcVENDOR int(15) unsigned NOT NULL default '0', ordered_date datetime NOT NULL default '0000-00-00 00:00:00', expected_date datetime NOT NULL default '0000-00-00 00:00:00', price varchar(20) NOT NULL default '0', status varchar(20) NOT NULL default '', notes text, PRIMARY KEY (id), KEY id_bibrec (id_bibrec), KEY id_crcVENDOR (id_crcVENDOR) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcVENDOR ( id int(15) unsigned NOT NULL auto_increment, name varchar(80) NOT NULL default '', address varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', phone varchar(30) NOT NULL default '', notes text, PRIMARY KEY (id) ) ENGINE=MyISAM; -- BibExport tables: CREATE TABLE IF NOT EXISTS expJOB ( id int(15) unsigned NOT NULL auto_increment, jobname varchar(50) NOT NULL default '', jobfreq mediumint(12) NOT NULL default '0', output_format mediumint(12) NOT NULL default '0', deleted mediumint(12) NOT NULL default '0', lastrun datetime NOT NULL default '0000-00-00 00:00:00', output_directory text, PRIMARY KEY (id), UNIQUE KEY jobname (jobname) ) 
ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expQUERY ( id int(15) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, search_criteria text NOT NULL, output_fields text NOT NULL, notes text, deleted mediumint(12) NOT NULL default '0', PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expJOB_expQUERY ( id_expJOB int(15) NOT NULL, id_expQUERY int(15) NOT NULL, PRIMARY KEY (id_expJOB,id_expQUERY), KEY id_expJOB (id_expJOB), KEY id_expQUERY (id_expQUERY) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expQUERYRESULT ( id int(15) unsigned NOT NULL auto_increment, id_expQUERY int(15) NOT NULL, result text NOT NULL, status mediumint(12) NOT NULL default '0', status_message text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expJOBRESULT ( id int(15) unsigned NOT NULL auto_increment, id_expJOB int(15) NOT NULL, execution_time datetime NOT NULL default '0000-00-00 00:00:00', status mediumint(12) NOT NULL default '0', status_message text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expJOBRESULT_expQUERYRESULT ( id_expJOBRESULT int(15) NOT NULL, id_expQUERYRESULT int(15) NOT NULL, PRIMARY KEY (id_expJOBRESULT, id_expQUERYRESULT), KEY id_expJOBRESULT (id_expJOBRESULT), KEY id_expQUERYRESULT (id_expQUERYRESULT) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_expJOB ( id_user int(15) NOT NULL, id_expJOB int(15) NOT NULL, PRIMARY KEY (id_user, id_expJOB), KEY id_user (id_user), KEY id_expJOB (id_expJOB) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS swrREMOTESERVER ( id int(15) unsigned NOT NULL auto_increment, name varchar(50) unique NOT NULL, host varchar(50) NOT NULL, username varchar(50) NOT NULL, password varchar(50) NOT NULL, email varchar(50) NOT NULL, realm varchar(50) NOT NULL, url_base_record varchar(50) NOT NULL, url_servicedocument varchar(80) NOT NULL, xml_servicedocument longblob, last_update int(15) unsigned NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS swrCLIENTDATA ( id int(15) unsigned NOT NULL auto_increment, id_swrREMOTESERVER int(15) NOT NULL, id_record int(15) NOT NULL, report_no varchar(50) NOT NULL, id_remote varchar(50) NOT NULL, id_user int(15) NOT NULL, user_name varchar(100) NOT NULL, user_email varchar(100) NOT NULL, xml_media_deposit longblob NOT NULL, xml_metadata_submit longblob NOT NULL, submission_date datetime NOT NULL default '0000-00-00 00:00:00', publication_date datetime NOT NULL default '0000-00-00 00:00:00', removal_date datetime NOT NULL default '0000-00-00 00:00:00', link_medias varchar(150) NOT NULL, link_metadata varchar(150) NOT NULL, link_status varchar(150) NOT NULL, status varchar(150) NOT NULL default 'submitted', last_update datetime NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- tables for exception management -- This table is used to log exceptions -- to discover the full details of an exception either check the email -- that are sent to CFG_SITE_ADMIN_EMAIL or look into invenio.err CREATE TABLE IF NOT EXISTS hstEXCEPTION ( id int(15) unsigned NOT NULL auto_increment, name varchar(50) NOT NULL, -- name of the exception filename varchar(255) NULL, -- file where the exception was raised line int(9) NULL, -- line at which the exception was raised last_seen datetime NOT NULL default '0000-00-00 00:00:00', -- last time this exception has been seen last_notified datetime NOT NULL default '0000-00-00 00:00:00', -- last time this exception has been notified counter int(15) NOT NULL default 0, -- internal counter to decide when to notify this exception total int(15) NOT NULL 
default 0, -- total number of times this exception has been seen PRIMARY KEY (id), KEY (last_seen), KEY (last_notified), KEY (total), UNIQUE KEY (name(50), filename(255), line) ) ENGINE=MyISAM; -- tables for BibAuthorID module: CREATE TABLE IF NOT EXISTS `aidPERSONID` ( `id` bigint(15) NOT NULL AUTO_INCREMENT, `personid` bigint(15) NOT NULL, `tag` varchar(50) NOT NULL, `data` varchar(250) NOT NULL, `flag` int NOT NULL DEFAULT '0', `lcul` int NOT NULL DEFAULT '0', PRIMARY KEY (`id`), INDEX `personid-b` (`personid`), INDEX `tag-b` (`tag`), INDEX `data-b` (`data`), INDEX `flag-b` (`flag`), INDEX `tdf-b` (`tag`,`data`,`flag`), INDEX `ptf-b` (`personid`,`tag`,`flag`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidUSERINPUTLOG` ( `id` bigint(15) NOT NULL AUTO_INCREMENT, `transactionid` bigint(15) NOT NULL, `timestamp` datetime NOT NULL, `userinfo` varchar(255) NOT NULL, `personid` bigint(15) NOT NULL, `action` varchar(50) NOT NULL, `tag` varchar(50) NOT NULL, `value` varchar(200) NOT NULL, `comment` text, PRIMARY KEY (`id`), INDEX `transactionid-b` (`transactionid`), INDEX `timestamp-b` (`timestamp`), INDEX `userinfo-b` (`userinfo`), INDEX `personid-b` (`personid`), INDEX `action-b` (`action`), INDEX `tag-b` (`tag`), INDEX `value-b` (`value`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidAUTHORNAMES` ( `id` bigint(15) NOT NULL auto_increment, `Name` varchar(255) NOT NULL, `bibrefs` varchar(200) NOT NULL, `db_name` varchar(255), PRIMARY KEY (`id`), INDEX `Name-b` (`Name`), INDEX `db_Name-b` (`db_name`), INDEX `bibrefs-b` (`bibrefs`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidAUTHORNAMESBIBREFS` ( `id` bigint(15) NOT NULL auto_increment, `Name_id` bigint(15) NOT NULL, `bibref` varchar(200) NOT NULL, PRIMARY KEY (`id`), INDEX `Name_id-b` (`Name_id`), INDEX `bibref-b` (`bibref`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidDOCLIST` ( `id` bigint(15) NOT NULL auto_increment, `bibrecID` bigint(15) NOT NULL, `processed_author` bigint(15) default NULL, PRIMARY KEY (`id`), INDEX `bibrecID-b` (`bibrecID`), INDEX `processed_author-b` (`processed_author`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidREALAUTHORS` ( `id` bigint(15) NOT NULL auto_increment, `realauthorID` bigint(15) NOT NULL, `virtualauthorID` bigint(15) NOT NULL, `p` float NOT NULL, PRIMARY KEY (`id`), INDEX `realauthorID-b` (`realauthorID`), INDEX `virtualauthorID-b` (`virtualauthorID`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidREALAUTHORDATA` ( `id` bigint(15) NOT NULL auto_increment, `realauthorID` bigint(15) NOT NULL, `tag` varchar(50) NOT NULL, `value` varchar(255) NOT NULL, `va_count` int(8) NOT NULL default '0', `va_names_p` double NOT NULL default '0' COMMENT 'Summed VA-Names probability', `va_p` double NOT NULL default '0' COMMENT 'Summed VA probabilities', PRIMARY KEY (`id`), INDEX `realauthorID-b` (`realauthorID`,`tag`), INDEX `value-b` (`value`), INDEX `tag-b` (`tag`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidVIRTUALAUTHORS` ( `id` bigint(15) NOT NULL auto_increment, `virtualauthorID` bigint(15) NOT NULL, `authornamesID` bigint(15) NOT NULL, `p` float NOT NULL, `clusterID` bigint(15) NOT NULL default '0', PRIMARY KEY (`id`), INDEX `authornamesID-b` (`authornamesID`), INDEX `clusterID-b` (`clusterID`), INDEX `virtualauthorID-b` (`virtualauthorID`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidVIRTUALAUTHORSDATA` ( `id` bigint(15) NOT NULL auto_increment, `virtualauthorID` bigint(15) NOT NULL, `tag` varchar(255) NOT NULL, `value` varchar(255) NOT NULL, PRIMARY KEY (`id`), INDEX `virtualauthorID-b` 
(`virtualauthorID`), INDEX `tag-b` (`tag`), INDEX `value-b` (`value`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidVIRTUALAUTHORSCLUSTERS` ( `id` int(15) NOT NULL auto_increment, `cluster_name` varchar(60) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidCACHE` ( `id` int(15) NOT NULL auto_increment, `object_name` varchar(120) NOT NULL, `object_key` varchar(120) NOT NULL, `object_value` text, `last_updated` datetime NOT NULL, PRIMARY KEY (`id`), INDEX `name-b` (`object_name`), INDEX `key-b` (`object_key`), INDEX `last_updated-b` (`last_updated`) ) ENGINE=MyISAM; -- refextract tables: CREATE TABLE IF NOT EXISTS `xtrJOB` ( `id` tinyint(4) NOT NULL AUTO_INCREMENT, `name` varchar(30) NOT NULL, `last_updated` datetime NOT NULL, PRIMARY KEY (`id`) ) ENGINE=MyISAM; -- end of file diff --git a/modules/websearch/doc/hacking/search-engine-api.webdoc b/modules/websearch/doc/hacking/search-engine-api.webdoc index a44c7217c..49153d237 100644 --- a/modules/websearch/doc/hacking/search-engine-api.webdoc +++ b/modules/websearch/doc/hacking/search-engine-api.webdoc @@ -1,367 +1,367 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
     Invenio Search Engine can be called from within your Python programs
     via a high-level, mid-level, or low-level API.
     
     1. High-level API
     
        Description:
     
           The high-level access to the search engine is provided by
           exactly the same function as called from the web interface when
           users submit their queries.  This should guarantee exactly the
           same behaviour, and means that you can pass to the high-level
           API all the arguments as you see them in the URL.
     
           There are two things to note: (i) the function does not check
           for a possibly restricted status of the collection, so
           restricted collections will be searched without asking for a
           password; (ii) the output format argument (``of'') should be set
           to ``id'' (which is the default value), meaning that the
           function returns the list of recIDs found.
     
        Signature:
     
            def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=10, sf="", so="d", sp="", rm="", of="id", ot="", aas=0,
                                       p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0,
                                       recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="",
                                       d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None):
               """Perform search or browse request, without checking for
                  authentication.  Return list of recIDs found, if of=id.
                  Otherwise create web page.
     
                  The arguments are as follows:
     
                    req - mod_python Request class instance.
     
                     cc - current collection (e.g. "ATLAS").  The collection the
                          user started to search/browse from.
     
                       c - collection list (e.g. ["Theses", "Books"]).  The
                           collections the user may have selected/deselected
                           when starting to search from 'cc'.
     
                     ec - external collection list (e.g. ['CiteSeer', 'Google']). The
                          external collections may have been selected/deselected by the
                          user.
     
                      p - pattern to search for (e.g. "ellis and muon or kaon").
     
                      f - field to search within (e.g. "author").
     
                      rg - records in groups of (e.g. "10").  Defines how many
                           hits per collection are displayed on the search
                           results page.
     
                     sf - sort field (e.g. "title").
     
                     so - sort order ("a"=ascending, "d"=descending).
     
                      sp - sort pattern (e.g. "CERN-") -- in case a sort field
                           contains multiple values, this argument tells which
                           one to prefer.
     
                     rm - ranking method (e.g. "jif").  Defines whether results
                          should be ranked by some known ranking method.
     
                      of - output format (e.g. "hb").  Usually a leading "h" means
                           HTML output ("hb" for HTML brief, "hd" for HTML
                           detailed), "x" means XML output, "t" means plain text
                           output, and "id" means no output at all, just returning
                           the list of recIDs found.  (Suitable for high-level API.)
     
                      ot - output only these MARC tags (e.g. "100,700,909C0b").
                           Useful if only some fields are to be shown in the
                           output, e.g. for a library to control certain fields.
     
                    aas - advanced search ("0" means no, "1" means yes).  Whether
                          search was called from within the advanced search
                          interface.
     
                     p1 - first pattern to search for in the advanced search
                          interface.  Much like 'p'.
     
                     f1 - first field to search within in the advanced search
                          interface.  Much like 'f'.
     
                     m1 - first matching type in the advanced search interface.
                          ("a" all of the words, "o" any of the words, "e" exact
                          phrase, "p" partial phrase, "r" regular expression).
     
                    op1 - first operator, to join the first and the second unit
                          in the advanced search interface.  ("a" add, "o" or,
                          "n" not).
     
                     p2 - second pattern to search for in the advanced search
                          interface.  Much like 'p'.
     
                     f2 - second field to search within in the advanced search
                          interface.  Much like 'f'.
     
                     m2 - second matching type in the advanced search interface.
                          ("a" all of the words, "o" any of the words, "e" exact
                          phrase, "p" partial phrase, "r" regular expression).
     
                    op2 - second operator, to join the second and the third unit
                          in the advanced search interface.  ("a" add, "o" or,
                          "n" not).
     
                     p3 - third pattern to search for in the advanced search
                          interface.  Much like 'p'.
     
                     f3 - third field to search within in the advanced search
                          interface.  Much like 'f'.
     
                     m3 - third matching type in the advanced search interface.
                          ("a" all of the words, "o" any of the words, "e" exact
                          phrase, "p" partial phrase, "r" regular expression).
     
                      sc - split by collection ("0" no, "1" yes).  Governs whether
                           we want to present the results in a single huge list,
                           or split by collection.
     
                   jrec - jump to record (e.g. "234").  Used for navigation
                          inside the search results.
     
                  recid - display record ID (e.g. "20000").  Do not
                          search/browse but go straight away to the Detailed
                          record page for the given recID.
     
                 recidb - display record ID bis (e.g. "20010").  If greater than
                          'recid', then display records from recid to recidb.
                          Useful for example for dumping records from the
                          database for reformatting.
     
                   sysno - display old system SYS number (e.g. "").  If you
                           migrated to Invenio from another system and stored
                           your old SYS call numbers, you can use them instead
                           of recid if you wish.
     
                     id - the same as recid, in case recid is not set.  For
                          backwards compatibility.
     
                    idb - the same as recid, in case recidb is not set.  For
                          backwards compatibility.
     
                  sysnb - the same as sysno, in case sysno is not set.  For
                          backwards compatibility.
     
                 action - action to do.  "SEARCH" for searching, "Browse" for
                          browsing.  Default is to search.
     
                      d1 - first datetime in full YYYY-mm-dd HH:MM:SS format
                          (e.g. "1998-08-23 12:34:56"). Useful for search limits
                          on creation/modification date (see 'dt' argument
                          below).  Note that 'd1' takes precedence over d1y, d1m,
                          d1d if these are defined.
     
                    d1y - first date's year (e.g. "1998").  Useful for search
                          limits on creation/modification date.
     
                    d1m - first date's month (e.g. "08").  Useful for search
                          limits on creation/modification date.
     
                    d1d - first date's day (e.g. "23").  Useful for search
                          limits on creation/modification date.
     
                      d2 - second datetime in full YYYY-mm-dd HH:MM:SS format
                          (e.g. "1998-09-02 12:34:56"). Useful for search limits
                          on creation/modification date (see 'dt' argument
                          below).  Note that 'd2' takes precedence over d2y, d2m,
                          d2d if these are defined.
     
                    d2y - second date's year (e.g. "1998").  Useful for search
                          limits on creation/modification date.
     
                    d2m - second date's month (e.g. "09").  Useful for search
                          limits on creation/modification date.
     
                    d2d - second date's day (e.g. "02").  Useful for search
                          limits on creation/modification date.
     
                     dt - first and second date's type (e.g. "c").  Specifies
                          whether to search in creation dates ("c") or in
                          modification dates ("m").  When dt is not set and d1*
                          and d2* are set, the default is "c".
     
                verbose - verbose level (0=min, 9=max).  Useful to print some
                          internal information on the searching process in case
                          something goes wrong.
     
                      ap - alternative patterns (0=no, 1=yes).  In case no exact
                           match is found, the search engine can try alternative
                           patterns, e.g. replacing non-alphanumeric characters
                           by spaces and launching a boolean query.  'ap' defines
                           whether this is wanted.
     
                     ln - language of the search interface (e.g. "en").  Useful
                          for internationalization.
     
                     ec - list of external search engines to search as well
                          (e.g. "SPIRES HEP").
               """
     
        Examples: (retrieving record IDs)
     
           >>> # import the function:
           >>> from invenio.search_engine import perform_request_search
           >>> # get all hits in a collection:
           >>> perform_request_search(cc="ATLAS Communications")
           >>> # search for the word `of' in Theses and Books:
           >>> perform_request_search(p="of", c=["Theses","Books"])
           >>> # search for `muon or kaon' within title:
           >>> perform_request_search(p="muon or kaon", f="title")
            >>> # phrase search (note the quotes):
           >>> perform_request_search(p='"Ellis, J"', f="author")
           >>> # regexp search for a system number
           >>> perform_request_search(p1="^CERN.*2003-001$", f1="reportnumber", m1="r")
           >>> # moi inside Standards gives no hits...
           >>> perform_request_search(p="moi", cc="Standards")
           >>> # but it does if we use alternative patterns:
           >>> perform_request_search(p="moi", cc="Standards", ap=1)
     
        Example: (retrieving MARCXML)
     
           >>> import cStringIO
           >>> tmp = cStringIO.StringIO()
           >>> perform_request_search(req=tmp, p='ellis', of='xm')
           >>> out = tmp.getvalue()
           >>> tmp.close()
           >>> # `out' now contains MARCXML of 12 records found
     
        Example: (retrieving Text MARC, certain tags only)
     
           >>> import cStringIO
           >>> tmp = cStringIO.StringIO()
           >>> perform_request_search(req=tmp, p='higgs', of='tm', ot=['100', '700'])
           >>> out = tmp.getvalue()
           >>> tmp.close()
           >>> print out
           000000085 100__ $$aGirardello, L$$uINFN$$uUniversita di Milano-Bicocca
           000000085 700__ $$aPorrati, Massimo
           000000085 700__ $$aZaffaroni, A
           000000001 100__ $$aPhotolab
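
      Note that in the two examples above the output is captured by passing
      a file-like object as the 'req' argument.  Presumably any object with
      a write() method would do; a sketch writing the same Text MARC output
      straight to a file (not verified against every output format):

      >>> out_file = open('/tmp/higgs.marc', 'w')
      >>> perform_request_search(req=out_file, p='higgs', of='tm', ot=['100', '700'])
      >>> out_file.close()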
     
     2. Mid-level API
     
        Description:
     
           The mid-level API is provided by a search_pattern() function
           that only searches for the given pattern in the given field
           according to the given matching type.  This function does not
           know anything about collections.  The function does not wash its
           arguments; it expects them to be `clean' already.  The pattern
           is split into `basic search units' for which a boolean query is
    -      launched.  The function returns an instance of the HitSet class.
    +      launched.  The function returns an instance of the intbitset class.
           Note that if you want to obtain the list of recIDs (as with the
           high-level API), you can invoke the ``tolist()'' method on a
           hitset.
     
        Signature:
     
           def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0):
               """Search for complex pattern 'p' within field 'f' according to
                  matching type 'm'.  Return hitset of recIDs.
     
                   The function uses a multi-stage searching algorithm in case
                   no exact match is found.  See the Search Internals document
                   for a detailed description.
     
                   The 'ap' argument governs whether alternative patterns are to
                   be used in case there is no direct hit for (p,f,m).  For
                   example, whether to replace non-alphanumeric characters by
                   spaces if it would give some hits.  See the Search Internals
                   document for a detailed description.  (ap=0 forbids the
                   alternative pattern usage, ap=1 permits it.)
     
                   The 'of' argument governs whether or not to print some
                   information to the user in case no match is found.  (Usually
                   it prints the information for HTML formats; otherwise it is
                   silent.)
     
                  The 'verbose' argument controls the level of debugging information
                  to be printed (0=least, 9=most).
     
                  All the parameters are assumed to have been previously washed.
     
                  This function is suitable as a mid-level API.
               """
     
        Examples:
     
           >>> # import the function:
           >>> from invenio.search_engine import search_pattern
           >>> # search for muon or kaon in any field:
           >>> search_pattern(p="muon or kaon").tolist()
           >>> # the following finds nothing by default...
           >>> search_pattern(p="cern-moi").tolist()
           >>> # ...but it does find something if we allow alternative patterns:
           >>> search_pattern(p="cern-moi", ap=1).tolist()
           >>> # wildcard search for a report number:
           >>> search_pattern(p="CERN-LHC-PROJECT-REPORT-40*", f="reportnumber").tolist()
           >>> # regexp search for a report number with possible trailing subjects:
           >>> search_pattern(p="^CERN-LHC-PROJECT-REPORT-40(-|$)", f="reportnumber", m="r").tolist()
     
     3. Low-level API
     
        Description:
     
            The low-level API is provided by the search_unit() function,
            which assumes its arguments to be basic search units already.
            Therefore it does not know anything about boolean queries, etc.
    -      The function returns an instance of the HitSet class.  Note that
    +      The function returns an instance of the intbitset class.  Note that
           if you want to obtain the list of recIDs (as with the high-level
           API), you can invoke the ``tolist()'' method on a hitset.
     
        Signature:
     
           def search_unit(p, f=None, m=None):
               """Search for basic search unit defined by pattern 'p' and field
                  'f' and matching type 'm'.  Return hitset of recIDs.
     
                  All the parameters are assumed to have been previously washed.
                  'p' is assumed to be already a ``basic search unit'' so that it
                   is searched as such and is not broken up in any way.  Only
                   wildcard and span queries are detected inside 'p'.
     
                  This function is suitable as a low-level API.
               """
     
        Examples:
     
           >>> # import the function:
           >>> from invenio.search_engine import search_unit
           >>> # search moi in any field:
           >>> search_unit(p="moi").tolist()
           >>> # this one will not match:
           >>> search_unit(p="muon or kaon").tolist()
           >>> # regexp search for a report number with possible trailing subjects:
           >>> search_unit(p="^CERN-PS-99-037(-|$)", f="reportnumber", m="r").tolist()
     
     More entry points may be created, but these three levels of access to
     the search engine should cover all your needs.
     
    diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py index a34d905c6..b9e2bf740 100644 --- a/modules/websearch/lib/search_engine.py +++ b/modules/websearch/lib/search_engine.py @@ -1,5492 +1,5502 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0301 """Invenio Search Engine in mod_python.""" __lastupdated__ = """$Date$""" __revision__ = "$Id$" ## import general modules: import cgi import cStringIO import copy import string import os import re import time import urllib import urlparse import zlib import sys if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 ## import Invenio stuff: from invenio.config import \ CFG_CERN_SITE, \ CFG_INSPIRE_SITE, \ CFG_OAI_ID_FIELD, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_CALL_BIBFORMAT, \ CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \ CFG_WEBSEARCH_FIELDS_CONVERT, \ CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \ CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_WEBSEARCH_WILDCARD_LIMIT, \ CFG_WEBSEARCH_SYNONYM_KBRS, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_LOGDIR, \ CFG_BIBFORMAT_HIDDEN_TAGS, \ CFG_SITE_URL, \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_SOLR_URL, \ CFG_SITE_RECORD, \ CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT, \ CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY from invenio.search_engine_config import InvenioWebSearchUnknownCollectionError, InvenioWebSearchWildcardLimitError from invenio.search_engine_utils import get_fieldvalues from invenio.bibrecord import create_record from invenio.bibrank_record_sorter import get_bibrank_methods, rank_records, is_method_valid from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list from invenio.bibindex_engine_stemmer import stem from invenio.bibindex_engine_tokenizer import wash_author_name, author_name_requires_phrase_search from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box from invenio.bibknowledge import get_kbr_values from invenio.data_cacher import DataCacher from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search from invenio.access_control_admin import acc_get_action_id from invenio.access_control_config import VIEWRESTRCOLL, \ 
CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS from invenio.websearchadminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts -from invenio.intbitset import intbitset as HitSet +from invenio.intbitset import intbitset from invenio.dbquery import DatabaseError, deserialize_via_marshal, InvenioDbQueryWildcardLimitError from invenio.access_control_engine import acc_authorize_action from invenio.errorlib import register_exception from invenio.textutils import encode_for_xml, wash_for_utf8 from invenio.htmlutils import get_mathjax_header from invenio.htmlutils import nmtoken_from_string import invenio.template webstyle_templates = invenio.template.load('webstyle') webcomment_templates = invenio.template.load('webcomment') from invenio.bibrank_citation_searcher import calculate_cited_by_list, \ calculate_co_cited_with_list, get_records_with_num_cites, get_self_cited_by, \ get_refersto_hitset, get_citedby_hitset from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box from invenio.dbquery import run_sql, run_sql_with_limit, \ get_table_update_time, Error from invenio.webuser import getUid, collect_user_info, session_param_set from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \ SpiresToInvenioSyntaxConverter from invenio import webinterface_handler_config as apache from invenio.solrutils import solr_get_bitset try: import invenio.template websearch_templates = invenio.template.load('websearch') except: pass from invenio.websearch_external_collections import calculate_hosted_collections_results, do_calculate_hosted_collections_results from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL) ## global vars: cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical? 
## precompile some often-used regexp for speed reasons: re_word = re.compile('[\s]') re_quotes = re.compile('[\'\"]') re_doublequote = re.compile('\"') re_equal = re.compile('\=') re_logical_and = re.compile('\sand\s', re.I) re_logical_or = re.compile('\sor\s', re.I) re_logical_not = re.compile('\snot\s', re.I) re_operators = re.compile(r'\s([\+\-\|])\s') re_pattern_wildcards_after_spaces = re.compile(r'(\s)[\*\%]+') re_pattern_single_quotes = re.compile("'(.*?)'") re_pattern_double_quotes = re.compile("\"(.*?)\"") re_pattern_regexp_quotes = re.compile("\/(.*?)\/") re_pattern_spaces_after_colon = re.compile(r'(:\s+)') re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+') re_pattern_space = re.compile("__SPACE__") re_pattern_today = re.compile("\$TODAY\$") re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)') re_unicode_lowercase_a = re.compile(unicode(r"(?u)[áàäâãå]", "utf-8")) re_unicode_lowercase_ae = re.compile(unicode(r"(?u)[æ]", "utf-8")) re_unicode_lowercase_e = re.compile(unicode(r"(?u)[éèëê]", "utf-8")) re_unicode_lowercase_i = re.compile(unicode(r"(?u)[íìïî]", "utf-8")) re_unicode_lowercase_o = re.compile(unicode(r"(?u)[óòöôõø]", "utf-8")) re_unicode_lowercase_u = re.compile(unicode(r"(?u)[úùüû]", "utf-8")) re_unicode_lowercase_y = re.compile(unicode(r"(?u)[ýÿ]", "utf-8")) re_unicode_lowercase_c = re.compile(unicode(r"(?u)[çć]", "utf-8")) re_unicode_lowercase_n = re.compile(unicode(r"(?u)[ñ]", "utf-8")) re_unicode_uppercase_a = re.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8")) re_unicode_uppercase_ae = re.compile(unicode(r"(?u)[Æ]", "utf-8")) re_unicode_uppercase_e = re.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8")) re_unicode_uppercase_i = re.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8")) re_unicode_uppercase_o = re.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8")) re_unicode_uppercase_u = re.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8")) re_unicode_uppercase_y = re.compile(unicode(r"(?u)[Ý]", "utf-8")) re_unicode_uppercase_c = re.compile(unicode(r"(?u)[ÇĆ]", "utf-8")) re_unicode_uppercase_n = re.compile(unicode(r"(?u)[Ñ]", "utf-8")) re_latex_lowercase_a = re.compile("\\\\[\"H'`~^vu=k]\{?a\}?") re_latex_lowercase_ae = re.compile("\\\\ae\\{\\}?") re_latex_lowercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?e\\}?") re_latex_lowercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?i\\}?") re_latex_lowercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?o\\}?") re_latex_lowercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?u\\}?") re_latex_lowercase_y = re.compile("\\\\[\"']\\{?y\\}?") re_latex_lowercase_c = re.compile("\\\\['uc]\\{?c\\}?") re_latex_lowercase_n = re.compile("\\\\[c'~^vu]\\{?n\\}?") re_latex_uppercase_a = re.compile("\\\\[\"H'`~^vu=k]\\{?A\\}?") re_latex_uppercase_ae = re.compile("\\\\AE\\{?\\}?") re_latex_uppercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?E\\}?") re_latex_uppercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?I\\}?") re_latex_uppercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?O\\}?") re_latex_uppercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?U\\}?") re_latex_uppercase_y = re.compile("\\\\[\"']\\{?Y\\}?") re_latex_uppercase_c = re.compile("\\\\['uc]\\{?C\\}?") re_latex_uppercase_n = re.compile("\\\\[c'~^vu]\\{?N\\}?") class RestrictedCollectionDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = [] try: res = run_sql("""SELECT DISTINCT ar.value FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (VIEWRESTRCOLL_ID,)) except Exception: # database problems, return empty cache return [] for coll in res: 
ret.append(coll[0]) return ret def timestamp_verifier(): return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT')) DataCacher.__init__(self, cache_filler, timestamp_verifier) def collection_restricted_p(collection, recreate_cache_if_needed=True): if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() return collection in restricted_collection_cache.cache try: restricted_collection_cache.is_ok_p except Exception: restricted_collection_cache = RestrictedCollectionDataCacher() def ziplist(*lists): """Just like zip(), but returns lists of lists instead of lists of tuples Example: zip([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) => [(f1, p1, op1), (f2, p2, op2), (f3, p3, '')] ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) => [[f1, p1, op1], [f2, p2, op2], [f3, p3, '']] FIXME: This is handy to have, and should live somewhere else, like miscutil.really_useful_functions or something. XXX: Starting in python 2.6, the same can be achieved (faster) by using itertools.izip_longest(); when the minimum recommended Python is bumped, we should use that instead. """ def l(*items): return list(items) return map(l, *lists) def get_permitted_restricted_collections(user_info, recreate_cache_if_needed=True): """Return a list of collection that are restricted but for which the user is authorized.""" if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() ret = [] for collection in restricted_collection_cache.cache: if acc_authorize_action(user_info, 'viewrestrcoll', collection=collection)[0] == 0: ret.append(collection) return ret def get_all_restricted_recids(): """ Return the set of all the restricted recids, i.e. the ids of those records which belong to at least one restricted collection. """ - ret = HitSet() + ret = intbitset() for collection in restricted_collection_cache.cache: ret |= get_collection_reclist(collection) return ret def get_restricted_collections_for_recid(recid, recreate_cache_if_needed=True): """ Return the list of restricted collection names to which recid belongs. """ if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() collection_reclist_cache.recreate_cache_if_needed() return [collection for collection in restricted_collection_cache.cache if recid in get_collection_reclist(collection, recreate_cache_if_needed=False)] def is_user_owner_of_record(user_info, recid): """ Check if the user is owner of the record, i.e. he is the submitter and/or belongs to a owner-like group authorized to 'see' the record. @param user_info: the user_info dictionary that describe the user. @type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: True if the user is 'owner' of the record; False otherwise @rtype: bool """ authorized_emails_or_group = [] for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS: authorized_emails_or_group.extend(get_fieldvalues(recid, tag)) for email_or_group in authorized_emails_or_group: if email_or_group in user_info['group']: return True email = email_or_group.strip().lower() if user_info['email'].strip().lower() == email: return True return False def check_user_can_view_record(user_info, recid): """ Check if the user is authorized to view the given recid. The function grants access in two cases: either user has author rights on this record, or he has view rights to the primary collection this record belongs to. @param user_info: the user_info dictionary that describe the user. 
@type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: (0, ''), when authorization is granted, (>0, 'message') when authorization is not granted @rtype: (int, string) """ policy = CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY.strip().upper() if isinstance(recid, str): recid = int(recid) if record_public_p(recid): ## The record is already known to be public. return (0, '') ## At this point, either webcoll has not yet run or there are some ## restricted collections. Let's see first if the user own the record. if is_user_owner_of_record(user_info, recid): ## Perfect! It's authorized then! return (0, '') restricted_collections = get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False) if restricted_collections: ## If there are restricted collections the user must be authorized to all/any of them (depending on the policy) auth_code, auth_msg = 0, '' for collection in get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collection) if auth_code and policy != 'ANY': ## Ouch! the user is not authorized to this collection return (auth_code, auth_msg) elif auth_code == 0 and policy == 'ANY': ## Good! At least one collection is authorized return (0, '') ## Depending on the policy, the user will be either authorized or not return auth_code, auth_msg if is_record_in_any_collection(recid, recreate_cache_if_needed=False): ## the record is not in any restricted collection return (0, '') elif record_exists(recid) > 0: ## We are in the case where webcoll has not run. ## Let's authorize SUPERADMIN (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=None) if auth_code == 0: return (0, '') else: ## Too bad. Let's print a nice message: return (1, """The record you are trying to access has just been submitted to the system and needs to be assigned to the proper collections. It is currently restricted for security reasons until the assignment will be fully completed. Please come back later to properly access this record.""") else: ## The record either does not exists or has been deleted. ## Let's handle these situations outside of this code. return (0, '') class IndexStemmingDataCacher(DataCacher): """ Provides cache for stemming information for word/phrase indexes. This class is not to be used directly; use function get_index_stemming_language() instead. """ def __init__(self): def cache_filler(): try: res = run_sql("""SELECT id, stemming_language FROM idxINDEX""") except DatabaseError: # database problems, return empty cache return {} return dict(res) def timestamp_verifier(): return get_table_update_time('idxINDEX') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: index_stemming_cache.is_ok_p except Exception: index_stemming_cache = IndexStemmingDataCacher() def get_index_stemming_language(index_id, recreate_cache_if_needed=True): """Return stemming langugage for given index.""" if recreate_cache_if_needed: index_stemming_cache.recreate_cache_if_needed() return index_stemming_cache.cache[index_id] class CollectionRecListDataCacher(DataCacher): """ Provides cache for collection reclist hitsets. This class is not to be used directly; use function get_collection_reclist() instead. 
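
## Aside: every *DataCacher subclass in this file follows the same
## contract -- cache_filler() rebuilds the cache from the database, and
## timestamp_verifier() returns the relevant table's last update time so
## that recreate_cache_if_needed() can decide whether the cached copy is
## stale.  A minimal, self-contained model of that contract (illustrative
## only; the "_toy" names are not part of Invenio):
#
# class _ToyDataCacher(object):
#     def __init__(self, cache_filler, timestamp_verifier):
#         self.cache_filler = cache_filler
#         self.timestamp_verifier = timestamp_verifier
#         self.timestamp = None
#         self.cache = None
#         self.recreate_cache_if_needed()
#     def recreate_cache_if_needed(self):
#         latest = self.timestamp_verifier()
#         if self.timestamp is None or latest > self.timestamp:
#             self.cache = self.cache_filler()   # rebuild from the DB
#             self.timestamp = latest
#
# cacher = _ToyDataCacher(lambda: {'Theses': None},
#                         lambda: '2011-01-01 00:00:00')
# cacher.recreate_cache_if_needed()  # no-op until the timestamp advances
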
""" def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT name,reclist FROM collection") except Exception: # database problems, return empty cache return {} for name, reclist in res: ret[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) return ret def timestamp_verifier(): return get_table_update_time('collection') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_reclist_cache.is_ok_p: raise Exception except Exception: collection_reclist_cache = CollectionRecListDataCacher() def get_collection_reclist(coll, recreate_cache_if_needed=True): """Return hitset of recIDs that belong to the collection 'coll'.""" if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() if not collection_reclist_cache.cache[coll]: # not yet it the cache, so calculate it and fill the cache: - set = HitSet() + set = intbitset() query = "SELECT nbrecs,reclist FROM collection WHERE name=%s" res = run_sql(query, (coll, ), 1) if res: try: - set = HitSet(res[0][1]) + set = intbitset(res[0][1]) except: pass collection_reclist_cache.cache[coll] = set # finally, return reclist: return collection_reclist_cache.cache[coll] def get_available_output_formats(visible_only=False): """ Return the list of available output formats. When visible_only is True, returns only those output formats that have visibility flag set to 1. """ formats = [] query = "SELECT code,name FROM format" if visible_only: query += " WHERE visibility='1'" query += " ORDER BY name ASC" res = run_sql(query) if res: # propose found formats: for code, name in res: formats.append({ 'value' : code, 'text' : name }) else: formats.append({'value' : 'hb', 'text' : "HTML brief" }) return formats class SearchResultsCache(DataCacher): """ Provides temporary lazy cache for Search Results. Useful when users click on `next page'. """ def __init__(self): def cache_filler(): return {} def timestamp_verifier(): return '1970-01-01 00:00:00' # lazy cache is always okay; # its filling is governed by # CFG_WEBSEARCH_SEARCH_CACHE_SIZE DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not search_results_cache.is_ok_p: raise Exception except Exception: search_results_cache = SearchResultsCache() class CollectionI18nNameDataCacher(DataCacher): """ Provides cache for I18N collection names. This class is not to be used directly; use function get_coll_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name except Exception: # database problems return {} for c, ln, i18nname in res: if i18nname: if not ret.has_key(c): ret[c] = {} ret[c][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('collectionname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_i18nname_cache.is_ok_p: raise Exception except Exception: collection_i18nname_cache = CollectionI18nNameDataCacher() def get_coll_i18nname(c, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted collection name (of the name type `ln' (=long name)) for collection C in language LN. This function uses collection_i18nname_cache, but it verifies whether the cache is up-to-date first by default. This verification step is performed by checking the DB table update time. 
So, if you call this function 1000 times, it can get very slow because it will do 1000 table update time verifications, even though collection names change not that often. Hence the parameter VERIFY_CACHE_TIMESTAMP which, when set to False, will assume the cache is already up-to-date. This is useful namely in the generation of collection lists for the search results page. """ if verify_cache_timestamp: collection_i18nname_cache.recreate_cache_if_needed() out = c try: out = collection_i18nname_cache.cache[c][ln] except KeyError: pass # translation in LN does not exist return out class FieldI18nNameDataCacher(DataCacher): """ Provides cache for I18N field names. This class is not to be used directly; use function get_field_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name except Exception: # database problems, return empty cache return {} for f, ln, i18nname in res: if i18nname: if not ret.has_key(f): ret[f] = {} ret[f][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('fieldname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not field_i18nname_cache.is_ok_p: raise Exception except Exception: field_i18nname_cache = FieldI18nNameDataCacher() def get_field_i18nname(f, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted field name (of type 'ln', 'long name') for field F in language LN. If VERIFY_CACHE_TIMESTAMP is set to True, then verify DB timestamp and field I18N name cache timestamp and refresh cache from the DB if needed. Otherwise don't bother checking DB timestamp and return the cached value. (This is useful when get_field_i18nname is called inside a loop.) """ if verify_cache_timestamp: field_i18nname_cache.recreate_cache_if_needed() out = f try: out = field_i18nname_cache.cache[f][ln] except KeyError: pass # translation in LN does not exist return out def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" out = [] res = run_sql("SELECT id,name FROM collection ORDER BY name ASC") for c_id, c_name in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c_name, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable out.append([c_name, c_printable]) return out def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" colls_nicely_ordered = [] res = run_sql("""SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC""", (collid, )) for c, cid in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." 
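
## Aside: note the third argument in the get_coll_i18nname(c, ln, False)
## calls in these two collection-listing helpers -- inside per-collection
## loops the translation cache is read with verify_cache_timestamp=False,
## so the DB table update time is checked once per request rather than
## once per collection.  Illustrative calling pattern (the loop itself is
## hypothetical; the names are the real ones from this file):
#
# collection_i18nname_cache.recreate_cache_if_needed()  # check DB once
# for name in names_to_display:
#     label = get_coll_i18nname(name, ln, False)        # cache-only reads
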
if level: c_printable = " " + level * '-' + " " + c_printable colls_nicely_ordered.append([c, c_printable]) colls_nicely_ordered = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1, ln=ln) return colls_nicely_ordered def get_index_id_from_field(field): """ Return index id with name corresponding to FIELD, or the first index id where the logical field code named FIELD is indexed. Return zero in case there is no index defined for this field. Example: field='author', output=4. """ out = 0 if not field: field = 'global' # empty string field means 'global' index (field 'anyfield') # first look in the index table: res = run_sql("""SELECT id FROM idxINDEX WHERE name=%s""", (field,)) if res: out = res[0][0] return out # not found in the index table, now look in the field table: res = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX LIMIT 1""", (field,)) if res: out = res[0][0] return out def get_words_from_pattern(pattern): "Returns list of whitespace-separated words from pattern." words = {} for word in string.split(pattern): if not words.has_key(word): words[word] = 1 return words.keys() def create_basic_search_units(req, p, f, m=None, of='hb'): """Splits search pattern and search field into a list of independently searchable units. - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where 'operator' is set union (|), set intersection (+) or set exclusion (-); 'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics'); 'field' is either a code like 'title' or MARC tag like '100__a'; 'type' is the search type ('w' for word file search, 'a' for access file search). - Optionally, the function accepts the match type argument 'm'. If it is set (e.g. from advanced search interface), then it performs this kind of matching. If it is not set, then a guess is made. 'm' can have values: 'a'='all of the words', 'o'='any of the words', 'p'='phrase/substring', 'r'='regular expression', 'e'='exact value'. - Warnings are printed on req (when not None) in case of HTML output formats.""" opfts = [] # will hold (o,p,f,t,h) units # FIXME: quick hack for the journal index if f == 'journal': opfts.append(['+', p, f, 'w']) return opfts ## check arguments: is desired matching type set? if m: ## A - matching type is known; good! if m == 'e': # A1 - exact value: opfts.append(['+', p, f, 'a']) # '+' since we have only one unit elif m == 'p': # A2 - phrase/substring: opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit elif m == 'r': # A3 - regular expression: opfts.append(['+', p, f, 'r']) # '+' since we have only one unit elif m == 'a' or m == 'w': # A4 - all of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): opfts.append(['+', word, f, 'w']) # '+' in all units elif m == 'o': # A5 - any of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): if len(opfts)==0: opfts.append(['+', word, f, 'w']) # '+' in the first unit else: opfts.append(['|', word, f, 'w']) # '|' in further units else: if of.startswith("h"): print_warning(req, "Matching type '%s' is not implemented yet." 
% cgi.escape(m), "Warning") opfts.append(['+', "%" + p + "%", f, 'w']) else: ## B - matching type is not known: let us try to determine it by some heuristics if f and p[0] == '"' and p[-1] == '"': ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search opfts.append(['+', p[1:-1], f, 'a']) elif f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor') and author_name_requires_phrase_search(p): ## B1 - do we search in author, and does 'p' contain space/comma/dot/etc? ## => doing washed ACC search opfts.append(['+', p, f, 'a']) elif f and p[0] == "'" and p[-1] == "'": ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search opfts.append(['+', '%' + p[1:-1] + '%', f, 'a']) elif f and p[0] == "/" and p[-1] == "/": ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search opfts.append(['+', p[1:-1], f, 'r']) elif f and string.find(p, ',') >= 0: ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search opfts.append(['+', p, f, 'a']) elif f and str(f[0:2]).isdigit(): ## B2 - does 'f' exist and starts by two digits? => doing ACC search opfts.append(['+', p, f, 'a']) else: ## B3 - doing WRD search, but maybe ACC too # search units are separated by spaces unless the space is within single or double quotes # so, let us replace temporarily any space within quotes by '__SPACE__' p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # and spaces after colon as well: p = re_pattern_spaces_after_colon.sub(lambda x: string.replace(x.group(1), ' ', '__SPACE__'), p) # wash argument: p = re_equal.sub(":", p) p = re_logical_and.sub(" ", p) p = re_logical_or.sub(" |", p) p = re_logical_not.sub(" -", p) p = re_operators.sub(r' \1', p) for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item") pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' ' # firstly, determine set operator if pi[0] == '+' or pi[0] == '-' or pi[0] == '|': oi = pi[0] pi = pi[1:] else: # okay, there is no operator, so let us decide what to do by default oi = '+' # by default we are doing set intersection... 
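
## Worked example of this operator scan (the query is hypothetical):
## after the quote/space protection above, a pattern such as
##
##     ellis +muon -kaon |pion
##
## is split on whitespace and the leading character of each item selects
## the set operator, yielding roughly these (operator, pattern) pairs:
##
##     ('+', 'ellis'), ('+', 'muon'), ('-', 'kaon'), ('|', 'pion')
##
## i.e. items without an explicit prefix default to intersection ('+').
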
            # secondly, determine search pattern and field:
            if string.find(pi, ":") > 0:
                fi, pi = string.split(pi, ":", 1)
                fi = wash_field(fi)
                # test whether fi is a real index code or a MARC-tag defined code:
                if fi in get_fieldcodes() or '00' <= fi[:2] <= '99':
                    pass
                else:
                    # it is not, so join it back:
                    fi, pi = f, fi + ":" + pi
            else:
                fi, pi = f, pi
            # wash 'fi' argument:
            fi = wash_field(fi)
            # wash 'pi' argument:
            pi = pi.strip() # strip eventual spaces
            if re_quotes.match(pi):
                # B3a - quotes are found => do ACC search (phrase search)
                if pi[0] == '"' and pi[-1] == '"':
                    pi = string.replace(pi, '"', '') # remove quote signs
                    opfts.append([oi, pi, fi, 'a'])
                elif pi[0] == "'" and pi[-1] == "'":
                    pi = string.replace(pi, "'", "") # remove quote signs
                    opfts.append([oi, "%" + pi + "%", fi, 'a'])
                else:
                    # unbalanced quotes, so fall back to WRD query:
                    opfts.append([oi, pi, fi, 'w'])
            elif pi.startswith('/') and pi.endswith('/'):
                # B3b - pi has slashes around => do regexp search
                opfts.append([oi, pi[1:-1], fi, 'r'])
            elif fi and str(fi[0]).isdigit() and str(fi[1]).isdigit():
                # B3c - fi exists and starts by two digits => do ACC search
                opfts.append([oi, pi, fi, 'a'])
            elif fi and not get_index_id_from_field(fi) and get_field_name(fi):
                # B3d - logical field fi exists but there is no WRD index for fi => try ACC search
                opfts.append([oi, pi, fi, 'a'])
            else:
                # B3e - general case => do WRD search
                pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
                for pii in get_words_from_pattern(pi):
                    opfts.append([oi, pii, fi, 'w'])

    ## sanity check:
    for i in range(0, len(opfts)):
        try:
            pi = opfts[i][1]
            if pi == '*':
                if of.startswith("h"):
                    print_warning(req, "Ignoring standalone wildcard word.", "Warning")
                del opfts[i]
            if pi == '' or pi == ' ':
                fi = opfts[i][2]
                if fi:
                    if of.startswith("h"):
                        print_warning(req, "Ignoring empty %s search term." % fi, "Warning")
                del opfts[i]
        except:
            pass

    ## replace old logical field names if applicable:
    if CFG_WEBSEARCH_FIELDS_CONVERT:
        opfts = [[o, p, wash_field(f), t] for o, p, f, t in opfts]

    ## return search units:
    return opfts

def page_start(req, of, cc, aas, ln, uid, title_message=None,
               description='', keywords='', recID=-1, tab='', p=''):
    "Start page according to given output format."
    _ = gettext_set_language(ln)
    if not req or isinstance(req, cStringIO.OutputType):
        return # we were called from CLI
    if not title_message:
        title_message = _("Search Results")
    content_type = get_output_format_content_type(of)
    if of.startswith('x'):
        if of == 'xr':
            # we are doing RSS output
            req.content_type = "application/rss+xml"
            req.send_http_header()
            req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
        else:
            # we are doing XML output:
            req.content_type = "text/xml"
            req.send_http_header()
            req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
    elif of.startswith('t') or str(of[0:3]).isdigit():
        # we are doing plain text output:
        req.content_type = "text/plain"
        req.send_http_header()
    elif of == "id":
        pass # nothing to do, we shall only return list of recIDs
    elif content_type == 'text/html':
        # we are doing HTML output:
        req.content_type = "text/html"
        req.send_http_header()
        if not description:
            description = "%s %s."
% (cc, _("Search Results")) if not keywords: keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln, False), get_coll_i18nname(cc, ln, False)) ## generate RSS URL: argd = {} if req.args: argd = cgi.parse_qs(req.args) rssurl = websearch_templates.build_rss_url(argd) ## add MathJax if displaying single records (FIXME: find ## eventual better place to this code) if of.lower() in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS: metaheaderadd = get_mathjax_header(req.is_https()) else: metaheaderadd = '' ## generate navtrail: navtrail = create_navtrail_links(cc, aas, ln) if navtrail != '': navtrail += ' > ' if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \ recID != -1: # If we are not in information tab in HD format, customize # the nav. trail to have a link back to main record. (Due # to the way perform_request_search() works, hb # (lowercase) is equal to hd) navtrail += ' %s' % \ (CFG_SITE_URL, CFG_SITE_RECORD, recID, title_message) if (of != '' or of.lower() != 'hd') and of != 'hb': # Export format_name = of query = "SELECT name FROM format WHERE code=%s" res = run_sql(query, (of,)) if res: format_name = res[0][0] navtrail += ' > ' + format_name else: # Discussion, citations, etc. tabs tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label'] navtrail += ' > ' + _(tab_label) else: navtrail += title_message if p: # we are serving search/browse results pages, so insert pattern: navtrail += ": " + cgi.escape(p) title_message = cgi.escape(p) + " - " + title_message body_css_classes = [] if cc: # we know the collection, lets allow page styles based on cc #collection names may not satisfy rules for css classes which #are something like: -?[_a-zA-Z]+[_a-zA-Z0-9-]* #however it isn't clear what we should do about cases with #numbers, so we leave them to fail. Everything else becomes "_" css = nmtoken_from_string(cc).replace('.','_').replace('-','_').replace(':','_') body_css_classes.append(css) ## finally, print page header: req.write(pageheaderonly(req=req, title=title_message, navtrail=navtrail, description=description, keywords=keywords, metaheaderadd=metaheaderadd, uid=uid, language=ln, navmenuid='search', navtrail_append_title_p=0, rssurl=rssurl, body_css_classes=body_css_classes)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) #else: # req.send_http_header() def page_end(req, of="hb", ln=CFG_SITE_LANG): "End page according to given output format: e.g. close XML tags, add HTML footer, etc." if of == "id": return [] # empty recID list if not req: return # we were called from CLI if of.startswith('h'): req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req)) return def create_page_title_search_pattern_info(p, p1, p2, p3): """Create the search pattern bit for the page web page HTML header. Basically combine p and (p1,p2,p3) together so that the page header may be filled whether we are in the Simple Search or Advanced Search interface contexts.""" out = "" if p: out = p else: out = p1 if p2: out += ' ' + p2 if p3: out += ' ' + p3 return out def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." 
_ = gettext_set_language(ln) box = "" # day box += """<select name="%sd">""" % name box += """<option value="">%s""" % _("any day") for day in range(1, 32): box += """<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day) box += """</select>""" # month box += """<select name="%sm">""" % name box += """<option value="">%s""" % _("any month") for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \ (5, _("May")), (6, _("June")), (7, _("July")), (8, _("August")), \ (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]: box += """<option value="%02d"%s>%s""" % (mm, is_selected(mm, selected_month), month) box += """</select>""" # year box += """<select name="%sy">""" % name box += """<option value="">%s""" % _("any year") this_year = int(time.strftime("%Y", time.localtime())) for year in range(this_year-20, this_year+1): box += """<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year) box += """</select>""" return box def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action=""): """Create search box for 'search again in the results page' functionality.""" # load the right message language _ = gettext_set_language(ln) # some computations cc_intl = get_coll_i18nname(cc, ln, False) cc_colID = get_colID(cc) colls_nicely_ordered = [] if cfg_nicely_ordered_collection_list: colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln) else: colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln) colls_nice = [] for (cx, cx_printable) in colls_nicely_ordered: if not cx.startswith("Unnamed collection"): colls_nice.append({ 'value' : cx, 'text' : cx_printable }) coll_selects = [] if colls and colls[0] != CFG_SITE_NAME: # some collections are defined, so print these first, and only then print 'add another collection' heading: for c in colls: if c: temp = [] temp.append({ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }) # this field is used to remove the current collection from the ones to be searched. temp.append({ 'value' : '', 'text' : '*** %s ***' % _("remove this collection") }) for val in colls_nice: # print collection: if not cx.startswith("Unnamed collection"): temp.append({ 'value' : val['value'], 'text' : val['text'], 'selected' : (c == re.sub("^[\s\-]*","", val['value'])) }) coll_selects.append(temp) coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % _("add another collection") }] + colls_nice) else: # we searched in CFG_SITE_NAME, so print 'any public collection' heading coll_selects.append([{ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }] + colls_nice) ## ranking methods ranks = [{ 'value' : '', 'text' : "- %s %s -" % (_("OR").lower (), _("rank by")), }] for (code, name) in get_bibrank_methods(cc_colID, ln): # propose found rank methods: ranks.append({ 'value' : code, 'text' : name, }) formats = get_available_output_formats(visible_only=True) # show collections in the search box? 
(not if there is only one # collection defined, and not if we are in light search) show_colls = True show_title = True if len(collection_reclist_cache.cache.keys()) == 1 or \ aas == -1: show_colls = False show_title = False if cc == CFG_SITE_NAME: show_title = False if CFG_INSPIRE_SITE: show_title = False return websearch_templates.tmpl_search_box( ln = ln, aas = aas, cc_intl = cc_intl, cc = cc, ot = ot, sp = sp, action = action, fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID), f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, p1 = p1, p2 = p2, p3 = p3, op1 = op1, op2 = op2, rm = rm, p = p, f = f, coll_selects = coll_selects, d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d, dt = dt, sort_fields = get_sortby_fields(ln=ln, colID=cc_colID), sf = sf, so = so, ranks = ranks, sc = sc, rg = rg, formats = formats, of = of, pl = pl, jrec = jrec, ec = ec, show_colls = show_colls, show_title = show_title, ) def create_navtrail_links(cc=CFG_SITE_NAME, aas=0, ln=CFG_SITE_LANG, self_p=1, tab=''): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in get_coll_ancestors(cc): if dad != CFG_SITE_NAME: # exclude Home collection dads.append ((dad, get_coll_i18nname(dad, ln, False))) if self_p and cc != CFG_SITE_NAME: dads.append((cc, get_coll_i18nname(cc, ln, False))) return websearch_templates.tmpl_navtrail_links( aas=aas, ln=ln, dads=dads) def get_searchwithin_fields(ln='en', colID=None): """Retrieves the fields name used in the 'search within' selection box for the collection ID colID.""" res = None if colID: res = run_sql("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,)) if not res: res = run_sql("SELECT code,name FROM field ORDER BY name ASC") fields = [{ 'value' : '', 'text' : get_field_i18nname("any field", ln, False) }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def get_sortby_fields(ln='en', colID=None): """Retrieves the fields name used in the 'sort by' selection box for the collection ID colID.""" _ = gettext_set_language(ln) res = None if colID: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,)) if not res: # no sort fields defined for this colID, try to take Home collection: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (1,)) if not res: # no sort fields defined for the Home collection, take all sort fields defined wherever they are: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""",) fields = [{ 'value' : '', 'text' : _("latest first") }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def create_andornot_box(name='op', value='', ln='en'): "Returns HTML 
code for the AND/OR/NOT selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="n"%s>%s </select> """ % (name, is_selected('a', value), _("AND"), is_selected('o', value), _("OR"), is_selected('n', value), _("AND NOT")) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="e"%s>%s <option value="p"%s>%s <option value="r"%s>%s </select> """ % (name, is_selected('a', value), _("All of the words:"), is_selected('o', value), _("Any of the words:"), is_selected('e', value), _("Exact phrase:"), is_selected('p', value), _("Partial phrase:"), is_selected('r', value), _("Regular expression:")) return out def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if type(var) is int and type(fld) is int: if var == fld: return " selected" elif str(var) == str(fld): return " selected" elif fld and len(fld)==3 and fld[0] == "w" and var == fld[1:]: return " selected" return "" def wash_colls(cc, c, split_colls=0, verbose=0): """Wash collection list by checking whether user has deselected anything under 'Narrow search'. Checks also if cc is a list or not. Return list of cc, colls_to_display, colls_to_search since the list of collections to display is different from that to search in. This is because users might have chosen 'split by collection' functionality. The behaviour of "collections to display" depends solely whether user has deselected a particular collection: e.g. if it started from 'Articles and Preprints' page, and deselected 'Preprints', then collection to display is 'Articles'. If he did not deselect anything, then collection to display is 'Articles & Preprints'. The behaviour of "collections to search in" depends on the 'split_colls' parameter: * if is equal to 1, then we can wash the colls list down and search solely in the collection the user started from; * if is equal to 0, then we are splitting to the first level of collections, i.e. collections as they appear on the page we started to search from; The function raises exception InvenioWebSearchUnknownCollectionError if cc or one of c collections is not known. 
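
## Worked example of the behaviour described above (collection names are
## hypothetical): a user starts from 'Articles & Preprints' and deselects
## 'Preprints' under Narrow search:
#
#   colls_to_display -> ['Articles']   # reflects the user's narrowing
#   split_colls=1    -> search solely the collection the user started from
#   split_colls=0    -> split the search across the first-level son
#                       collections as they appear on the starting page
#
## Hosted (external) collections are additionally split off into their own
## list so they can be queried separately.
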
""" colls_out = [] colls_out_for_display = [] # list to hold the hosted collections to be searched and displayed hosted_colls_out = [] debug = "" if verbose: debug += "<br />" debug += "<br />1) --- initial parameters ---" debug += "<br />cc : %s" % cc debug += "<br />c : %s" % c debug += "<br />" # check what type is 'cc': if type(cc) is list: for ci in cc: if collection_reclist_cache.cache.has_key(ci): # yes this collection is real, so use it: cc = ci break else: # check once if cc is real: if not collection_reclist_cache.cache.has_key(cc): if cc: raise InvenioWebSearchUnknownCollectionError(cc) else: cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection # check type of 'c' argument: if type(c) is list: colls = c else: colls = [c] if verbose: debug += "<br />2) --- after check for the integrity of cc and the being or not c a list ---" debug += "<br />cc : %s" % cc debug += "<br />c : %s" % c debug += "<br />" # remove all 'unreal' collections: colls_real = [] for coll in colls: if collection_reclist_cache.cache.has_key(coll): colls_real.append(coll) else: if coll: raise InvenioWebSearchUnknownCollectionError(coll) colls = colls_real if verbose: debug += "<br />3) --- keeping only the real colls of c ---" debug += "<br />colls : %s" % colls debug += "<br />" # check if some real collections remain: if len(colls)==0: colls = [cc] if verbose: debug += "<br />4) --- in case no colls were left we use cc directly ---" debug += "<br />colls : %s" % colls debug += "<br />" # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll': res = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r'""", (cc,)) # list that holds all the non restricted sons of cc that are also not hosted collections l_cc_nonrestricted_sons_and_nonhosted_colls = [] res_hosted = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r' AND (c.dbquery NOT LIKE 'hostedcollection:%%' OR c.dbquery IS NULL)""", (cc,)) for row_hosted in res_hosted: l_cc_nonrestricted_sons_and_nonhosted_colls.append(row_hosted[0]) l_cc_nonrestricted_sons_and_nonhosted_colls.sort() l_cc_nonrestricted_sons = [] l_c = colls for row in res: if not collection_restricted_p(row[0]): l_cc_nonrestricted_sons.append(row[0]) l_c.sort() l_cc_nonrestricted_sons.sort() if l_cc_nonrestricted_sons == l_c: colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc' # the following elif is a hack that preserves the above funcionality when we start searching from # the frontpage with some hosted collections deselected (either by default or manually) elif set(l_cc_nonrestricted_sons_and_nonhosted_colls).issubset(set(l_c)): colls_out_for_display = colls split_colls = 0 else: colls_out_for_display = colls # nope, we need to display all 'colls' successively # remove duplicates: #colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1)) #colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups) colls_out_for_display = list(set(colls_out_for_display)) if verbose: debug += "<br />5) --- decide whether colls_out_for_diplay should be colls or is it sufficient for it 
to be cc; remove duplicates ---" debug += "<br />colls_out_for_display : %s" % colls_out_for_display debug += "<br />" # FIXME: The below quoted part of the code has been commented out # because it prevents searching in individual restricted daughter # collections when both parent and all its public daughter # collections were asked for, in addition to some restricted # daughter collections. The removal was introduced for hosted # collections, so we may want to double check in this context. # the following piece of code takes care of removing collections whose ancestors are going to be searched anyway # list to hold the collections to be removed #colls_to_be_removed = [] # first calculate the collections that can safely be removed #for coll in colls_out_for_display: # for ancestor in get_coll_ancestors(coll): # #if ancestor in colls_out_for_display: colls_to_be_removed.append(coll) # if ancestor in colls_out_for_display and not is_hosted_collection(coll): colls_to_be_removed.append(coll) # secondly remove the collections #for coll in colls_to_be_removed: # colls_out_for_display.remove(coll) if verbose: debug += "<br />6) --- remove collections that have ancestors about to be search, unless they are hosted ---" debug += "<br />colls_out_for_display : %s" % colls_out_for_display debug += "<br />" # calculate the hosted collections to be searched. if colls_out_for_display == [cc]: if is_hosted_collection(cc): hosted_colls_out.append(cc) else: for coll in get_coll_sons(cc): if is_hosted_collection(coll): hosted_colls_out.append(coll) else: for coll in colls_out_for_display: if is_hosted_collection(coll): hosted_colls_out.append(coll) if verbose: debug += "<br />7) --- calculate the hosted_colls_out ---" debug += "<br />hosted_colls_out : %s" % hosted_colls_out debug += "<br />" # second, let us decide on collection splitting: if split_colls == 0: # type A - no sons are wanted colls_out = colls_out_for_display else: # type B - sons (first-level descendants) are wanted for coll in colls_out_for_display: coll_sons = get_coll_sons(coll) if coll_sons == []: colls_out.append(coll) else: for coll_son in coll_sons: if not is_hosted_collection(coll_son): colls_out.append(coll_son) #else: # colls_out = colls_out + coll_sons # remove duplicates: #colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1)) #colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups) colls_out = list(set(colls_out)) if verbose: debug += "<br />8) --- calculate the colls_out; remove duplicates ---" debug += "<br />colls_out : %s" % colls_out debug += "<br />" # remove the hosted collections from the collections to be searched if hosted_colls_out: for coll in hosted_colls_out: try: colls_out.remove(coll) except ValueError: # in case coll was not found in colls_out pass if verbose: debug += "<br />9) --- remove the hosted_colls from the colls_out ---" debug += "<br />colls_out : %s" % colls_out return (cc, colls_out_for_display, colls_out, hosted_colls_out, debug) def strip_accents(x): """Strip accents in the input phrase X (assumed in UTF-8) by replacing accented characters with their unaccented cousins (e.g. é by e). 
Return such a stripped X."""
    x = re_latex_lowercase_a.sub("a", x)
    x = re_latex_lowercase_ae.sub("ae", x)
    x = re_latex_lowercase_e.sub("e", x)
    x = re_latex_lowercase_i.sub("i", x)
    x = re_latex_lowercase_o.sub("o", x)
    x = re_latex_lowercase_u.sub("u", x)
    x = re_latex_lowercase_y.sub("y", x)
    x = re_latex_lowercase_c.sub("c", x)
    x = re_latex_lowercase_n.sub("n", x)
    x = re_latex_uppercase_a.sub("A", x)
    x = re_latex_uppercase_ae.sub("AE", x)
    x = re_latex_uppercase_e.sub("E", x)
    x = re_latex_uppercase_i.sub("I", x)
    x = re_latex_uppercase_o.sub("O", x)
    x = re_latex_uppercase_u.sub("U", x)
    x = re_latex_uppercase_y.sub("Y", x)
    x = re_latex_uppercase_c.sub("C", x)
    x = re_latex_uppercase_n.sub("N", x)
    # convert input into Unicode string:
    try:
        y = unicode(x, "utf-8")
    except:
        return x # something went wrong, probably the input wasn't UTF-8
    # asciify Latin-1 lowercase characters:
    y = re_unicode_lowercase_a.sub("a", y)
    y = re_unicode_lowercase_ae.sub("ae", y)
    y = re_unicode_lowercase_e.sub("e", y)
    y = re_unicode_lowercase_i.sub("i", y)
    y = re_unicode_lowercase_o.sub("o", y)
    y = re_unicode_lowercase_u.sub("u", y)
    y = re_unicode_lowercase_y.sub("y", y)
    y = re_unicode_lowercase_c.sub("c", y)
    y = re_unicode_lowercase_n.sub("n", y)
    # asciify Latin-1 uppercase characters:
    y = re_unicode_uppercase_a.sub("A", y)
    y = re_unicode_uppercase_ae.sub("AE", y)
    y = re_unicode_uppercase_e.sub("E", y)
    y = re_unicode_uppercase_i.sub("I", y)
    y = re_unicode_uppercase_o.sub("O", y)
    y = re_unicode_uppercase_u.sub("U", y)
    y = re_unicode_uppercase_y.sub("Y", y)
    y = re_unicode_uppercase_c.sub("C", y)
    y = re_unicode_uppercase_n.sub("N", y)
    # return UTF-8 representation of the Unicode string:
    return y.encode("utf-8")

def wash_index_term(term, max_char_length=50, lower_term=True):
    """
    Return washed form of the index term TERM that would be suitable for
    storing into idxWORD* tables.  I.e., lower the TERM if LOWER_TERM is
    True, and truncate it safely to MAX_CHAR_LENGTH UTF-8 characters
    (meaning, in principle, 4*MAX_CHAR_LENGTH bytes).

    The function works by an internal conversion of TERM, when needed,
    from its input Python UTF-8 binary string format into Python Unicode
    format, and then truncating it safely to the given number of UTF-8
    characters, without possible mis-truncation in the middle of a
    multi-byte UTF-8 character that could otherwise happen if we would
    have been working with the UTF-8 binary representation directly.

    Note that MAX_CHAR_LENGTH corresponds to the length of the term
    column in idxINDEX* tables.
    """
    if lower_term:
        washed_term = unicode(term, 'utf-8').lower()
    else:
        washed_term = unicode(term, 'utf-8')
    if len(washed_term) <= max_char_length:
        # no need to truncate the term, because it will fit
        # nicely even if it uses four-byte UTF-8 characters
        return washed_term.encode('utf-8')
    else:
        # truncate the term in a safe position:
        return washed_term[:max_char_length].encode('utf-8')

def lower_index_term(term):
    """
    Return safely lowered index term TERM.  This is done by converting to
    UTF-8 first, because the standard Python lower() function is not
    UTF-8 safe.  To be called by both the search engine and the indexer
    when appropriate (e.g. before stemming).

    In case of problems with UTF-8 compliance, this function raises
    UnicodeDecodeError, so the client code may want to catch it.
    """
    return unicode(term, 'utf-8').lower().encode('utf-8')

def get_synonym_terms(term, kbr_name, match_type):
    """
    Return list of synonyms for TERM by looking in KBR_NAME in MATCH_TYPE style.
@param term: search-time term or index-time term @type term: str @param kbr_name: knowledge base name @type kbr_name: str @param match_type: specifies how the term matches against the KBR before doing the lookup. Could be `exact' (default), 'leading_to_comma', `leading_to_number'. @type match_type: str @return: list of term synonyms @rtype: list of strings """ dterms = {} ## exact match is default: term_for_lookup = term term_remainder = '' ## but maybe match different term: if match_type == 'leading_to_comma': mmm = re.match(r'^(.*?)(\s*,.*)$', term) if mmm: term_for_lookup = mmm.group(1) term_remainder = mmm.group(2) elif match_type == 'leading_to_number': mmm = re.match(r'^(.*?)(\s*\d.*)$', term) if mmm: term_for_lookup = mmm.group(1) term_remainder = mmm.group(2) ## FIXME: workaround: escaping SQL wild-card signs, since KBR's ## exact search is doing LIKE query, so would match everything: term_for_lookup = term_for_lookup.replace('%', '\%') ## OK, now find synonyms: for kbr_values in get_kbr_values(kbr_name, searchkey=term_for_lookup, searchtype='e'): for kbr_value in kbr_values: dterms[kbr_value + term_remainder] = 1 ## return list of term synonyms: return dterms.keys() def wash_output_format(format): """Wash output format FORMAT. Currently only prevents input like 'of=9' for backwards-compatible format that prints certain fields only. (for this task, 'of=tm' is preferred)""" if str(format[0:3]).isdigit() and len(format) != 6: # asked to print MARC tags, but not enough digits, # so let's switch back to HTML brief default return 'hb' else: return format def wash_pattern(p): """Wash pattern passed by URL. Check for sanity of the wildcard by removing wildcards if they are appended to extremely short words (1-3 letters). TODO: instead of this approximative treatment, it will be much better to introduce a temporal limit, e.g. to kill a query if it does not finish in 10 seconds.""" # strip accents: # p = strip_accents(p) # FIXME: when available, strip accents all the time # add leading/trailing whitespace for the two following wildcard-sanity checking regexps: p = " " + p + " " # replace spaces within quotes by __SPACE__ temporarily: p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # get rid of unquoted wildcards after spaces: p = re_pattern_wildcards_after_spaces.sub("\\1", p) # get rid of extremely short words (1-3 letters with wildcards): #p = re_pattern_short_words.sub("\\1", p) # replace back __SPACE__ by spaces: p = re_pattern_space.sub(" ", p) # replace special terms: p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p) # remove unnecessary whitespace: p = string.strip(p) # remove potentially wrong UTF-8 characters: p = wash_for_utf8(p) return p def wash_field(f): """Wash field passed by URL.""" if f: # get rid of unnecessary whitespace and make it lowercase # (e.g. Author -> author) to better suit iPhone etc input # mode: f = f.strip().lower() # wash legacy 'f' field names, e.g. 
replace 'wau' or `au' by # 'author', if applicable: if CFG_WEBSEARCH_FIELDS_CONVERT: f = CFG_WEBSEARCH_FIELDS_CONVERT.get(f, f) return f def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0): """ Take user-submitted date arguments D1 (full datetime string) or (D1Y, D1M, D1Y) year, month, day tuple and D2 or (D2Y, D2M, D2Y) and return (YYY1-M1-D2 H1:M1:S2, YYY2-M2-D2 H2:M2:S2) datetime strings in the YYYY-MM-DD HH:MM:SS format suitable for time restricted searching. Note that when both D1 and (D1Y, D1M, D1D) parameters are present, the precedence goes to D1. Ditto for D2*. Note that when (D1Y, D1M, D1D) are taken into account, some values may be missing and are completed e.g. to 01 or 12 according to whether it is the starting or the ending date. """ datetext1, datetext2 = "", "" # sanity checking: if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and d2 == "" and d2y == 0 and d2m == 0 and d2d == 0: return ("", "") # nothing selected, so return empty values # wash first (starting) date: if d1: # full datetime string takes precedence: datetext1 = d1 else: # okay, first date passed as (year,month,day): if d1y: datetext1 += "%04d" % d1y else: datetext1 += "0000" if d1m: datetext1 += "-%02d" % d1m else: datetext1 += "-01" if d1d: datetext1 += "-%02d" % d1d else: datetext1 += "-01" datetext1 += " 00:00:00" # wash second (ending) date: if d2: # full datetime string takes precedence: datetext2 = d2 else: # okay, second date passed as (year,month,day): if d2y: datetext2 += "%04d" % d2y else: datetext2 += "9999" if d2m: datetext2 += "-%02d" % d2m else: datetext2 += "-12" if d2d: datetext2 += "-%02d" % d2d else: datetext2 += "-31" # NOTE: perhaps we should add max(datenumber) in # given month, but for our quering it's not # needed, 31 will always do datetext2 += " 00:00:00" # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts: return (datetext1, datetext2) def is_hosted_collection(coll): """Check if the given collection is a hosted one; i.e. its dbquery starts with hostedcollection: Returns True if it is, False if it's not or if the result is empty or if the query failed""" res = run_sql("SELECT dbquery FROM collection WHERE name=%s", (coll, )) try: return res[0][0].startswith("hostedcollection:") except: return False def get_colID(c): "Return collection ID for collection name C. Return None if no match found." colID = None res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1) if res: colID = res[0][0] return colID def get_coll_normalised_name(c): """Returns normalised collection name (case sensitive) for collection name C (case insensitive). Returns None if no match found.""" try: return run_sql("SELECT name FROM collection WHERE name=%s", (c,))[0][0] except: return None def get_coll_ancestors(coll): "Returns a list of ancestors for collection 'coll'." coll_ancestors = [] coll_ancestor = coll while 1: res = run_sql("""SELECT c.name FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad LEFT JOIN collection AS ccc ON ccc.id=cc.id_son WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""", (coll_ancestor,)) if res: coll_name = res[0][0] coll_ancestors.append(coll_name) coll_ancestor = coll_name else: break # ancestors found, return reversed list: coll_ancestors.reverse() return coll_ancestors def get_coll_sons(coll, type='r', public_only=1): """Return a list of sons (first-level descendants) of type 'type' for collection 'coll'. If public_only, then return only non-restricted son collections. 
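
## Illustrative, self-contained model of the decomposition performed by
## get_coll_real_descendants() below: sons carrying a dbquery are "real"
## and returned as-is, while compound sons without one are recursed into.
## The toy tree and dbquery map are hypothetical, not Invenio data:
#
# _toy_sons = {'Articles & Preprints': ['Articles', 'Preprints'],
#              'Articles': ['Published Articles', 'Drafts'],
#              'Preprints': [], 'Published Articles': [], 'Drafts': []}
# _toy_dbquery = {'Articles & Preprints': None, 'Articles': None,
#                 'Preprints': 'collection:PREPRINT',
#                 'Published Articles': 'collection:PUBART',
#                 'Drafts': 'collection:DRAFT'}
#
# def _toy_real_descendants(coll):
#     out = []
#     for son in _toy_sons.get(coll, []):
#         if _toy_dbquery.get(son):
#             out.append(son)                          # 'real': keep it
#         else:
#             out.extend(_toy_real_descendants(son))   # compound: recurse
#     return out
#
# _toy_real_descendants('Articles & Preprints')
# # -> ['Published Articles', 'Drafts', 'Preprints']
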
""" coll_sons = [] query = "SELECT c.name FROM collection AS c "\ "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\ "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\ "WHERE cc.type=%s AND ccc.name=%s" query += " ORDER BY cc.score DESC" res = run_sql(query, (type, coll)) for name in res: if not public_only or not collection_restricted_p(name[0]): coll_sons.append(name[0]) return coll_sons def get_coll_real_descendants(coll, type='_', get_hosted_colls=True): """Return a list of all descendants of collection 'coll' that are defined by a 'dbquery'. IOW, we need to decompose compound collections like "A & B" into "A" and "B" provided that "A & B" has no associated database query defined. """ coll_sons = [] res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_son LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad WHERE ccc.name=%s AND cc.type LIKE %s ORDER BY cc.score DESC""", (coll, type,)) for name, dbquery in res: if dbquery: # this is 'real' collection, so return it: if get_hosted_colls: coll_sons.append(name) else: if not dbquery.startswith("hostedcollection:"): coll_sons.append(name) else: # this is 'composed' collection, so recurse: coll_sons.extend(get_coll_real_descendants(name)) return coll_sons def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG): """Browse either biliographic phrases or words indexes, and display it.""" # load the right message language _ = gettext_set_language(ln) ## is p enclosed in quotes? (coming from exact search) if p.startswith('"') and p.endswith('"'): p = p[1:-1] p_orig = p ## okay, "real browse" follows: ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) ## do we search in words indexes? if not f: return browse_in_bibwords(req, p, f) index_id = get_index_id_from_field(f) if index_id != 0: - coll = HitSet() + coll = intbitset() for coll_name in colls: coll |= get_collection_reclist(coll_name) browsed_phrases_in_colls = get_nearest_terms_in_idxphrase_with_collection(p, index_id, rg/2, rg/2, coll) else: browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) while not browsed_phrases: # try again and again with shorter and shorter pattern: try: p = p[:-1] browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) except: # probably there are no hits at all: req.write(_("No values found.")) return ## try to check hits in these particular collection selection: browsed_phrases_in_colls = [] if 0: for phrase in browsed_phrases: - phrase_hitset = HitSet() + phrase_hitset = intbitset() phrase_hitsets = search_pattern("", phrase, f, 'e') for coll in colls: phrase_hitset.union_update(phrase_hitsets[coll]) if len(phrase_hitset) > 0: # okay, this phrase has some hits in colls, so add it: browsed_phrases_in_colls.append([phrase, len(phrase_hitset)]) ## were there hits in collections? if browsed_phrases_in_colls == []: if browsed_phrases != []: #print_warning(req, """<p>No match close to <em>%s</em> found in given collections. 
#Please try different term.<p>Displaying matches in any collection...""" % p_orig) ## try to get nbhits for these phrases in any collection: for phrase in browsed_phrases: browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)]) ## display results now: out = websearch_templates.tmpl_browse_pattern( f=f, fn=get_field_i18nname(get_field_name(f) or f, ln, False), ln=ln, browsed_phrases_in_colls=browsed_phrases_in_colls, colls=colls, rg=rg, ) req.write(out) return def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG): """Browse inside words indexes.""" if not p: return _ = gettext_set_language(ln) urlargd = {} urlargd.update(req.argd) urlargd['action'] = 'search' nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0) req.write(websearch_templates.tmpl_search_in_bibwords( p = p, f = f, ln = ln, nearest_box = nearest_box )) return def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0): """Search for complex pattern 'p' within field 'f' according to matching type 'm'. Return hitset of recIDs. The function uses multi-stage searching algorithm in case of no exact match found. See the Search Internals document for detailed description. The 'ap' argument governs whether an alternative patterns are to be used in case there is no direct hit for (p,f,m). For example, whether to replace non-alphanumeric characters by spaces if it would give some hits. See the Search Internals document for detailed description. (ap=0 forbits the alternative pattern usage, ap=1 permits it.) The 'of' argument governs whether to print or not some information to the user in case of no match found. (Usually it prints the information in case of HTML formats, otherwise it's silent). The 'verbose' argument controls the level of debugging information to be printed (0=least, 9=most). All the parameters are assumed to have been previously washed. This function is suitable as a mid-level API. """ _ = gettext_set_language(ln) - hitset_empty = HitSet() + hitset_empty = intbitset() # sanity check: if not p: - hitset_full = HitSet(trailing_bits=1) + hitset_full = intbitset(trailing_bits=1) hitset_full.discard(0) # no pattern, so return all universe return hitset_full # search stage 1: break up arguments into basic search units: if verbose and of.startswith("h"): t1 = os.times()[4] basic_search_units = create_basic_search_units(req, p, f, m, of) if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units))) print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1)) # search stage 2: do search for each search unit and verify hit presence: if verbose and of.startswith("h"): t1 = os.times()[4] basic_search_units_hitsets = [] #prepare hiddenfield-related.. 
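
## (worked example, tag values hypothetical) If CFG_BIBFORMAT_HIDDEN_TAGS
## contains '595' and a user lacking the 'runbibedit' authorization issues
## the query 595__a:"internal note", the guard below silently replaces the
## unit's hitset with an empty intbitset, so hidden MARC fields cannot be
## probed through search; privileged users (can_see_hidden) bypass it.
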
myhiddens = CFG_BIBFORMAT_HIDDEN_TAGS can_see_hidden = False if req: user_info = collect_user_info(req) can_see_hidden = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) if can_see_hidden: myhiddens = [] if CFG_INSPIRE_SITE and of.startswith('h'): # fulltext/caption search warnings for INSPIRE: fields_to_be_searched = [f for o,p,f,m in basic_search_units] if 'fulltext' in fields_to_be_searched: print_warning(req, _("Warning: full-text search is only available for a subset of papers mostly from 2006-2011.")) elif 'caption' in fields_to_be_searched: print_warning(req, _("Warning: figure caption search is only available for a subset of papers mostly from 2008-2011.")) for idx_unit in xrange(len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] if bsu_f and len(bsu_f) < 2: if of.startswith("h"): print_warning(req, _("There is no index %s. Searching for %s in all fields." % (bsu_f, bsu_p))) bsu_f = '' bsu_m = 'w' if of.startswith("h") and verbose: print_warning(req, _('Instead searching %s.' % str([bsu_o, bsu_p, bsu_f, bsu_m]))) try: basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m, wl) except InvenioWebSearchWildcardLimitError, excp: basic_search_unit_hitset = excp.res if of.startswith("h"): print_warning(req, _("Search term too generic, displaying only partial results...")) # FIXME: print warning if we use native full-text indexing if bsu_f == 'fulltext' and bsu_m != 'w' and of.startswith('h') and not CFG_SOLR_URL: print_warning(req, _("No phrase index available for fulltext yet, looking for word combination...")) #check that the user is allowed to search with this tag #if he/she tries it if bsu_f and len(bsu_f) > 1 and bsu_f[0].isdigit() and bsu_f[1].isdigit(): for htag in myhiddens: ltag = len(htag) samelenfield = bsu_f[0:ltag] if samelenfield == htag: #user searches by a hidden tag #we won't show you anything.. - basic_search_unit_hitset = HitSet() + basic_search_unit_hitset = intbitset() if verbose >= 9 and of.startswith("h"): print_warning(req, "Pattern %s hitlist omitted since \ it queries in a hidden tag %s" % (repr(bsu_p), repr(myhiddens))) display_nearest_terms_box=False #..and stop spying, too. if verbose >= 9 and of.startswith("h"): print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset)) if len(basic_search_unit_hitset) > 0 or \ ap==0 or \ bsu_o=="|" or \ ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"): # stage 2-1: this basic search unit is retained, since # either the hitset is non-empty, or the approximate # pattern treatment is switched off, or the search unit # was joined by an OR operator to preceding/following # units so we do not require that it exists basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern: if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p) and bsu_f != 'refersto' and bsu_f != 'citedby': if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query? 
bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p) else: # it is WRD query bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p) if verbose and of.startswith('h') and req: print_warning(req, "Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m))) basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln, wl=wl) if len(basic_search_unit_hitset) > 0: # we retain the new unit instead if of.startswith('h'): print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \ {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>", 'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"}) basic_search_units[idx_unit][1] = bsu_pn basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h') and display_nearest_terms_box: if req: if bsu_f == "recid": print_warning(req, _("Requested record does not seem to exist.")) else: print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h') and display_nearest_terms_box: if req: if bsu_f == "recid": print_warning(req, _("Requested record does not seem to exist.")) else: print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty if verbose and of.startswith("h"): t2 = os.times()[4] for idx_unit in range(0, len(basic_search_units)): print_warning(req, "Search stage 2: basic search unit %s gave %d hits." % (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit]))) print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1)) # search stage 3: apply boolean query for each search unit: if verbose and of.startswith("h"): t1 = os.times()[4] # let the initial set be the complete universe: - hitset_in_any_collection = HitSet(trailing_bits=1) + hitset_in_any_collection = intbitset(trailing_bits=1) hitset_in_any_collection.discard(0) for idx_unit in xrange(len(basic_search_units)): this_unit_operation = basic_search_units[idx_unit][0] this_unit_hitset = basic_search_units_hitsets[idx_unit] if this_unit_operation == '+': hitset_in_any_collection.intersection_update(this_unit_hitset) elif this_unit_operation == '-': hitset_in_any_collection.difference_update(this_unit_hitset) elif this_unit_operation == '|': hitset_in_any_collection.union_update(this_unit_hitset) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(this_unit_operation), "Error") if len(hitset_in_any_collection) == 0: # no hits found, propose alternative boolean query: if of.startswith('h') and display_nearest_terms_box: nearestterms = [] for idx_unit in range(0, len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] if bsu_p.startswith("%") and bsu_p.endswith("%"): bsu_p = "'" + bsu_p[1:-1] + "'" bsu_nbhits = len(basic_search_units_hitsets[idx_unit]) # create a similar query, but with the basic search unit only argd = {} argd.update(req.argd) argd['p'] = bsu_p argd['f'] = bsu_f nearestterms.append((bsu_p, bsu_nbhits, argd)) text = websearch_templates.tmpl_search_no_boolean_hits( ln=ln, nearestterms=nearestterms) print_warning(req, text) if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection)) print_warning(req, "Search stage 3: execution took %.2f seconds." 
% (t2 - t1)) return hitset_in_any_collection def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0): """Search for complex pattern 'p' containing parentheses within field 'f' according to matching type 'm'. Return hitset of recIDs. For more details on the parameters see 'search_pattern'. """ _ = gettext_set_language(ln) spires_syntax_converter = SpiresToInvenioSyntaxConverter() spires_syntax_query = False # if the pattern uses SPIRES search syntax, convert it to Invenio syntax if spires_syntax_converter.is_applicable(p): spires_syntax_query = True p = spires_syntax_converter.convert_query(p) # sanity check: do not call parenthesised parser for search terms # like U(1): if not re_pattern_parens.search(p): return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl) # Try searching with parentheses try: parser = SearchQueryParenthesisedParser() # get a hitset with all recids - result_hitset = HitSet(trailing_bits=1) + result_hitset = intbitset(trailing_bits=1) # parse the query. The result is a list of [op1, expr1, op2, expr2, ..., opN, exprN] parsing_result = parser.parse_query(p) if verbose and of.startswith("h"): print_warning(req, "Search stage 1: search_pattern_parenthesised() searched %s." % repr(p)) print_warning(req, "Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result)) # go through every pattern # calculate hitset for it # combine pattern's hitset with the result using the corresponding operator for index in xrange(0, len(parsing_result)-1, 2): current_operator = parsing_result[index] current_pattern = parsing_result[index+1] if CFG_INSPIRE_SITE and spires_syntax_query: # setting ap=0 to turn off approximate matching for 0 results. # Doesn't work well in combinations. # FIXME: The right fix involves collecting statuses for each # hitset, then showing a nearest terms box exactly once, # outside this loop. ap = 0 display_nearest_terms_box=False # obtain a hitset for the current pattern current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl) # combine the current hitset with resulting hitset using the current operator if current_operator == '+': result_hitset = result_hitset & current_hitset elif current_operator == '-': result_hitset = result_hitset - current_hitset elif current_operator == '|': result_hitset = result_hitset | current_hitset else: assert False, "Unknown operator in search_pattern_parenthesised()" return result_hitset # If searching with parentheses fails, perform search ignoring parentheses except SyntaxError: print_warning(req, _("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again.")) # remove the parentheses in the query. The current implementation removes all the parentheses, # but it could be improved to remove only those that are not inside quotes p = p.replace('(', ' ') p = p.replace(')', ' ') return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl) def search_unit(p, f=None, m=None, wl=0): """Search for a basic search unit defined by pattern 'p' and field 'f' and matching type 'm'. Return hitset of recIDs. All the parameters are assumed to have been previously washed.
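The set algebra used by stage 3 above and by the parenthesised combine loop is plain intbitset arithmetic; here it is in isolation, with invented record IDs (the import path is the one Invenio itself uses):

    from invenio.intbitset import intbitset

    universe = intbitset(trailing_bits=1)    # "all records": infinite tail of 1s
    universe.discard(0)                      # recID 0 is never assigned
    hits_a = intbitset([1, 2, 3, 5, 8])      # invented hits for unit A
    hits_b = intbitset([2, 3, 13])           # invented hits for unit B

    result = universe & hits_a               # '+' : intersection
    result = result - hits_b                 # '-' : difference
    result = result | intbitset([21])        # '|' : union
    print list(result)                       # -> [1, 5, 8, 21]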
'p' is assumed to be already a ``basic search unit'' so that it is searched as such and is not broken up in any way. Only wildcard and span queries are being detected inside 'p'. If CFG_WEBSEARCH_SYNONYM_KBRS is set and we are searching in one of the indexes that has defined runtime synonym knowledge base, then look up there and automatically enrich search results with results for synonyms. In case the wildcard limit (wl) is greater than 0 and this limit is reached an InvenioWebSearchWildcardLimitError will be raised. In case you want to call this function with no limit for the wildcard queries, wl should be 0. This function is suitable as a low-level API. """ ## create empty output results set: - hitset = HitSet() + hitset = intbitset() if not p: # sanity checking return hitset ## eventually look up runtime synonyms: - hitset_synonyms = HitSet() + hitset_synonyms = intbitset() if CFG_WEBSEARCH_SYNONYM_KBRS.has_key(f): for p_synonym in get_synonym_terms(p, CFG_WEBSEARCH_SYNONYM_KBRS[f][0], CFG_WEBSEARCH_SYNONYM_KBRS[f][1]): if p_synonym != p: hitset_synonyms |= search_unit(p_synonym, f, m, wl) ## look up hits: if CFG_SOLR_URL and f == 'fulltext': # redirect to Solr/Lucene return search_unit_in_solr(p, f, m) if f == 'datecreated': hitset = search_unit_in_bibrec(p, p, 'c') elif f == 'datemodified': hitset = search_unit_in_bibrec(p, p, 'm') elif f == 'refersto': # we are doing search by the citation count hitset = search_unit_refersto(p) elif f == 'citedby': # we are doing search by the citation count hitset = search_unit_citedby(p) elif m == 'a' or m == 'r': # we are doing either phrase search or regexp search if f == 'fulltext': # FIXME: workaround for not having phrase index yet return search_pattern(None, p, f, 'w') index_id = get_index_id_from_field(f) if index_id != 0: hitset = search_unit_in_idxphrases(p, f, m, wl) else: hitset = search_unit_in_bibxxx(p, f, m, wl) elif p.startswith("cited:"): # we are doing search by the citation count hitset = search_unit_by_times_cited(p[6:]) else: # we are doing bibwords search by default hitset = search_unit_in_bibwords(p, f, m, wl=wl) ## merge synonym results and return total: hitset |= hitset_synonyms return hitset def search_unit_in_bibwords(word, f, m=None, decompress=zlib.decompress, wl=0): """Searches for 'word' inside bibwordsX table for field 'f' and returns hitset of recIDs.""" - set = HitSet() # will hold output result set + set = intbitset() # will hold output result set set_used = 0 # not-yet-used flag, to be able to circumvent set operations limit_reached = 0 # flag for knowing if the query limit has been reached # deduce into which bibwordsX table we will search: stemming_language = get_index_stemming_language(get_index_id_from_field("anyfield")) bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id stemming_language = get_index_stemming_language(index_id) else: - return HitSet() # word index f does not exist + return intbitset() # word index f does not exist # wash 'word' argument and run query: word = string.replace(word, '*', '%') # we now use '*' as the truncation character words = string.split(word, "->", 1) # check for span query if len(words) == 2: word0 = re_word.sub('', words[0]) word1 = re_word.sub('', words[1]) if stemming_language: word0 = lower_index_term(word0) word1 = lower_index_term(word1) word0 = stem(word0, stemming_language) word1 = stem(word1, stemming_language) try: res = run_sql_with_limit("SELECT 
term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX, (wash_index_term(word0), wash_index_term(word1)), wildcard_limit = wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: if f == 'journal': pass # FIXME: quick hack for the journal index else: word = re_word.sub('', word) if stemming_language: word = lower_index_term(word) word = stem(word, stemming_language) if string.find(word, '%') >= 0: # do we have wildcard in the word? if f == 'journal': # FIXME: quick hack for the journal index # FIXME: we can run a sanity check here for all indexes res = () else: try: res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX, (wash_index_term(word),), wildcard_limit = wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX, (wash_index_term(word),)) # fill the result set: for word, hitlist in res: - hitset_bibwrd = HitSet(hitlist) + hitset_bibwrd = intbitset(hitlist) # add the results: if set_used: set.union_update(hitset_bibwrd) else: set = hitset_bibwrd set_used = 1 #check to see if the query limit was reached if limit_reached: #raise an exception, so we can print a nice message to the user raise InvenioWebSearchWildcardLimitError(set) # okay, return result set: return set def search_unit_in_idxphrases(p, f, type, wl=0): """Searches for phrase 'p' inside idxPHRASE*F table for field 'f' and returns hitset of recIDs found. The search type is defined by 'type' (e.g. equals to 'r' for a regexp search).""" - set = HitSet() # will hold output result set + set = intbitset() # will hold output result set set_used = 0 # not-yet-used flag, to be able to circumvent set operations limit_reached = 0 # flag for knowing if the query limit has been reached use_query_limit = False # flag for knowing if to limit the query results or not # deduce in which idxPHRASE table we will search: idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: idxphraseX = "idxPHRASE%02dF" % index_id else: - return HitSet() # phrase index f does not exist + return intbitset() # phrase index f does not exist # detect query type (exact phrase, partial phrase, regexp): if type == 'r': query_addons = "REGEXP %s" query_params = (p,) use_query_limit = True else: p = string.replace(p, '*', '%') # we now use '*' as the truncation character ps = string.split(p, "->", 1) # check for span query: if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')): query_addons = "BETWEEN %s AND %s" query_params = (ps[0], ps[1]) use_query_limit = True else: if string.find(p, '%') > -1: query_addons = "LIKE %s" query_params = (p,) use_query_limit = True else: query_addons = "= %s" query_params = (p,) # special washing for fuzzy author index: if f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'): query_params_washed = () for query_param in query_params: query_params_washed += (wash_author_name(query_param),) query_params = query_params_washed # perform search: if use_query_limit: try: res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons), query_params, wildcard_limit=wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, 
query_addons), query_params) # fill the result set: for word, hitlist in res: - hitset_bibphrase = HitSet(hitlist) + hitset_bibphrase = intbitset(hitlist) # add the results: if set_used: set.union_update(hitset_bibphrase) else: set = hitset_bibphrase set_used = 1 #check to see if the query limit was reached if limit_reached: #raise an exception, so we can print a nice message to the user raise InvenioWebSearchWildcardLimitError(set) # okay, return result set: return set def search_unit_in_bibxxx(p, f, type, wl=0): """Searches for pattern 'p' inside bibxxx tables for field 'f' and returns hitset of recIDs found. The search type is defined by 'type' (e.g. equals to 'r' for a regexp search).""" # FIXME: quick hack for the journal index if f == 'journal': return search_unit_in_bibwords(p, f, wl=wl) p_orig = p # saving for eventual future 'no match' reporting limit_reached = 0 # flag for knowing if the query limit has been reached use_query_limit = False # flag for knowing if to limit the query results or not query_addons = "" # will hold additional SQL code for the query query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument) # wash arguments: f = string.replace(f, '*', '%') # replace truncation char '*' in field definition if type == 'r': query_addons = "REGEXP %s" query_params = (p,) use_query_limit = True else: p = string.replace(p, '*', '%') # we now use '*' as the truncation character ps = string.split(p, "->", 1) # check for span query: if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')): query_addons = "BETWEEN %s AND %s" query_params = (ps[0], ps[1]) use_query_limit = True else: if string.find(p, '%') > -1: query_addons = "LIKE %s" query_params = (p,) use_query_limit = True else: query_addons = "= %s" query_params = (p,) # construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) if not tl: # f index does not exist, nevermind pass # okay, start search: l = [] # will hold list of recID that matched for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) # construct and run query: if t == "001": if query_addons.find('BETWEEN') > -1 or query_addons.find('=') > -1: # verify that the params are integers (to avoid returning record 123 when searching for 123foo) try: query_params = tuple(int(param) for param in query_params) except ValueError: - return HitSet() + return intbitset() if use_query_limit: try: res = run_sql_with_limit("SELECT id FROM bibrec WHERE id %s" % query_addons, query_params, wildcard_limit=wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons, query_params) else: query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \ (bx, bibx, query_addons) if len(t) != 6 or t[-1:]=='%': # wildcard query, or only the beginning of field 't' # is defined, so add wildcard character: query += " AND bx.tag LIKE %s" query_params_and_tag = query_params + (t + '%',) else: # exact query for 't': query += " AND bx.tag=%s" query_params_and_tag = query_params + (t,) if use_query_limit: try: res 
= run_sql_with_limit(query, query_params_and_tag, wildcard_limit=wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql(query, query_params_and_tag) # fill the result set: for id_bibrec in res: if id_bibrec[0]: l.append(id_bibrec[0]) # check no of hits found: nb_hits = len(l) # okay, return result set: - set = HitSet(l) + set = intbitset(l) #check to see if the query limit was reached if limit_reached: #raise an exception, so we can print a nice message to the user raise InvenioWebSearchWildcardLimitError(set) return set def search_unit_in_solr(p, f=None, m=None): """ Query the Solr full-text index and return an intbitset corresponding to the result. Parameters (p,f,m) are the usual search unit ones. """ if m and (m == 'a' or m == 'r'): # phrase/regexp query if p.startswith('%') and p.endswith('%'): p = p[1:-1] # fix for partial phrase p = '"' + p + '"' return solr_get_bitset(p, CFG_SOLR_URL) def search_unit_in_bibrec(datetext1, datetext2, type='c'): """ Return hitset of recIDs found that were either created or modified (according to 'type' arg being 'c' or 'm') from datetext1 until datetext2, inclusive. Does not pay attention to pattern, collection, anything. Useful to intersect later on with the 'real' query. """ - set = HitSet() + set = intbitset() if type.startswith("m"): type = "modification_date" else: type = "creation_date" # by default we are searching for creation dates parts = datetext1.split('->') if len(parts) > 1 and datetext1 == datetext2: datetext1 = parts[0] datetext2 = parts[1] if datetext1 == datetext2: res = run_sql("SELECT id FROM bibrec WHERE %s LIKE %%s" % (type,), (datetext1 + '%',)) else: res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (type, type), (datetext1, datetext2)) for row in res: set += row[0] return set def search_unit_by_times_cited(p): """ Return hitset of recIDs found that are cited P times. Usually P looks like '10->23'. """ numstr = '"'+p+'"' #this is sort of stupid but since we may need to #get the records that do _not_ have cites, we have to #know the ids of all records, too #but this is needed only if p is 0, starts with "0->" or ends with "->0" allrecs = [] if p == 0 or p == "0" or \ p.startswith("0->") or p.endswith("->0"): - allrecs = HitSet(run_sql("SELECT id FROM bibrec")) + allrecs = intbitset(run_sql("SELECT id FROM bibrec")) return get_records_with_num_cites(numstr, allrecs) def search_unit_refersto(query): """ Search for records satisfying the query (e.g. author:ellis) and return the hitset of records referred to by these records. """ if query: ahitset = search_pattern(p=query) if ahitset: return get_refersto_hitset(ahitset) else: - return HitSet([]) + return intbitset([]) else: - return HitSet([]) + return intbitset([]) def search_unit_citedby(query): """ Search for records satisfying the query (e.g. author:ellis) and return the hitset of records cited by these records.
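search_unit_by_times_cited() delegates the span arithmetic to get_records_with_num_cites(); the hypothetical helper below only illustrates what a span such as '10->23' means, using an invented recID-to-citation-count mapping:

    from invenio.intbitset import intbitset

    def toy_records_cited_n_times(numstr, citation_counts):
        """numstr is e.g. '"10->23"' or '"5"'; citation_counts maps recID -> count."""
        numstr = numstr.strip('"')
        if '->' in numstr:
            lo, hi = [int(x) for x in numstr.split('->', 1)]
        else:
            lo = hi = int(numstr)
        return intbitset([recid for recid, n in citation_counts.iteritems()
                          if lo <= n <= hi])

    print list(toy_records_cited_n_times('"10->23"', {1: 4, 2: 10, 3: 23, 4: 99}))
    # -> [2, 3]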
""" if query: ahitset = search_pattern(p=query) if ahitset: return get_citedby_hitset(ahitset) else: - return HitSet([]) + return intbitset([]) else: - return HitSet([]) + return intbitset([]) def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True): """Return dict of hitsets given by intersection of hitset with the collection universes.""" _ = gettext_set_language(ln) # search stage 4: intersect with the collection universe: if verbose and of.startswith("h"): t1 = os.times()[4] results = {} results_nbhits = 0 for coll in colls: results[coll] = hitset_in_any_collection & get_collection_reclist(coll) results_nbhits += len(results[coll]) if results_nbhits == 0: # no hits found, try to search in Home: results_in_Home = hitset_in_any_collection & get_collection_reclist(CFG_SITE_NAME) if len(results_in_Home) > 0: # some hits found in Home, so propose this search: if of.startswith("h") and display_nearest_terms_box: url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[]) print_warning(req, _("No match found in collection %(x_collection)s. Other public collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\ {'x_collection': '<em>' + string.join([get_coll_i18nname(coll, ln, False) for coll in colls], ', ') + '</em>', 'x_url_open': '<a class="nearestterms" href="%s">' % (url), 'x_nb_hits': len(results_in_Home), 'x_url_close': '</a>'}) results = {} else: # no hits found in Home, recommend different search terms: if of.startswith("h") and display_nearest_terms_box: print_warning(req, _("No public collection matched your query. " "If you were looking for a non-public document, please choose " "the desired restricted collection first.")) results = {} if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits) print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1)) return results def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"): """Return intersection of search 'results' (a dict of hitsets with collection as key) with the 'hitset', i.e. apply 'hitset' intersection to each collection within search 'results'. If the final 'results' set is to be empty, and 'ap' (approximate pattern) is true, and then print the `warningtext' and return the original 'results' set unchanged. If 'ap' is false, then return empty results set. """ if ap: results_ap = copy.deepcopy(results) else: results_ap = {} # will return empty dict in case of no hits found nb_total = 0 for coll in results.keys(): results[coll].intersection_update(hitset) nb_total += len(results[coll]) if nb_total == 0: if of.startswith("h"): print_warning(req, aptext) results = results_ap return results def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG): """Return a box similar to ``Not satisfied...'' one by proposing author searches for similar names. Namely, take AUTHOR_NAME and the first initial of the firstame (after comma) and look into author index whether authors with e.g. middle names exist. Useful mainly for CERN Library that sometimes contains name forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the same person. The box isn't proposed if no similarly named authors are found to exist. 
""" # return nothing if not configured: if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0: return "" # return empty box if there is no initial: if re.match(r'[^ ,]+, [^ ]', author_name) is None: return "" # firstly find name comma initial: author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name) # secondly search for similar name forms: similar_author_names = {} for name in author_name_to_search, strip_accents(author_name_to_search): for tag in get_field_tags("author"): # deduce into which bibxxx table we will search: digit1, digit2 = int(tag[0]), int(tag[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(tag) != 6 or tag[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx, (name + "%", tag + "%")) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx, (name + "%", tag)) for row in res: similar_author_names[row[0]] = 1 # remove the original name and sort the list: try: del similar_author_names[author_name] except KeyError: pass # thirdly print the box: out = "" if similar_author_names: out_authors = similar_author_names.keys() out_authors.sort() tmp_authors = [] for out_author in out_authors: nbhits = get_nbhits_in_bibxxx(out_author, "author") if nbhits: tmp_authors.append((out_author, nbhits)) out += websearch_templates.tmpl_similar_author_names( authors=tmp_authors, ln=ln) return out def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True): """Return text box containing list of 'n' nearest terms above/below 'p' for the field 'f' for matching type 't' (words/phrases) in language 'ln'. Propose new searches according to `urlargs' with the new words. If `intro_text_p' is true, then display the introductory message, otherwise print only the nearest terms in the box content. """ # load the right message language _ = gettext_set_language(ln) out = "" nearest_terms = [] if not p: # sanity check p = "." 
if p.startswith('%') and p.endswith('%'): p = p[1:-1] # fix for partial phrase index_id = get_index_id_from_field(f) if f == 'fulltext': if CFG_SOLR_URL: return _("No match found, please enter different search terms.") else: # FIXME: workaround for not having native phrase index yet t = 'w' # special indexes: if f == 'refersto': return _("There are no records referring to %s.") % cgi.escape(p) if f == 'citedby': return _("There are no records cited by %s.") % cgi.escape(p) # look for nearest terms: if t == 'w': nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n) if not nearest_terms: return _("No word index is available for %s.") % \ ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>') else: nearest_terms = [] if index_id: nearest_terms = get_nearest_terms_in_idxphrase(p, index_id, n, n) if f == 'datecreated' or f == 'datemodified': nearest_terms = get_nearest_terms_in_bibrec(p, f, n, n) if not nearest_terms: nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n) if not nearest_terms: return _("No phrase index is available for %s.") % \ ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>') terminfo = [] for term in nearest_terms: if t == 'w': hits = get_nbhits_in_bibwords(term, f) else: if index_id: hits = get_nbhits_in_idxphrases(term, f) elif f == 'datecreated' or f == 'datemodified': hits = get_nbhits_in_bibrec(term, f) else: hits = get_nbhits_in_bibxxx(term, f) argd = {} argd.update(urlargd) # check which fields contained the requested parameter, and replace it. for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'): if px in argd: argd_px = argd[px] if t == 'w': # p was stripped of accents, to do the same: argd_px = strip_accents(argd_px) if f == argd[fx] or f == "anyfield" or f == "": if string.find(argd_px, p) > -1: argd[px] = string.replace(argd_px, p, term) break else: if string.find(argd_px, f+':'+p) > -1: if string.find(term.strip(), ' ') > -1: term = '"' + term + '"' argd[px] = string.replace(argd_px, f+':'+p, f+':'+term) break elif string.find(argd_px, f+':"'+p+'"') > -1: argd[px] = string.replace(argd_px, f+':"'+p+'"', f+':"'+term+'"') break elif string.find(argd_px, f+':\''+p+'\'') > -1: argd[px] = string.replace(argd_px, f+':\''+p+'\'', f+':\''+term+'\'') break terminfo.append((term, hits, argd)) intro = "" if intro_text_p: # add full leading introductory text if f: intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \ {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>", 'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + "</em>"} else: intro = _("Search term %s did not match any record. 
Nearest terms in any collection are:") % \ ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>") return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo, intro=intro) def get_nearest_terms_in_bibwords(p, f, n_below, n_above): """Return list of +n -n nearest terms to word `p' in index for field `f'.""" nearest_words = [] # will hold the (sorted) list of nearest words to return # deduce into which bibwordsX table we will search: bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id else: return nearest_words # firstly try to get `n' closest words above `p': res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX, (p, n_above)) for row in res: nearest_words.append(row[0]) nearest_words.reverse() # secondly insert given word `p': nearest_words.append(p) # finally try to get `n' closest words below `p': res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX, (p, n_below)) for row in res: nearest_words.append(row[0]) return nearest_words def get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above): """Browse (-n_above, +n_below) closest bibliographic phrases for the given pattern p in the given field idxPHRASE table, regardless of collection. Return list of [phrase1, phrase2, ... , phrase_n].""" if CFG_INSPIRE_SITE and index_id in (3, 15): # FIXME: workaround due to new fuzzy index return [p,] idxphraseX = "idxPHRASE%02dF" % index_id res_above = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above)) res_above = map(lambda x: x[0], res_above) res_above.reverse() res_below = run_sql("SELECT term FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below)) res_below = map(lambda x: x[0], res_below) return res_above + res_below def get_nearest_terms_in_idxphrase_with_collection(p, index_id, n_below, n_above, collection): """Browse (-n_above, +n_below) closest bibliographic phrases for the given pattern p in the given field idxPHRASE table, - considering the collection (HitSet). + considering the collection (intbitset). Return list of [(phrase1, hitset), (phrase2, hitset), ... , (phrase_n, hitset)].""" idxphraseX = "idxPHRASE%02dF" % index_id res_above = run_sql("SELECT term,hitlist FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above * 3)) - res_above = [(term, HitSet(hitlist) & collection) for term, hitlist in res_above] + res_above = [(term, intbitset(hitlist) & collection) for term, hitlist in res_above] res_above = [(term, len(hitlist)) for term, hitlist in res_above if hitlist] res_below = run_sql("SELECT term,hitlist FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below * 3)) - res_below = [(term, HitSet(hitlist) & collection) for term, hitlist in res_below] + res_below = [(term, intbitset(hitlist) & collection) for term, hitlist in res_below] res_below = [(term, len(hitlist)) for term, hitlist in res_below if hitlist] res_above.reverse() return res_above[-n_above:] + res_below[:n_below] def get_nearest_terms_in_bibxxx(p, f, n_below, n_above): """Browse (-n_above, +n_below) closest bibliographic phrases for the given pattern p in the given field f, regardless of collection. Return list of [phrase1, phrase2, ... , phrase_n].""" ## determine browse field: if not f and string.find(p, ":") > 0: # does 'p' contain ':'? 
f, p = string.split(p, ":", 1) # FIXME: quick hack for the journal index if f == 'journal': return get_nearest_terms_in_bibwords(p, f, n_below, n_above) ## We are going to take max(n_below, n_above) as the number of ## values to ferch from bibXXx. This is needed to work around ## MySQL UTF-8 sorting troubles in 4.0.x. Proper solution is to ## use MySQL 4.1.x or our own idxPHRASE in the future. index_id = get_index_id_from_field(f) if index_id: return get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above) n_fetch = 2*max(n_below, n_above) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) ## start browsing to fetch list of hits: browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique) # always add self to the results set: browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1 for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) # firstly try to get `n' closest phrases above `p': if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value<%%s AND bx.tag LIKE %%s ORDER BY bx.value DESC LIMIT %%s""" % bx, (p, t + "%", n_fetch)) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value<%%s AND bx.tag=%%s ORDER BY bx.value DESC LIMIT %%s""" % bx, (p, t, n_fetch)) for row in res: browsed_phrases[row[0]] = 1 # secondly try to get `n' closest phrases equal to or below `p': if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value>=%%s AND bx.tag LIKE %%s ORDER BY bx.value ASC LIMIT %%s""" % bx, (p, t + "%", n_fetch)) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value>=%%s AND bx.tag=%%s ORDER BY bx.value ASC LIMIT %%s""" % bx, (p, t, n_fetch)) for row in res: browsed_phrases[row[0]] = 1 # select first n words only: (this is needed as we were searching # in many different tables and so aren't sure we have more than n # words right; this of course won't be needed when we shall have # one ACC table only for given field): phrases_out = browsed_phrases.keys() phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)), string.lower(strip_accents(y)))) # find position of self: try: idx_p = phrases_out.index(p) except: idx_p = len(phrases_out)/2 # return n_above and n_below: return phrases_out[max(0, idx_p-n_above):idx_p+n_below] def get_nearest_terms_in_bibrec(p, f, n_below, n_above): """Return list of nearest terms and counts from bibrec table. p is usually a date, and f either datecreated or datemodified. Note: below/above count is very approximative, not really respected. 
""" col = 'creation_date' if f == 'datemodified': col = 'modification_date' res_above = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s') FROM bibrec WHERE %s < %%s ORDER BY %s DESC LIMIT %%s""" % (col, col, col), (p, n_above)) res_below = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s') FROM bibrec WHERE %s > %%s ORDER BY %s ASC LIMIT %%s""" % (col, col, col), (p, n_below)) out = set([]) for row in res_above: out.add(row[0]) for row in res_below: out.add(row[0]) out_list = list(out) out_list.sort() return list(out_list) def get_nbhits_in_bibrec(term, f): """Return number of hits in bibrec table. term is usually a date, and f is either 'datecreated' or 'datemodified'.""" col = 'creation_date' if f == 'datemodified': col = 'modification_date' res = run_sql("SELECT COUNT(*) FROM bibrec WHERE %s LIKE %%s" % (col,), (term + '%',)) return res[0][0] def get_nbhits_in_bibwords(word, f): """Return number of hits for word 'word' inside words index for field 'f'.""" out = 0 # deduce into which bibwordsX table we will search: bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX, (word,)) for hitlist in res: - out += len(HitSet(hitlist[0])) + out += len(intbitset(hitlist[0])) return out def get_nbhits_in_idxphrases(word, f): """Return number of hits for word 'word' inside phrase index for field 'f'.""" out = 0 # deduce into which bibwordsX table we will search: idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: idxphraseX = "idxPHRASE%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % idxphraseX, (word,)) for hitlist in res: - out += len(HitSet(hitlist[0])) + out += len(intbitset(hitlist[0])) return out def get_nbhits_in_bibxxx(p, f): """Return number of hits for word 'word' inside words index for field 'f'.""" ## determine browse field: if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) # FIXME: quick hack for the journal index if f == 'journal': return get_nbhits_in_bibwords(p, f) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) # start searching: recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., } (unique recIDs, therefore) for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag LIKE %%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t + "%")) else: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag=%%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t)) for row in res: recIDs[row[0]] = 1 return len(recIDs) def get_mysql_recid_from_aleph_sysno(sysno): """Returns DB's recID for ALEPH sysno passed in the argument (e.g. "002379334CER"). 
Returns None in case of failure.""" out = None res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""", (sysno,)) if res: out = res[0][0] return out def guess_primary_collection_of_a_record(recID): """Return primary collection name a record recid belongs to, by testing 980 identifier. May lead to bad guesses when a collection is defined dynamically via dbquery. In that case, return 'CFG_SITE_NAME'.""" out = CFG_SITE_NAME dbcollids = get_fieldvalues(recID, "980__a") if dbcollids: for dbcollid in dbcollids: dbquery = "collection:" + dbcollid res = run_sql("SELECT name FROM collection WHERE dbquery=%s", (dbquery,)) if res: out = res[0][0] break if CFG_CERN_SITE: # dirty hack for ATLAS collections at CERN: if out in ('ATLAS Communications', 'ATLAS Internal Notes'): for alternative_collection in ('ATLAS Communications Physics', 'ATLAS Communications General', 'ATLAS Internal Notes Physics', 'ATLAS Internal Notes General',): if recID in get_collection_reclist(alternative_collection): out = alternative_collection break return out _re_collection_url = re.compile('/collection/(.+)') def guess_collection_of_a_record(recID, referer=None, recreate_cache_if_needed=True): """Return collection name a record recid belongs to, by first testing the referer URL if provided and otherwise returning the primary collection.""" if referer: dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer) #requests can come from different invenio installations, with different collections if CFG_SITE_URL.find(hostname) < 0: return guess_primary_collection_of_a_record(recID) g = _re_collection_url.match(path) if g: name = urllib.unquote_plus(g.group(1)) #check if this collection actually exist (also normalize the name if case-insensitive) name = get_coll_normalised_name(name) if name and recID in get_collection_reclist(name): return name elif path.startswith('/search'): if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() query = cgi.parse_qs(query) for name in query.get('cc', []) + query.get('c', []): name = get_coll_normalised_name(name) if name and recID in get_collection_reclist(name, recreate_cache_if_needed=False): return name return guess_primary_collection_of_a_record(recID) def is_record_in_any_collection(recID, recreate_cache_if_needed=True): """Return True if the record belongs to at least one collection. This is a good, although not perfect, indicator to guess if webcoll has already run after this record has been entered into the system. """ if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() for name in collection_reclist_cache.cache.keys(): if recID in get_collection_reclist(name, recreate_cache_if_needed=False): return True return False def get_all_collections_of_a_record(recID, recreate_cache_if_needed=True): """Return all the collection names a record belongs to. Note this function is O(n_collections).""" ret = [] if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() for name in collection_reclist_cache.cache.keys(): if recID in get_collection_reclist(name, recreate_cache_if_needed=False): ret.append(name) return ret def get_tag_name(tag_value, prolog="", epilog=""): """Return tag name from the known tag value, by looking up the 'tag' table. Return empty string in case of failure. 
Example: input='100__%', output='first author'.""" out = "" res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,)) if res: out = prolog + res[0][0] + epilog return out def get_fieldcodes(): """Returns a list of field codes that may have been passed as 'search options' in URL. Example: output=['subject','division'].""" out = [] res = run_sql("SELECT DISTINCT(code) FROM field") for row in res: out.append(row[0]) return out def get_field_name(code): """Return the corresponding field_name given the field code. e.g. reportnumber -> report number.""" res = run_sql("SELECT name FROM field WHERE code=%s", (code, )) if res: return res[0][0] else: return "" def get_field_tags(field): """Returns a list of MARC tags for the field code 'field'. Returns empty list in case of error. Example: field='author', output=['100__%','700__%'].""" out = [] query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (field, )) for val in res: out.append(val[0]) return out def get_fieldvalues_alephseq_like(recID, tags_in, can_see_hidden=False): """Return buffer of ALEPH sequential-like textual format with fields found in the list TAGS_IN for record RECID. If can_see_hidden is True, just print everything. Otherwise hide fields from CFG_BIBFORMAT_HIDDEN_TAGS. """ out = "" if type(tags_in) is not list: tags_in = [tags_in,] if len(tags_in) == 1 and len(tags_in[0]) == 6: ## case A: one concrete subfield asked, so print its value if found ## (use with care: can mislead if field has multiple occurrences) out += string.join(get_fieldvalues(recID, tags_in[0]),"\n") else: ## case B: print our "text MARC" format; works safely all the time # find out which tags to output: dict_of_tags_out = {} if not tags_in: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 else: for tag in tags_in: if len(tag) == 0: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 elif len(tag) == 1: for j in range(0, 10): dict_of_tags_out["%s%d%%" % (tag, j)] = 1 elif len(tag) < 5: dict_of_tags_out["%s%%" % tag] = 1 elif len(tag) >= 5: dict_of_tags_out[tag[0:5]] = 1 tags_out = dict_of_tags_out.keys() tags_out.sort() # search all bibXXx tables as needed: for tag in tags_out: digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for continue if tag.startswith("001") or tag.startswith("00%"): if out: out += "\n" out += "%09d %s %d" % (recID, "001__", recID) bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(tag)+'%')) # go through fields: field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] printme = True #check the stuff in hiddenfields if not can_see_hidden: for htag in CFG_BIBFORMAT_HIDDEN_TAGS: ltag = len(htag) samelenfield = field[0:ltag] if samelenfield == htag: printme = False if ind1 == "_": ind1 = "" if ind2 == "_": ind2 = "" # print field tag if printme: if field_number != field_number_old or field[:-1] != field_old[:-1]: if out: out += "\n" out += "%09d %s " % (recID, field[:5]) field_number_old = field_number field_old = field # print subfield value if field[0:2] ==
"00" and field[-1:] == "_": out += value else: out += "$$%s%s" % (field[-1:], value) return out def record_exists(recID): """Return 1 if record RECID exists. Return 0 if it doesn't exist. Return -1 if it exists but is marked as deleted. """ out = 0 res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1) if res: try: # if recid is '123foo', mysql will return id=123, and we don't want that recID = int(recID) except ValueError: return 0 # record exists; now check whether it isn't marked as deleted: dbcollids = get_fieldvalues(recID, "980__%") if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids): out = -1 # exists, but marked as deleted else: out = 1 # exists fine return out def record_empty(recID): """ Is this record empty, e.g. has only 001, waiting for integration? @param recID: the record identifier. @type recID: int @return: 1 if the record is empty, 0 otherwise. @rtype: int """ record = get_record(recID) if record is None or len(record) < 2: return 1 else: return 0 def record_public_p(recID, recreate_cache_if_needed=True): """Return 1 if the record is public, i.e. if it can be found in the Home collection. Return 0 otherwise. """ return recID in get_collection_reclist(CFG_SITE_NAME, recreate_cache_if_needed=recreate_cache_if_needed) def get_creation_date(recID, fmt="%Y-%m-%d"): "Returns the creation date of the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def get_modification_date(recID, fmt="%Y-%m-%d"): "Returns the date of last modification for the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def print_warning(req, msg, msg_type='', prologue='<br />', epilogue='<br />'): "Prints warning message and flushes output." if req and msg: req.write(websearch_templates.tmpl_print_warning( msg = msg, type = msg_type, prologue = prologue, epilogue = epilogue, )) return def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. 
This is suitable for displaying navigation links at the bottom of the search results page.""" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints a stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/next/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page.""" out = "" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_hosted_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_results_overview(colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[], hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below.""" out = "" new_colls = [] for coll in colls: new_colls.append({ 'id': get_colID(coll), 'code': coll, 'name': get_coll_i18nname(coll, ln, False), }) return websearch_templates.tmpl_print_results_overview( ln = ln, results_final_nb_total = results_final_nb_total, results_final_nb = results_final_nb, cpu_time = cpu_time, colls = new_colls, ec = ec, hosted_colls_potential_results_p = hosted_colls_potential_results_p, ) def print_hosted_results(url_and_engine, ln=CFG_SITE_LANG, of=None, req=None, no_records_found=False, search_timed_out=False, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Prints the full results of a hosted collection""" if of.startswith("h"): if no_records_found: return "<br />No results found." if search_timed_out: return "<br />The search engine did not respond in time." return websearch_templates.tmpl_print_hosted_results( url_and_engine=url_and_engine, ln=ln, of=of, req=req, limit=limit ) def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG): """Sort records in 'recIDs' list according to sort field 'sort_field' in order 'sort_order'.
If more than one instance of 'sort_field' is found for a given record, try to choose the one given by 'sort_pattern', for example "sort by report number that starts with CERN-PS". Note that 'sort_field' can be a field code like 'author' or a MARC tag like '100__a' directly.""" _ = gettext_set_language(ln) ## check arguments: if not sort_field: return recIDs if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT: if of.startswith('h'): print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning") return recIDs sort_fields = string.split(sort_field, ",") recIDs_dict = {} recIDs_out = [] ## first deduce sorting MARC tag out of the 'sort_field' argument: tags = [] for sort_field in sort_fields: if sort_field and str(sort_field[0:2]).isdigit(): # sort_field starts with two digits, so this is probably a MARC tag already tags.append(sort_field) else: # let us check the 'field' table query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (sort_field, )) if res: for row in res: tags.append(row[0]) else: if of.startswith('h'): print_warning(req, _("Sorry, %s does not seem to be a valid sort option. Choosing title sort instead.") % cgi.escape(sort_field), "Error") tags.append("245__a") if verbose >= 3: print_warning(req, "Sorting by tags %s." % cgi.escape(repr(tags))) if sort_pattern: print_warning(req, "Sorting preferentially by %s." % cgi.escape(sort_pattern)) ## check if we have sorting tag defined: if tags: # fetch the necessary field values: for recID in recIDs: val = "" # will hold the value according to which recID will sort vals = [] # will hold all values found in sorting tag for recID for tag in tags: if CFG_CERN_SITE and tag == '773__c': # CERN hack: journal sorting # 773__c contains page numbers, e.g. 3-13, and we want to sort by 3, and numerically: vals.extend(["%050s" % x.split("-",1)[0] for x in get_fieldvalues(recID, tag)]) else: vals.extend(get_fieldvalues(recID, tag)) if sort_pattern: # try to pick the tag value that corresponds to the sort pattern bingo = 0 for v in vals: if v.lower().startswith(sort_pattern.lower()): # bingo! bingo = 1 val = v break if not bingo: # sort_pattern not present, so add other vals after spaces val = sort_pattern + " " + string.join(vals) else: # no sort pattern defined, so join them all together val = string.join(vals) val = strip_accents(val.lower()) # sort values regardless of accents and case if recIDs_dict.has_key(val): recIDs_dict[val].append(recID) else: recIDs_dict[val] = [recID] # sort them: recIDs_dict_keys = recIDs_dict.keys() recIDs_dict_keys.sort() # now that keys are sorted, create output array: for k in recIDs_dict_keys: for s in recIDs_dict[k]: recIDs_out.append(s) # ascending or descending? if sort_order == 'a': recIDs_out.reverse() # okay, we are done return recIDs_out else: # good, no sort needed return recIDs def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=CFG_SITE_LANG, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True, print_records_epilogue_p=True, verbose=0, tab='', sf='', so='d', sp='', rm=''): """ Prints list of records 'recIDs' formatted according to 'format' in groups of 'rg' starting from 'jrec'. Assumes that the input list 'recIDs' is sorted in reverse order, so it counts records from tail to head.
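The heart of sort_records() above is an accent- and case-insensitive sort key; the sketch below uses a simplified unicodedata-based stand-in for Invenio's own strip_accents(), with invented field values:

    import unicodedata

    def toy_strip_accents(s):
        # simplified stand-in for Invenio's strip_accents():
        nfkd = unicodedata.normalize('NFKD', s.decode('utf-8'))
        return ''.join(c for c in nfkd if not unicodedata.combining(c)).encode('utf-8')

    records = {10: ['Ellis, J.'], 11: ['\xc3\x89cole thesis'], 12: ['apple']}
    keys = sorted((toy_strip_accents(' '.join(vals)).lower(), recid)
                  for recid, vals in records.iteritems())
    print [recid for (val, recid) in keys]   # -> [12, 11, 10]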
A value of 'rg=-9999' means to print all records: to be used with care. Print also list of RELEVANCES for each record (if defined), in between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE. Print prologue and/or epilogue specific to 'format' if 'print_records_prologue_p' and/or print_records_epilogue_p' are True. 'sf' is sort field and 'rm' is ranking method that are passed here only for proper linking purposes: e.g. when a certain ranking method or a certain sort field was selected, keep it selected in any dynamic search links that may be printed. """ # load the right message language _ = gettext_set_language(ln) # sanity checking: if req is None: return # get user_info (for formatting based on user) if isinstance(req, cStringIO.OutputType): user_info = {} else: user_info = collect_user_info(req) if len(recIDs): nb_found = len(recIDs) if rg == -9999: # print all records rg = nb_found else: rg = abs(rg) if jrec < 1: # sanity checks jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) # will print records from irec_max to irec_min excluded: irec_max = nb_found - jrec irec_min = nb_found - jrec - rg if irec_min < 0: irec_min = -1 if irec_max >= nb_found: irec_max = nb_found - 1 #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max)) if format.startswith('x'): # print header if needed if print_records_prologue_p: print_records_prologue(req, format) # print records recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] format_records(recIDs_to_print, format, ln=ln, search_pattern=search_pattern, record_separator="\n", user_info=user_info, req=req) # print footer if needed if print_records_epilogue_p: print_records_epilogue(req, format) elif format.startswith('t') or str(format[0:3]).isdigit(): # we are doing plain text output: for irec in range(irec_max, irec_min, -1): x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) req.write(x) if x: req.write('\n') elif format == 'excel': recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] create_excel(recIDs=recIDs_to_print, req=req, ln=ln, ot=ot) else: # we are doing HTML output: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): # portfolio and on-the-fly formats: for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)) elif format.startswith("hb"): # HTML brief format: display_add_to_basket = True if user_info: if user_info['email'] == 'guest': if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4: display_add_to_basket = False else: if not user_info['precached_usebaskets']: display_add_to_basket = False req.write(websearch_templates.tmpl_record_format_htmlbrief_header( ln = ln)) for irec in range(irec_max, irec_min, -1): row_number = jrec+irec_max-irec recid = recIDs[irec] if relevances and relevances[irec]: relevance = relevances[irec] else: relevance = '' record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) req.write(websearch_templates.tmpl_record_format_htmlbrief_body( ln = ln, recid = recid, row_number = row_number, relevance = relevance, record = record, relevances_prologue = relevances_prologue, relevances_epilogue = relevances_epilogue, display_add_to_basket = display_add_to_basket )) req.write(websearch_templates.tmpl_record_format_htmlbrief_footer( ln = ln, display_add_to_basket = 
display_add_to_basket)) elif format.startswith("hd"): # HTML detailed format: for irec in range(irec_max, irec_min, -1): if record_exists(recIDs[irec]) == -1: print_warning(req, _("The record has been deleted.")) continue unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])), recIDs[irec], ln=ln) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % ln recid = recIDs[irec] recid_to_display = recid # Record ID used to build the URL. if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: try: recid_to_display = get_fieldvalues(recid, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG)[0] except IndexError: # No external sysno is available, keep using # internal recid. pass tabs = [(unordered_tabs[tab_id]['label'], \ '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid_to_display, tab_id, link_ln), \ tab_id == tab, unordered_tabs[tab_id]['enabled']) \ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] tabs_counts = get_detailed_page_tabs_counts(recid) citedbynum = tabs_counts['Citations'] references = tabs_counts['References'] discussions = tabs_counts['Discussions'] # load content if tab == 'usage': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, citationnum=citedbynum, referencenum=references, discussionnum=discussions)) r = calculate_reading_similarity_list(recIDs[irec], "downloads") downloadsimilarity = None downloadhistory = None #if r: # downloadsimilarity = r if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS: downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln) r = calculate_reading_similarity_list(recIDs[irec], "pageviews") viewsimilarity = None if r: viewsimilarity = r content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec], ln, downloadsimilarity=downloadsimilarity, downloadhistory=downloadhistory, viewsimilarity=viewsimilarity) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'citations': recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln, citationnum=citedbynum, referencenum=references, discussionnum=discussions)) req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln)) # Citing citinglist = calculate_cited_by_list(recid) req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid, ln, citinglist, sf=sf, so=so, sp=sp, rm=rm)) # Self-cited selfcited = get_self_cited_by(recid) req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid, ln, selfcited=selfcited, citinglist=citinglist)) # Co-cited s = calculate_co_cited_with_list(recid) cociting = None if s: cociting = s req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid, ln, cociting=cociting)) # Citation history, if needed citationhistory = None if citinglist: citationhistory = create_citation_history_graph_and_box(recid, ln) #debug if verbose > 3: print_warning(req, "Citation graph debug: " + \ str(len(citationhistory))) req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory)) req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln)) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) elif tab == 'references': 
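## NB: the 'references' tab computes nothing itself: it simply renders the
## record through BibFormat's HDREF output format, wrapped in the same
## detailed-record container used by the other tabs below.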
req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, citationnum=citedbynum, referencenum=references, discussionnum=discussions)) req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose)) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'keywords': import bibclassify_webinterface recid = recIDs[irec] bibclassify_webinterface.main_page(req, recid, tabs, ln, webstyle_templates) elif tab == 'plots': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) content = websearch_templates.tmpl_record_plots(recID=recIDs[irec], ln=ln) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) else: # Metadata tab req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, show_short_rec_p=False, citationnum=citedbynum, referencenum=references, discussionnum=discussions)) creationdate = None modificationdate = None if record_exists(recIDs[irec]) == 1: creationdate = get_creation_date(recIDs[irec]) modificationdate = get_modification_date(recIDs[irec]) content = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) content = websearch_templates.tmpl_detailed_record_metadata( recID = recIDs[irec], ln = ln, format = format, creationdate = creationdate, modificationdate = modificationdate, content = content) # display of the next-hit/previous-hit/back-to-search links # on the detailed record pages content += websearch_templates.tmpl_display_back_to_search(req, recIDs[irec], ln) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln, creationdate=creationdate, modificationdate=modificationdate, show_short_rec_p=False)) if len(tabs) > 0: # Add the mini box at bottom of the page if CFG_WEBCOMMENT_ALLOW_REVIEWS: from invenio.webcomment import get_mini_reviews reviews = get_mini_reviews(recid = recIDs[irec], ln=ln) else: reviews = '' actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose) files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose) req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec], ln, format, files=files, reviews=reviews, actions=actions)) else: # Other formats for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)) else: print_warning(req, _("Use different search terms.")) def print_records_prologue(req, format, cc=None): """ Print the appropriate prologue for list of records in the given format. 
""" prologue = "" # no prologue needed for HTML or Text formats if format.startswith('xm'): prologue = websearch_templates.tmpl_xml_marc_prologue() elif format.startswith('xn'): prologue = websearch_templates.tmpl_xml_nlm_prologue() elif format.startswith('xw'): prologue = websearch_templates.tmpl_xml_refworks_prologue() elif format.startswith('xr'): prologue = websearch_templates.tmpl_xml_rss_prologue(cc=cc) elif format.startswith('xe'): prologue = websearch_templates.tmpl_xml_endnote_prologue() elif format.startswith('xo'): prologue = websearch_templates.tmpl_xml_mods_prologue() elif format.startswith('xp'): prologue = websearch_templates.tmpl_xml_podcast_prologue(cc=cc) elif format.startswith('x'): prologue = websearch_templates.tmpl_xml_default_prologue() req.write(prologue) def print_records_epilogue(req, format): """ Print the appropriate epilogue for list of records in the given format. """ epilogue = "" # no epilogue needed for HTML or Text formats if format.startswith('xm'): epilogue = websearch_templates.tmpl_xml_marc_epilogue() elif format.startswith('xn'): epilogue = websearch_templates.tmpl_xml_nlm_epilogue() elif format.startswith('xw'): epilogue = websearch_templates.tmpl_xml_refworks_epilogue() elif format.startswith('xr'): epilogue = websearch_templates.tmpl_xml_rss_epilogue() elif format.startswith('xe'): epilogue = websearch_templates.tmpl_xml_endnote_epilogue() elif format.startswith('xo'): epilogue = websearch_templates.tmpl_xml_mods_epilogue() elif format.startswith('xp'): epilogue = websearch_templates.tmpl_xml_podcast_epilogue() elif format.startswith('x'): epilogue = websearch_templates.tmpl_xml_default_epilogue() req.write(epilogue) def get_record(recid): """Directly the record object corresponding to the recid.""" if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: value = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT='recstruct'", (recid, )) if value: try: return deserialize_via_marshal(value[0][0]) except: ### In case of corruption, let's rebuild it! pass return create_record(print_record(recid, 'xm'))[0] def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress, search_pattern=None, user_info=None, verbose=0, sf='', so='d', sp='', rm=''): """ Prints record 'recID' formatted according to 'format'. 'sf' is sort field and 'rm' is ranking method that are passed here only for proper linking purposes: e.g. when a certain ranking method or a certain sort field was selected, keep it selected in any dynamic search links that may be printed. """ if format == 'recstruct': return get_record(recID) _ = gettext_set_language(ln) display_claim_this_paper = False try: display_claim_this_paper = user_info["precached_viewclaimlink"] except (KeyError, TypeError): display_claim_this_paper = False #check from user information if the user has the right to see hidden fields/tags in the #records as well can_see_hidden = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # New Python BibFormat procedure for formatting # Old procedure follows further below # We must still check some special formats, but these # should disappear when BibFormat improves. 
if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \ or format.lower().startswith('t') \ or format.lower().startswith('hm') \ or str(format[0:3]).isdigit() \ or ot): # Unspecified format is hd if format == '': format = 'hd' if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html': # HTML output displays a default value for deleted records. # Other format have to deal with it. out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) # at the end of HTML brief mode, print the "Detailed record" functionality: if format.lower().startswith('hb') and \ format.lower() != 'hb_p': out += websearch_templates.tmpl_print_record_brief_links(ln=ln, recID=recID, sf=sf, so=so, sp=sp, rm=rm, display_claim_link=display_claim_this_paper) return out # Old PHP BibFormat procedure for formatting # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " <record>\n" out += " <header>\n" for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD): out += " <identifier>%s</identifier>\n" % oai_id out += " <datestamp>%s</datestamp>\n" % get_modification_date(recID) out += " </header>\n" out += " <metadata>\n" if format.startswith("xm") or format == "marcxml": # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res and record_exist_p == 1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out += """ <record xmlns="http://www.loc.gov/MARC21/slim">\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) elif format.startswith("xm"): out += """ <record>\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD) if oai_ids: out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \ (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0]) out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n" else: # controlfields query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\ "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\ "ORDER BY bb.field_number, b.tag ASC" res = run_sql(query, (recID, )) for row in res: field, value = row[0], row[1] value = encode_for_xml(value) out += """ <controlfield tag="%s" >%s</controlfield>\n""" % \ (encode_for_xml(field[0:3]), value) # datafields i = 1 # Do not process bib00x and bibrec_bib00x, as # they are controlfields. 
So start at bib01x and bibrec_bib01x (and set i = 0 at the end of the first loop) for digit1 in range(0, 10): for digit2 in range(i, 10): bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(digit1)+str(digit2)+'%')) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_" or ind1 == "": ind1 = " " if ind2 == "_" or ind2 == "": ind2 = " " # print field tag, unless hidden printme = True if not can_see_hidden: for htag in CFG_BIBFORMAT_HIDDEN_TAGS: ltag = len(htag) samelenfield = field[0:ltag] if samelenfield == htag: printme = False if printme: if field_number != field_number_old or field[:-1] != field_old[:-1]: if field_number_old != -999: out += """ </datafield>\n""" out += """ <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \ (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2)) field_number_old = field_number field_old = field # print subfield value value = encode_for_xml(value) out += """ <subfield code="%s">%s</subfield>\n""" % \ (encode_for_xml(field[-1:]), value) # all fields/subfields printed in this run, so close the tag: if field_number_old != -999: out += """ </datafield>\n""" i = 0 # Next loop should start looking at bib00x and bibrec_bib00x # we are at the end of printing the record: out += " </record>\n" elif format == "xd" or format == "oai_dc": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out += """ <dc xmlns="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://www.openarchives.org/OAI/1.1/dc.xsd">\n""" if record_exist_p == -1: out += "" else: for f in get_fieldvalues(recID, "041__a"): out += " <language>%s</language>\n" % f for f in get_fieldvalues(recID, "100__a"): out += " <creator>%s</creator>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "700__a"): out += " <creator>%s</creator>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "245__a"): out += " <title>%s</title>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "65017a"): out += " <subject>%s</subject>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "8564_u"): if f.split('.')[-1] == 'png': continue out += " <identifier>%s</identifier>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "520__a"): out += " <description>%s</description>\n" % encode_for_xml(f) out += " <date>%s</date>\n" % get_creation_date(recID) out += " </dc>\n" elif len(format) == 6 and str(format[0:3]).isdigit(): # user has asked to print some fields only if format == "001": out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format) else: vals = get_fieldvalues(recID, format) for val in vals: out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format) elif format.startswith('t'): ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) else: out += get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) elif format == "hm": if record_exist_p == -1: out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)) + "</pre>" else: out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)) + "</pre>" elif format.startswith("h") and ot: ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) + "</pre>" else: out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) + "</pre>" elif format == "hd": # HTML detailed format if record_exist_p == -1: out += _("The record has been deleted.") else: # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_detailed( ln = ln, recID = recID, ) elif format.startswith("hb_") or format.startswith("hd_"): # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hx"): # BibTeX format, called on the fly: if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hs"): # for citation/download similarity navigation links: if record_exist_p == -1: out += _("The record has been deleted.") else: out += '<a href="%s">' % websearch_templates.build_search_url(recid=recID, ln=ln) # firstly, title: titles = get_fieldvalues(recID, "245__a") if titles: for title in titles: out += "<strong>%s</strong>" % title else: # usual title not found, try conference title: titles = get_fieldvalues(recID, "111__a") if titles: for title in titles: out += "<strong>%s</strong>" % title else: # just print record ID: out += "<strong>%s %d</strong>" % (get_field_i18nname("record ID", ln, False), recID) out += "</a>" # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += " - %s" % authors[0] if len(authors) > 1: out += " <em>et al</em>" # thirdly publication info: publinfos = get_fieldvalues(recID, "773__s") if not publinfos: publinfos = get_fieldvalues(recID, "909C4s") if not publinfos: publinfos = get_fieldvalues(recID, "037__a") if not publinfos: publinfos = get_fieldvalues(recID, "088__a") if publinfos: out += " - %s" % publinfos[0] else: # fourthly publication year (if not publication info): years = get_fieldvalues(recID, "773__y") if not years: years = get_fieldvalues(recID, "909C4y") if not years: years = get_fieldvalues(recID, "260__c") if years: out += " (%s)" % years[0] else: # HTML brief format by default if record_exist_p == -1: out += _("The record has been deleted.") else: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format)) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: if CFG_WEBSEARCH_CALL_BIBFORMAT: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) # at the end of HTML brief mode, print the "Detailed record" functionality: if
format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): pass # do nothing for portfolio and on-the-fly formats else: out += websearch_templates.tmpl_print_record_brief_links(ln=ln, recID=recID, sf=sf, so=so, sp=sp, rm=rm, display_claim_link=display_claim_this_paper) # print record closing tags, if needed: if format == "marcxml" or format == "oai_dc": out += " </metadata>\n" out += " </record>\n" return out def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0): """ Calls BibFormat and returns formatted record. BibFormat will decide by itself if old or new BibFormat must be used. """ from invenio.bibformat_utils import get_pdf_snippets keywords = [] if search_pattern is not None: units = create_basic_search_units(None, str(search_pattern), None) keywords = [unit[1] for unit in units if (unit[0] != '-' and unit[2] in [None, 'fulltext'])] out = format_record(recID, of=format, ln=ln, search_pattern=keywords, user_info=user_info, verbose=verbose) if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and user_info and \ 'fulltext' in user_info['uri']: # check snippets only if URL contains fulltext # FIXME: make it work for CLI too, via new function arg if keywords: snippets = get_pdf_snippets(recID, keywords) if snippets: out += snippets return out def log_query(hostname, query_args, uid=-1): """ Log query into the query and user_query tables. Return id_query or None in case of problems. """ id_query = None if uid >= 0: # log the query only if uid is reasonable res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1) try: id_query = res[0][0] except: id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,)) if id_query: run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)", (uid, id_query, hostname, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return id_query def log_query_info(action, p, f, colls, nb_records_found_total=-1): """Write some info to the log file for later analysis.""" try: log = open(CFG_LOGDIR + "/search.log", "a") log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime())) log.write(action+"#") log.write(p+"#") log.write(f+"#") for coll in colls[:-1]: log.write("%s," % coll) log.write("%s#" % colls[-1]) log.write("%d" % nb_records_found_total) log.write("\n") log.close() except: pass return ### CALLABLES def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, sf="", so="d", sp="", rm="", of="id", ot="", aas=0, p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab="", wl=CFG_WEBSEARCH_WILDCARD_LIMIT): """Perform search or browse request, without checking for authentication. Return list of recIDs found, if of=id. Otherwise create web page. The arguments are as follows: req - mod_python Request class instance. cc - current collection (e.g. "ATLAS"). The collection the user started to search/browse from. c - collection list (e.g. ["Theses", "Books"]). The collections the user may have selected/deselected when starting to search from 'cc'. p - pattern to search for (e.g. "ellis and muon or kaon"). f - field to search within (e.g. "author"). rg - records in groups of (e.g. "10"). Defines how many hits per collection in the search results page are displayed. sf - sort field (e.g. "title").
so - sort order ("a"=ascending, "d"=descending). sp - sort pattern (e.g. "CERN-") -- in case there are more values in a sort field, this argument tells which one to prefer. rm - ranking method (e.g. "jif"). Defines whether results should be ranked by some known ranking method. of - output format (e.g. "hb"). Usually starting "h" means HTML output (and "hb" for HTML brief, "hd" for HTML detailed), "x" means XML output, "t" means plain text output, "id" means no output at all but to return list of recIDs found. (Suitable for high-level API.) ot - output only these MARC tags (e.g. "100,700,909C0b"). Useful if only some fields are to be shown in the output, e.g. for library to control some fields. aas - advanced search ("0" means no, "1" means yes). Whether search was called from within the advanced search interface. p1 - first pattern to search for in the advanced search interface. Much like 'p'. f1 - first field to search within in the advanced search interface. Much like 'f'. m1 - first matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op1 - first operator, to join the first and the second unit in the advanced search interface. ("a" add, "o" or, "n" not). p2 - second pattern to search for in the advanced search interface. Much like 'p'. f2 - second field to search within in the advanced search interface. Much like 'f'. m2 - second matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op2 - second operator, to join the second and the third unit in the advanced search interface. ("a" add, "o" or, "n" not). p3 - third pattern to search for in the advanced search interface. Much like 'p'. f3 - third field to search within in the advanced search interface. Much like 'f'. m3 - third matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). sc - split by collection ("0" no, "1" yes). Governs whether we want to present the results in a single huge list, or split by collection. jrec - jump to record (e.g. "234"). Used for navigation inside the search results. recid - display record ID (e.g. "20000"). Do not search/browse but go straight away to the Detailed record page for the given recID. recidb - display record ID bis (e.g. "20010"). If greater than 'recid', then display records from recid to recidb. Useful for example for dumping records from the database for reformatting. sysno - display old system SYS number (e.g. ""). If you migrate to Invenio from another system, and store your old SYS call numbers, you can use them instead of recid if you wish so. id - the same as recid, in case recid is not set. For backwards compatibility. idb - the same as recidb, in case recidb is not set. For backwards compatibility. sysnb - the same as sysno, in case sysno is not set. For backwards compatibility. action - action to do. "SEARCH" for searching, "Browse" for browsing. Default is to search. d1 - first datetime in full YYYY-mm-dd HH:MM:SS format (e.g. "1998-08-23 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd1' takes precedence over d1y, d1m, d1d if these are defined. d1y - first date's year (e.g. "1998"). Useful for search limits on creation/modification date. d1m - first date's month (e.g. "08"). Useful for search limits on creation/modification date. d1d - first date's day (e.g. "23"). Useful for search limits on creation/modification date. d2 - second datetime in full YYYY-mm-dd HH:MM:SS format (e.g. "1998-09-02 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd2' takes precedence over d2y, d2m, d2d if these are defined. d2y - second date's year (e.g. "1998"). Useful for search limits on creation/modification date. d2m - second date's month (e.g. "09"). Useful for search limits on creation/modification date. d2d - second date's day (e.g. "02"). Useful for search limits on creation/modification date. dt - first and second date's type (e.g. "c"). Specifies whether to search in creation dates ("c") or in modification dates ("m"). When dt is not set and d1* and d2* are set, the default is "c". verbose - verbose level (0=min, 9=max). Useful to print some internal information on the searching process in case something goes wrong. ap - alternative patterns (0=no, 1=yes). In case no exact match is found, the search engine can try alternative patterns, e.g. to replace non-alphanumeric characters by a boolean query. ap defines if this is wanted. ln - language of the search interface (e.g. "en"). Useful for internationalization. ec - list of external search engines to search as well (e.g. "SPIRES HEP"). wl - wildcard limit (e.g. "100"). Wildcard queries will be limited to this number of results. """ selected_external_collections_infos = None # wash output format: of = wash_output_format(of) # raise an exception when trying to print out HTML from the CLI: if of.startswith("h"): assert req # for every search engine request asking for an HTML output, we # first regenerate cache of collection and field I18N names if # needed; so that later we won't bother checking timestamps for # I18N names at all: if of.startswith("h"): collection_i18nname_cache.recreate_cache_if_needed() field_i18nname_cache.recreate_cache_if_needed() # wash all arguments requiring special care: try: (cc, colls_to_display, colls_to_search, hosted_colls, wash_colls_debug) = wash_colls(cc, c, sc, verbose) # which colls to search and to display?
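## wash_colls() raises InvenioWebSearchUnknownCollectionError whenever 'cc'
## does not name a known collection; each output format gets a graceful
## fallback below (error page for HTML, empty list for 'id', empty but
## valid XML for 'x...').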
except InvenioWebSearchUnknownCollectionError, exc: colname = exc.colname if of.startswith("h"): page_start(req, of, cc, aas, ln, getUid(req), websearch_templates.tmpl_collection_not_found_page_title(colname, ln)) req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln)) return page_end(req, of, ln) elif of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: return page_end(req, of, ln) p = wash_pattern(p) f = wash_field(f) p1 = wash_pattern(p1) f1 = wash_field(f1) p2 = wash_pattern(p2) f2 = wash_field(f2) p3 = wash_pattern(p3) f3 = wash_field(f3) datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d) # wash ranking method: if not is_method_valid(None, rm): rm = "" _ = gettext_set_language(ln) # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable) if sysnb != "" and sysno == "": sysno = sysnb if id > 0 and recid == -1: recid = id if idb > 0 and recidb == -1: recidb = idb # TODO deduce passed search limiting criterias (if applicable) pl, pl_in_url = "", "" # no limits by default if action != "browse" and req and not isinstance(req, cStringIO.OutputType) \ and req.args: # we do not want to add options while browsing or while calling via command-line fieldargs = cgi.parse_qs(req.args) for fieldcode in get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: pl += "+%s:\"%s\" " % (fieldcode, val) pl_in_url += "&%s=%s" % (urllib.quote(fieldcode), urllib.quote(val)) # deduce recid from sysno argument (if applicable): if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record: recid = get_mysql_recid_from_aleph_sysno(sysno) if recid is None: recid = 0 # use recid 0 to indicate that this sysno does not exist # deduce collection we are in (if applicable): if recid > 0: referer = None if req: referer = req.headers_in.get('Referer') cc = guess_collection_of_a_record(recid, referer) # deduce user id (if applicable): try: uid = getUid(req) except: uid = 0 ## 0 - start output if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist ## 1 - detailed record display title, description, keywords = \ websearch_templates.tmpl_record_page_header_content(req, recid, ln) if req is not None and not req.header_only: page_start(req, of, cc, aas, ln, uid, title, description, keywords, recid, tab) # Default format is hb but we are in detailed -> change 'of' if of == "hb": of = "hd" if record_exists(recid): if recidb <= recid: # sanity check recidb = recid + 1 if of == "id": return [recidx for recidx in range(recid, recidb) if record_exists(recidx)] else: print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose, tab=tab, sf=sf, so=so, sp=sp, rm=rm) if req and of.startswith("h"): # register detailed record page view event client_ip_address = str(req.remote_ip) register_page_view_event(recid, uid, client_ip_address) else: # record does not exist if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) elif action == "browse": ## 2 - browse needed of = 'hb' page_start(req, of, cc, aas, ln, uid, _("Browse"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) 
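## browsing always happens in the HTML brief context ('of' was forced to
## 'hb' above); each non-empty pattern/field pair coming from the simple or
## advanced interface is browsed separately below.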
req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) try: if aas == 1 or (p1 or p2 or p3): browse_pattern(req, colls_to_search, p1, f1, rg, ln) browse_pattern(req, colls_to_search, p2, f2, rg, ln) browse_pattern(req, colls_to_search, p3, f3, rg, ln) else: browse_pattern(req, colls_to_search, p, f, rg, ln) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) elif rm and p.startswith("recid:"): ## 3-ter - similarity search (or old-style citation search) needed if req and not req.header_only: page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) if record_exists(p[6:]) != 1: # record does not exist if of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find similar ones to it t1 = os.times()[4] results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \ rank_records(rm, 0, get_collection_reclist(cc), string.split(p), verbose) if results_similar_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_warning(req, results_similar_comments) print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) elif of=="id": return results_similar_recIDs elif of.startswith("x"): print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) else: # rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, results_similar_relevances_prologue) print_warning(req, results_similar_relevances_epilogue) print_warning(req, results_similar_comments) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif p.startswith("cocitedwith:"): #WAS EXPERIMENTAL ## 3-terter - cited by search needed page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, 
dt, jrec, ec, action)) recID = p[12:] if record_exists(recID) != 1: # record does not exist if of.startswith("h"): print_warning(req, _("Requested record does not seem to exist.")) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find co-cited ones: t1 = os.times()[4] results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID))) if results_cocited_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) elif of=="id": return results_cocited_recIDs elif of.startswith("x"): print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) else: # cited rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, "nothing found") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: ## 3 - common search needed query_in_cache = False query_representation_in_cache = repr((p,f,colls_to_search, wl)) page_start(req, of, cc, aas, ln, uid, p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h") and verbose and wash_colls_debug: print_warning(req, "wash_colls debugging info : %s" % wash_colls_debug) # search into the hosted collections only if the output format is html or xml if hosted_colls and (of.startswith("h") or of.startswith("x")) and not p.startswith("recid:"): # hosted_colls_results : the hosted collections' searches that did not timeout # hosted_colls_timeouts : the hosted collections' searches that timed out and will be searched later on again (hosted_colls_results, hosted_colls_timeouts) = calculate_hosted_collections_results(req, [p, p1, p2, p3], f, hosted_colls, verbose, ln, CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH) # successful searches if hosted_colls_results: hosted_colls_true_results = [] for result in hosted_colls_results: # if the number of results is None or 0 (or False) then just do nothing if result[1] == None or result[1] == False: # these are the searches the returned no or zero results if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned no results" % result[0][1].name) else: # these are the searches that actually returned results on time hosted_colls_true_results.append(result) if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2])) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time") if hosted_colls_timeouts: if verbose: for timeout in hosted_colls_timeouts: print_warning(req, "Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name) # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")): 
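## hosted collections exist, but either the output format cannot carry
## their results or this is a direct recid: query, so both result sets are
## explicitly emptied below: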
(hosted_colls_results, hosted_colls_timeouts) = (None, None) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections to be searched") ## let's define some useful boolean variables: # True means there are actual or potential hosted collections results to be printed hosted_colls_actual_or_potential_results_p = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts)) # True means there are hosted collections timeouts to take care of later # (useful for more accurate printing of results later) hosted_colls_potential_results_p = not (not hosted_colls or not hosted_colls_timeouts) # True means we only have hosted collections to deal with only_hosted_colls_actual_or_potential_results_p = not colls_to_search and hosted_colls_actual_or_potential_results_p if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) t1 = os.times()[4] - results_in_any_collection = HitSet() + results_in_any_collection = intbitset() if aas == 1 or (p1 or p2 or p3): ## 3A - advanced search try: results_in_any_collection = search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p2: results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if op1 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op1 == "o": # or results_in_any_collection.union_update(results_tmp) elif op1 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(op1), "Error") if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p3: results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if op2 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op2 == "o": # or results_in_any_collection.union_update(results_tmp) elif op2 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." 
% cgi.escape(op2), "Error") except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: ## 3B - simple search if search_results_cache.cache.has_key(query_representation_in_cache): # query is not in the cache already, so reuse it: query_in_cache = True results_in_any_collection = search_results_cache.cache[query_representation_in_cache] if verbose and of.startswith("h"): print_warning(req, "Search stage 0: query found in cache, reusing cached results.") else: try: # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection" # recommendations when there are results only in the hosted collections. Also added the if clause to avoid # searching in case we know we only have actual or potential hosted collections results if not only_hosted_colls_actual_or_potential_results_p: results_in_any_collection = search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p, wl=wl) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if len(results_in_any_collection) == 0 and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # store this search query results into search results cache if needed: if CFG_WEBSEARCH_SEARCH_CACHE_SIZE and not query_in_cache: if len(search_results_cache.cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE: search_results_cache.clear() search_results_cache.cache[query_representation_in_cache] = results_in_any_collection if verbose and of.startswith("h"): print_warning(req, "Search stage 3: storing query results in cache.") # search stage 4: intersection with collection universe: try: # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection" # recommendations when there results only in the hosted collections. 
Also added the if clause to avoid # searching in case we know since the last stage that we have no results in any collection if len(results_in_any_collection) != 0: results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p) else: results_final = {} except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # search stage 5: apply search option limits and restrictions: if datetext1 != "" and results_final != {}: if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2)) try: results_final = intersect_results_with_hitset(req, results_final, search_unit_in_bibrec(datetext1, datetext2, dt), ap, aptext= _("No match within your time limits, " "discarding this condition..."), of=of) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) #if of.startswith("x"): # # Print empty, but valid XML # print_records_prologue(req, of) # print_records_epilogue(req, of) return page_end(req, of, ln) if pl and results_final != {}: pl = wash_pattern(pl) if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying search pattern limit %s..." 
% cgi.escape(pl)) try: results_final = intersect_results_with_hitset(req, results_final, search_pattern_parenthesised(req, pl, ap=0, ln=ln, wl=wl), ap, aptext=_("No match within your search limits, " "discarding this condition..."), of=of) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) t2 = os.times()[4] cpu_time = t2 - t1 ## search stage 6: display results: results_final_nb_total = 0 results_final_nb = {} # will hold number of records found in each collection # (in simple dict to display overview more easily) for coll in results_final.keys(): results_final_nb[coll] = len(results_final[coll]) #results_final_nb_total += results_final_nb[coll] # Now let us calculate results_final_nb_total more precisely, # in order to get the total number of "distinct" hits across # searched collections; this is useful because a record might # have been attributed to more than one primary collection; so # we have to avoid counting it multiple times. The price to # pay for this accuracy of results_final_nb_total is somewhat # increased CPU time. if results_final.keys() == 1: # only one collection; no need to union them results_final_for_all_selected_colls = results_final.values()[0] results_final_nb_total = results_final_nb.values()[0] else: # okay, some work ahead to union hits across collections: - results_final_for_all_selected_colls = HitSet() + results_final_for_all_selected_colls = intbitset() for coll in results_final.keys(): results_final_for_all_selected_colls.union_update(results_final[coll]) results_final_nb_total = len(results_final_for_all_selected_colls) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: for result in hosted_colls_true_results: colls_to_search.append(result[0][1].name) results_final_nb[result[0][1].name] = result[1] results_final_nb_total += result[1] cpu_time += result[2] if hosted_colls_timeouts: for timeout in hosted_colls_timeouts: colls_to_search.append(timeout[1].name) # use -963 as a special number to identify the collections that timed out results_final_nb[timeout[1].name] = -963 # we continue past this point only if there is a hosted collection that has timed out and might offer potential results if results_final_nb_total ==0 and not hosted_colls_potential_results_p: if of.startswith("h"): print_warning(req, "No match found, please enter different search terms.") elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # yes, some hits found: good! # collection list may have changed due to not-exact-match-found policy so check it out: for coll in results_final.keys(): if coll not in colls_to_search: colls_to_search.append(coll) # print results overview: if of == "id": # we have been asked to return list of recIDs recIDs = list(results_final_for_all_selected_colls) if sf: # do we have to sort? 
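## sorting and ranking are mutually exclusive here: an explicit sort field
## 'sf' wins over the ranking method 'rm' when building the of=id list.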
recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank? results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_selected_colls, string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if results_final_for_all_colls_rank_records_output[0]: recIDs = results_final_for_all_colls_rank_records_output[0] return recIDs elif of.startswith("h"): if of not in ['hcs']: # added the hosted_colls_potential_results_p parameter to help print out the overview more accurately req.write(print_results_overview(colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln, ec, hosted_colls_potential_results_p=hosted_colls_potential_results_p)) selected_external_collections_infos = print_external_results_overview(req, cc, [p, p1, p2, p3], f, ec, verbose, ln) # print number of hits found for XML outputs: if of.startswith("x"): req.write("<!-- Search-Engine-Total-Number-Of-Results: %s -->\n" % results_final_nb_total) # print records: if of in ['hcs']: # feed the current search to be summarized: from invenio.search_engine_summarizer import summarize_records search_p = p search_f = f if not p and (aas == 1 or p1 or p2 or p3): op_d = {'n': ' and not ', 'a': ' and ', 'o': ' or ', '': ''} triples = ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) triples_len = len(triples) for i in range(triples_len): fi, pi, oi = triples[i] # e.g.: if i < triples_len-1 and not triples[i+1][1]: # if p2 empty triples[i+1][0] = '' # f2 must be too oi = '' # and o1 if ' ' in pi: pi = '"'+pi+'"' if fi: fi = fi + ':' search_p += fi + pi + op_d[oi] search_f = '' summarize_records(results_final_for_all_selected_colls, 'hcs', ln, search_p, search_f, req) else: if len(colls_to_search) > 1: cpu_time = -1 # we do not want to have search time printed on each collection print_records_prologue(req, of, cc=cc) results_final_colls = [] wlqh_results_overlimit = 0 for coll in colls_to_search: if results_final.has_key(coll) and len(results_final[coll]): if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) results_final_recIDs = list(results_final[coll]) results_final_relevances = [] results_final_relevances_prologue = "" results_final_relevances_epilogue = "" if sf: # do we have to sort? results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank?
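## rank_records() returns a 5-tuple; a minimal sketch of how it is consumed
## below (same names as in the code; 'wrd' and 'ellis' are illustrative
## values only):
##
##   (recids, relevances, rel_prologue, rel_epilogue, comments) = \
##       rank_records('wrd', 0, hitset, ['ellis'], 0)
##   if recids:   # ranking succeeded: use the reordered hits
##       ...
##   else:        # ranking failed: 'comments' carries the message to show
##       ...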
results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \ rank_records(rm, 0, results_final[coll], string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if of.startswith("h"): print_warning(req, results_final_comments) if results_final_recIDs_ranked: results_final_recIDs = results_final_recIDs_ranked else: # rank_records failed and returned some error message to display: print_warning(req, results_final_relevances_prologue) print_warning(req, results_final_relevances_epilogue) if len(results_final_recIDs) < CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT: results_final_colls.append(results_final_recIDs) else: wlqh_results_overlimit = 1 print_records(req, results_final_recIDs, jrec, rg, of, ot, ln, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, search_pattern=p, print_records_prologue_p=False, print_records_epilogue_p=False, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) # store the last search results page if req and not isinstance(req, cStringIO.OutputType): session_param_set(req, 'websearch-last-query', req.unparsed_uri) if not wlqh_results_overlimit: # store list of results if user wants to display hits # in a single list, or store list of collections of records # if user displays hits split by collections: session_param_set(req, 'websearch-last-query-hits', results_final_colls) else: results_final_colls = [] session_param_set(req, 'websearch-last-query-hits', results_final_colls) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: # TODO: add a verbose message here for result in hosted_colls_true_results: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts: # TODO: add a verbose message here # TODO: check if verbose messages still work when dealing with (re)calculations of timeouts (hosted_colls_timeouts_results, hosted_colls_timeouts_timeouts) = do_calculate_hosted_collections_results(req, ln, None, verbose, None, hosted_colls_timeouts, CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH) if hosted_colls_timeouts_results: for result in hosted_colls_timeouts_results: if result[1] == None or result[1] == False: ## these are the searches the returned no or zero results ## also print a nearest terms box, in case this is the only ## collection being searched and it returns no results? 
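## NB: -963 is the sentinel used throughout this function as the "number of
## results" of a hosted collection whose search timed out, so that
## print_hosted_search_info() can label it accordingly.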
if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, no_records_found=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) else: # these are the searches that actually returned results on time if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts_timeouts: for timeout in hosted_colls_timeouts_timeouts: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=timeout[0], ln=ln, of=of, req=req, search_timed_out=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) print_records_epilogue(req, of) if f == "author" and of.startswith("h"): req.write(create_similarly_named_authors_link_box(p, ln)) # log query: try: id_query = log_query(req.remote_host, req.args, uid) if of.startswith("h") and id_query: if not of in ['hcs']: # display alert/RSS teaser for non-summary formats: user_info = collect_user_info(req) display_email_alert_part = True if user_info: if user_info['email'] == 'guest': if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4: display_email_alert_part = False else: if not user_info['precached_usealerts']: display_email_alert_part = False req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, \ ln=ln, display_email_alert_part=display_email_alert_part)) except: # do not log query if req is None (used by CLI interface) pass log_query_info("ss", p, f, colls_to_search, results_final_nb_total) # External searches if of.startswith("h"): if not of in ['hcs']: perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) def perform_request_cache(req, action="show"): """Manipulates the search engine cache.""" req.content_type = "text/html" req.send_http_header() req.write("") out = "" out += "
def perform_request_cache(req, action="show"):
    """Manipulates the search engine cache."""
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    out = ""
    out += "<h1>Search Cache</h1>"
    # clear cache if requested:
    if action == "clear":
        search_results_cache.clear()
    req.write(out)
    # show collection reclist cache:
    out = "<h3>Collection reclist cache</h3>"
    out += "- collection table last updated: %s" % get_table_update_time('collection')
    out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache.timestamp
    out += "<br />- reclist cache contents:"
    out += "<blockquote>"
    for coll in collection_reclist_cache.cache.keys():
        if collection_reclist_cache.cache[coll]:
            out += "%s (%d)<br />" % (coll, len(collection_reclist_cache.cache[coll]))
    out += "</blockquote>"
    req.write(out)
    # show search results cache:
    out = "<h3>Search Cache</h3>"
    out += "- search cache usage: %d queries cached (max. ~%d)" % \
           (len(search_results_cache.cache), CFG_WEBSEARCH_SEARCH_CACHE_SIZE)
    if len(search_results_cache.cache):
        out += "<br />- search cache contents:"
        out += "<blockquote>"
        for query, hitset in search_results_cache.cache.items():
            out += "<br />%s ... %s" % (query, hitset)
        out += """<p><a href="%s/search/cache?action=clear">clear search results cache</a>""" % CFG_SITE_URL
        out += "</blockquote>"
    req.write(out)
    # show field i18nname cache:
    out = "<h3>Field I18N names cache</h3>"
    out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
    out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for field in field_i18nname_cache.cache.keys():
        for ln in field_i18nname_cache.cache[field].keys():
            out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache.cache[field][ln])
    out += "</blockquote>"
    req.write(out)
    # show collection i18nname cache:
    out = "<h3>Collection I18N names cache</h3>"
    out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
    out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for coll in collection_i18nname_cache.cache.keys():
        for ln in collection_i18nname_cache.cache[coll].keys():
            out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache.cache[coll][ln])
    out += "</blockquote>"
    req.write(out)
    req.write("</html>")
    return "\n"
def perform_request_log(req, date=""):
    """Display search log information for given date."""
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    req.write("<h1>Search Log</h1>")
    if date: # case A: display stats for a day
        yyyymmdd = string.atoi(date)
        req.write("<p><big><strong>Date: %d</strong></big></p>" % yyyymmdd)
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % \
                  ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
        # read file:
        p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
        lines = p.readlines()
        p.close()
        # process lines:
        i = 0
        for line in lines:
            try:
                datetime, dummy_aas, p, f, c, nbhits = string.split(line, "#")
                i += 1
                req.write("<tr><td>#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                          % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
            except:
                pass # ignore eventual wrong log lines
        req.write("</table>")
    else: # case B: display summary stats per day
        yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
        yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Day", "Number of Queries"))
        for day in range(yyyymm01, yyyymmdd + 1):
            p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
            for line in p.readlines():
                req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                          (day, CFG_SITE_URL, day, line))
            p.close()
        req.write("</table>")
    req.write("</html>")
    return "\n"
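
# Each search.log line consumed by the grep above is '#'-separated, matching
# the string.split(line, "#") unpacking: a YYYYMMDDHHMMSS timestamp, the
# advanced-search flag, pattern, field, collection and hit count.  A small
# doctest-style sketch (the field values are invented for illustration):
#
#     >>> line = "20111103142512#0#ellis#author#Articles#12\n"
#     >>> datetime, dummy_aas, p, f, c, nbhits = line.split("#")
#     >>> (datetime[8:10], datetime[10:12], datetime[12:])
#     ('14', '25', '12')
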
    ") req.write("") return "\n" +def get_all_field_values(tag): + """ + Return all existing values stored for a given tag. + @param tag: the full tag, e.g. 909C0b + @type tag: string + @return: the list of values + @rtype: list of strings + """ + table = 'bib%2dx' % int(tag[:2]) + return [row[0] for row in run_sql("SELECT DISTINCT(value) FROM %s WHERE tag=%%s" % table, (tag, ))] def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True): """ Analyze RECIDS and look for TAGS and return most popular values and the frequency with which they occur sorted according to descending frequency. If a value is found in EXCLUDE_VALUES, then do not count it. If COUNT_REPETITIVE_VALUES is True, then we count every occurrence of value in the tags. If False, then we count the value only once regardless of the number of times it may appear in a record. (But, if the same value occurs in another record, we count it, of course.) Example: >>> get_most_popular_field_values(range(11,20), '980__a') (('PREPRINT', 10), ('THESIS', 7), ...) >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a')) (('Ellis, J', 10), ('Ellis, N', 7), ...) >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J')) (('Ellis, N', 7), ...) """ def _get_most_popular_field_values_helper_sorter(val1, val2): "Compare VAL1 and VAL2 according to, firstly, frequency, then secondly, alphabetically." compared_via_frequencies = cmp(valuefreqdict[val2], valuefreqdict[val1]) if compared_via_frequencies == 0: return cmp(val1.lower(), val2.lower()) else: return compared_via_frequencies valuefreqdict = {} ## sanity check: if not exclude_values: exclude_values = [] if isinstance(tags, str): tags = (tags,) ## find values to count: vals_to_count = [] displaytmp = {} if count_repetitive_values: # counting technique A: can look up many records at once: (very fast) for tag in tags: vals_to_count.extend(get_fieldvalues(recids, tag)) else: # counting technique B: must count record-by-record: (slow) for recid in recids: vals_in_rec = [] for tag in tags: for val in get_fieldvalues(recid, tag, False): vals_in_rec.append(val) # do not count repetitive values within this record # (even across various tags, so need to unify again): dtmp = {} for val in vals_in_rec: dtmp[val.lower()] = 1 displaytmp[val.lower()] = val vals_in_rec = dtmp.keys() vals_to_count.extend(vals_in_rec) ## are we to exclude some of found values? 
def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True):
    """
    Analyze RECIDS and look for TAGS and return most popular values
    and the frequency with which they occur sorted according to
    descending frequency.

    If a value is found in EXCLUDE_VALUES, then do not count it.

    If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
    of value in the tags.  If False, then we count the value only once
    regardless of the number of times it may appear in a record.
    (But, if the same value occurs in another record, we count it, of
    course.)

    Example:
     >>> get_most_popular_field_values(range(11,20), '980__a')
     (('PREPRINT', 10), ('THESIS', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
     (('Ellis, J', 10), ('Ellis, N', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J'))
     (('Ellis, N', 7), ...)
    """
    def _get_most_popular_field_values_helper_sorter(val1, val2):
        "Compare VAL1 and VAL2 according to, firstly, frequency, then secondly, alphabetically."
        compared_via_frequencies = cmp(valuefreqdict[val2], valuefreqdict[val1])
        if compared_via_frequencies == 0:
            return cmp(val1.lower(), val2.lower())
        else:
            return compared_via_frequencies

    valuefreqdict = {}
    ## sanity check:
    if not exclude_values:
        exclude_values = []
    if isinstance(tags, str):
        tags = (tags,)
    ## find values to count:
    vals_to_count = []
    displaytmp = {}
    if count_repetitive_values:
        # counting technique A: can look up many records at once: (very fast)
        for tag in tags:
            vals_to_count.extend(get_fieldvalues(recids, tag))
    else:
        # counting technique B: must count record-by-record: (slow)
        for recid in recids:
            vals_in_rec = []
            for tag in tags:
                for val in get_fieldvalues(recid, tag, False):
                    vals_in_rec.append(val)
            # do not count repetitive values within this record
            # (even across various tags, so need to unify again):
            dtmp = {}
            for val in vals_in_rec:
                dtmp[val.lower()] = 1
                displaytmp[val.lower()] = val
            vals_in_rec = dtmp.keys()
            vals_to_count.extend(vals_in_rec)
    ## are we to exclude some of found values?
    for val in vals_to_count:
        if val not in exclude_values:
            if valuefreqdict.has_key(val):
                valuefreqdict[val] += 1
            else:
                valuefreqdict[val] = 1
    ## sort by descending frequency of values:
    out = ()
    vals = valuefreqdict.keys()
    vals.sort(_get_most_popular_field_values_helper_sorter)
    for val in vals:
        tmpdisplv = ''
        if displaytmp.has_key(val):
            tmpdisplv = displaytmp[val]
        else:
            tmpdisplv = val
        out += (tmpdisplv, valuefreqdict[val]),
    return out

def profile(p="", f="", c=CFG_SITE_NAME):
    """Profile search time."""
    import profile
    import pstats
    profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile")
    p = pstats.Stats("perform_request_search_profile")
    p.strip_dirs().sort_stats("cumulative").print_stats()
    return 0
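
# Usage sketch for get_most_popular_field_values above, mirroring the
# regression tests further below (record 41 of the Atlantis demo data); it
# shows how count_repetitive_values changes the tallies for the same input:
#
#     >>> get_most_popular_field_values((41,), ('690C_a', '980__a'),
#     ...                               count_repetitive_values=True)
#     (('THESIS', 2), ('REPORT', 1))
#     >>> get_most_popular_field_values((41,), ('690C_a', '980__a'),
#     ...                               count_repetitive_values=False)
#     (('REPORT', 1), ('THESIS', 1))
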
diff --git a/modules/websearch/lib/websearch_regression_tests.py b/modules/websearch/lib/websearch_regression_tests.py
index 41c182f72..5ad8e6d0e 100644
--- a/modules/websearch/lib/websearch_regression_tests.py
+++ b/modules/websearch/lib/websearch_regression_tests.py
@@ -1,1958 +1,1958 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# pylint: disable=C0301
# pylint: disable=E1102

"""WebSearch module regression tests."""

__revision__ = "$Id$"

import unittest
import re
import urlparse, cgi
import sys

if sys.hexversion < 0x2040000:
    # pylint: disable=W0622
    from sets import Set as set
    # pylint: enable=W0622

from mechanize import Browser, LinkNotFoundError

from invenio.config import CFG_SITE_URL, CFG_SITE_NAME, CFG_SITE_LANG, \
     CFG_SITE_RECORD, CFG_SITE_LANGS
from invenio.testutils import make_test_suite, \
                              run_test_suite, \
                              make_url, make_surl, test_web_page_content, \
                              merge_error_messages
from invenio.urlutils import same_urls_p
from invenio.search_engine import perform_request_search, \
     guess_primary_collection_of_a_record, guess_collection_of_a_record, \
     collection_restricted_p, get_permitted_restricted_collections, \
     search_pattern, search_unit, search_unit_in_bibrec, \
     wash_colls
from invenio.search_engine_utils import get_fieldvalues

if 'fr' in CFG_SITE_LANGS:
    lang_french_configured = True
else:
    lang_french_configured = False

def parse_url(url):
    parts = urlparse.urlparse(url)
    query = cgi.parse_qs(parts[4], True)
    return parts[2].split('/')[1:], query
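
# parse_url returns the path components after the site root plus the parsed
# query arguments; a doctest-style sketch (URL invented for illustration):
#
#     >>> parse_url('http://example.org/search?p=ellis&of=id')
#     (['search'], {'p': ['ellis'], 'of': ['id']})
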
"""websearch - availability of Search Tips in french""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search-tips?ln=fr', expected_text="Conseils de recherche")) def test_search_guide_page_availability(self): """websearch - availability of Search Guide""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search-guide', expected_text="Search Guide")) if lang_french_configured: def test_search_guide_page_availability_fr(self): """websearch - availability of Search Guide in french""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search-guide?ln=fr', expected_text="Guide de recherche")) class WebSearchTestLegacyURLs(unittest.TestCase): """ Check that the application still responds to legacy URLs for navigating, searching and browsing.""" def test_legacy_collections(self): """ websearch - collections handle legacy urls """ browser = Browser() def check(legacy, new, browser=browser): browser.open(legacy) got = browser.geturl() self.failUnless(same_urls_p(got, new), got) # Use the root URL unless we need more check(make_url('/', c=CFG_SITE_NAME), make_url('/', ln=CFG_SITE_LANG)) # Other collections are redirected in the /collection area check(make_url('/', c='Poetry'), make_url('/collection/Poetry', ln=CFG_SITE_LANG)) # Drop unnecessary arguments, like ln and as (when they are # the default value) args = {'as': 0} check(make_url('/', c='Poetry', **args), make_url('/collection/Poetry', ln=CFG_SITE_LANG)) # Otherwise, keep them args = {'as': 1, 'ln': CFG_SITE_LANG} check(make_url('/', c='Poetry', **args), make_url('/collection/Poetry', **args)) # Support the /index.py addressing too check(make_url('/index.py', c='Poetry'), make_url('/collection/Poetry', ln=CFG_SITE_LANG)) def test_legacy_search(self): """ websearch - search queries handle legacy urls """ browser = Browser() def check(legacy, new, browser=browser): browser.open(legacy) got = browser.geturl() self.failUnless(same_urls_p(got, new), got) # /search.py is redirected on /search # Note that `as' is a reserved word in Python 2.5 check(make_url('/search.py', p='nuclear', ln='en') + 'as=1', make_url('/search', p='nuclear', ln='en') + 'as=1') if lang_french_configured: def test_legacy_search_fr(self): """ websearch - search queries handle legacy urls """ browser = Browser() def check(legacy, new, browser=browser): browser.open(legacy) got = browser.geturl() self.failUnless(same_urls_p(got, new), got) # direct recid searches are redirected to /CFG_SITE_RECORD check(make_url('/search.py', recid=1, ln='fr'), make_url('/%s/1' % CFG_SITE_RECORD, ln='fr')) def test_legacy_search_help_link(self): """websearch - legacy Search Help page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search/index.en.html', expected_text="Help Central")) if lang_french_configured: def test_legacy_search_tips_link(self): """websearch - legacy Search Tips page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search/tips.fr.html', expected_text="Conseils de recherche")) def test_legacy_search_guide_link(self): """websearch - legacy Search Guide page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search/guide.en.html', expected_text="Search Guide")) class WebSearchTestRecord(unittest.TestCase): """ Check the interface of the /CFG_SITE_RECORD results """ def test_format_links(self): """ websearch - check format links for records """ browser = Browser() # We open the record in all known HTML formats for hformat in ('hd', 'hx', 'hm'): 
browser.open(make_url('/%s/1' % CFG_SITE_RECORD, of=hformat)) if hformat == 'hd': # hd format should have a link to the following # formats for oformat in ('hx', 'hm', 'xm', 'xd'): target = make_url('/%s/1/export/%s?ln=en' % (CFG_SITE_RECORD, oformat)) try: browser.find_link(url=target) except LinkNotFoundError: self.fail('link %r should be in page' % target) else: # non-hd HTML formats should have a link back to # the main detailed record target = make_url('/%s/1' % CFG_SITE_RECORD) try: browser.find_link(url=target) except LinkNotFoundError: self.fail('link %r should be in page' % target) return def test_exported_formats(self): """ websearch - check formats exported through /CFG_SITE_RECORD/1/export/ URLs""" self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hm' % CFG_SITE_RECORD), expected_text='245__ $$aALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hd' % CFG_SITE_RECORD), expected_text='ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/xm' % CFG_SITE_RECORD), expected_text='ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/xd' % CFG_SITE_RECORD), expected_text='ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hs' % CFG_SITE_RECORD), expected_text='ALEPH experiment' % \ (CFG_SITE_RECORD, CFG_SITE_LANG))) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hx' % CFG_SITE_RECORD), expected_text='title = "ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/t?ot=245' % CFG_SITE_RECORD), expected_text='245__ $$aALEPH experiment')) self.assertNotEqual([], test_web_page_content(make_url('/%s/1/export/t?ot=245' % CFG_SITE_RECORD), expected_text='001__')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/h?ot=245' % CFG_SITE_RECORD), expected_text='245__ $$aALEPH experiment')) self.assertNotEqual([], test_web_page_content(make_url('/%s/1/export/h?ot=245' % CFG_SITE_RECORD), expected_text='001__')) return def test_plots_tab(self): """ websearch - test to ensure the plots tab is working """ self.assertEqual([], test_web_page_content(make_url('/%s/8/plots' % CFG_SITE_RECORD), expected_text='div id="clip"', unexpected_text='Abstract')) class WebSearchTestCollections(unittest.TestCase): def test_traversal_links(self): """ websearch - traverse all the publications of a collection """ browser = Browser() try: for aas in (0, 1): args = {'as': aas} browser.open(make_url('/collection/Preprints', **args)) for jrec in (11, 21, 11, 28): args = {'jrec': jrec, 'cc': 'Preprints'} if aas: args['as'] = aas url = make_url('/search', **args) try: browser.follow_link(url=url) except LinkNotFoundError: args['ln'] = CFG_SITE_LANG url = make_url('/search', **args) browser.follow_link(url=url) except LinkNotFoundError: self.fail('no link %r in %r' % (url, browser.geturl())) def test_collections_links(self): """ websearch - enter in collections and subcollections """ browser = Browser() def tryfollow(url): cur = browser.geturl() body = browser.response().read() try: browser.follow_link(url=url) except LinkNotFoundError: print body self.fail("in %r: could not find %r" % ( cur, url)) return for aas in (0, 1): if aas: kargs = {'as': 1} else: kargs = {} kargs['ln'] = CFG_SITE_LANG # We navigate from immediate son to immediate son... 
browser.open(make_url('/', **kargs)) tryfollow(make_url('/collection/Articles%20%26%20Preprints', **kargs)) tryfollow(make_url('/collection/Articles', **kargs)) # But we can also jump to a grandson immediately browser.back() browser.back() tryfollow(make_url('/collection/ALEPH', **kargs)) return def test_records_links(self): """ websearch - check the links toward records in leaf collections """ browser = Browser() browser.open(make_url('/collection/Preprints')) def harvest(): """ Parse all the links in the page, and check that for each link to a detailed record, we also have the corresponding link to the similar records.""" records = set() similar = set() for link in browser.links(): path, q = parse_url(link.url) if not path: continue if path[0] == CFG_SITE_RECORD: records.add(int(path[1])) continue if path[0] == 'search': if not q.get('rm') == ['wrd']: continue recid = q['p'][0].split(':')[1] similar.add(int(recid)) self.failUnlessEqual(records, similar) return records # We must have 10 links to the corresponding /CFG_SITE_RECORD found = harvest() self.failUnlessEqual(len(found), 10) # When clicking on the "Search" button, we must also have # these 10 links on the records. browser.select_form(name="search") browser.submit() found = harvest() self.failUnlessEqual(len(found), 10) return class WebSearchTestBrowse(unittest.TestCase): def test_browse_field(self): """ websearch - check that browsing works """ browser = Browser() browser.open(make_url('/')) browser.select_form(name='search') browser['f'] = ['title'] browser.submit(name='action_browse') def collect(): # We'll get a few links to search for the actual hits, plus a # link to the following results. res = [] for link in browser.links(url_regex=re.compile(CFG_SITE_URL + r'/search\?')): if link.text == 'Advanced Search': continue dummy, q = parse_url(link.url) res.append((link, q)) return res # if we follow the last link, we should get another # batch. There is an overlap of one item. 
batch_1 = collect() browser.follow_link(link=batch_1[-1][0]) batch_2 = collect() # FIXME: we cannot compare the whole query, as the collection # set is not equal self.failUnlessEqual(batch_1[-2][1]['p'], batch_2[0][1]['p']) class WebSearchTestOpenURL(unittest.TestCase): def test_isbn_01(self): """ websearch - isbn query via OpenURL 0.1""" browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/openurl', isbn='0387940758')) dummy, current_q = parse_url(browser.geturl()) self.failUnlessEqual(current_q, { 'sc' : ['1'], 'p' : ['isbn:"0387940758"'], 'of' : ['hd'] }) def test_isbn_10_rft_id(self): """ websearch - isbn query via OpenURL 1.0 - rft_id""" browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/openurl', rft_id='urn:ISBN:0387940758')) dummy, current_q = parse_url(browser.geturl()) self.failUnlessEqual(current_q, { 'sc' : ['1'], 'p' : ['isbn:"0387940758"'], 'of' : ['hd'] }) def test_isbn_10(self): """ websearch - isbn query via OpenURL 1.0""" browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/openurl?rft.isbn=0387940758')) dummy, current_q = parse_url(browser.geturl()) self.failUnlessEqual(current_q, { 'sc' : ['1'], 'p' : ['isbn:"0387940758"'], 'of' : ['hd'] }) class WebSearchTestSearch(unittest.TestCase): def test_hits_in_other_collection(self): """ websearch - check extension of a query to the home collection """ browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/collection/ISOLDE', ln='en')) browser.select_form(name='search') browser['f'] = ['author'] browser['p'] = 'matsubara' browser.submit() dummy, current_q = parse_url(browser.geturl()) link = browser.find_link(text_regex=re.compile('.*hit', re.I)) dummy, target_q = parse_url(link.url) # the target query should be the current query without any c # or cc specified. for f in ('cc', 'c', 'action_search'): if f in current_q: del current_q[f] self.failUnlessEqual(current_q, target_q) def test_nearest_terms(self): """ websearch - provide a list of nearest terms """ browser = Browser() browser.open(make_url('')) # Search something weird browser.select_form(name='search') browser['p'] = 'gronf' browser.submit() dummy, original = parse_url(browser.geturl()) for to_drop in ('cc', 'action_search', 'f'): if to_drop in original: del original[to_drop] if 'ln' not in original: original['ln'] = [CFG_SITE_LANG] # we should get a few searches back, which are identical # except for the p field being substituted (and the cc field # being dropped). 
class WebSearchTestSearch(unittest.TestCase):

    def test_hits_in_other_collection(self):
        """ websearch - check extension of a query to the home collection """
        browser = Browser()

        # We do a precise search in an isolated collection
        browser.open(make_url('/collection/ISOLDE', ln='en'))

        browser.select_form(name='search')
        browser['f'] = ['author']
        browser['p'] = 'matsubara'
        browser.submit()

        dummy, current_q = parse_url(browser.geturl())

        link = browser.find_link(text_regex=re.compile('.*hit', re.I))
        dummy, target_q = parse_url(link.url)

        # the target query should be the current query without any c
        # or cc specified.
        for f in ('cc', 'c', 'action_search'):
            if f in current_q:
                del current_q[f]

        self.failUnlessEqual(current_q, target_q)

    def test_nearest_terms(self):
        """ websearch - provide a list of nearest terms """
        browser = Browser()
        browser.open(make_url(''))

        # Search something weird
        browser.select_form(name='search')
        browser['p'] = 'gronf'
        browser.submit()

        dummy, original = parse_url(browser.geturl())

        for to_drop in ('cc', 'action_search', 'f'):
            if to_drop in original:
                del original[to_drop]

        if 'ln' not in original:
            original['ln'] = [CFG_SITE_LANG]

        # we should get a few searches back, which are identical
        # except for the p field being substituted (and the cc field
        # being dropped).
        if 'cc' in original:
            del original['cc']

        for link in browser.links(url_regex=re.compile(CFG_SITE_URL + r'/search\?')):
            if link.text == 'Advanced Search':
                continue

            dummy, target = parse_url(link.url)

            if 'ln' not in target:
                target['ln'] = [CFG_SITE_LANG]

            original['p'] = [link.text]
            self.failUnlessEqual(original, target)

        return

    def test_switch_to_simple_search(self):
        """ websearch - switch to simple search """
        browser = Browser()
        args = {'as': 1}
        browser.open(make_url('/collection/ISOLDE', **args))

        browser.select_form(name='search')
        browser['p1'] = 'tandem'
        browser['f1'] = ['title']
        browser.submit()

        browser.follow_link(text='Simple Search')

        dummy, q = parse_url(browser.geturl())

        self.failUnlessEqual(q, {'cc': ['ISOLDE'],
                                 'p': ['tandem'],
                                 'f': ['title'],
                                 'ln': ['en']})

    def test_switch_to_advanced_search(self):
        """ websearch - switch to advanced search """
        browser = Browser()
        browser.open(make_url('/collection/ISOLDE'))

        browser.select_form(name='search')
        browser['p'] = 'tandem'
        browser['f'] = ['title']
        browser.submit()

        browser.follow_link(text='Advanced Search')

        dummy, q = parse_url(browser.geturl())

        self.failUnlessEqual(q, {'cc': ['ISOLDE'],
                                 'p1': ['tandem'],
                                 'f1': ['title'],
                                 'as': ['1'],
                                 'ln' : ['en']})

    def test_no_boolean_hits(self):
        """ websearch - check the 'no boolean hits' proposed links """
        browser = Browser()
        browser.open(make_url(''))

        browser.select_form(name='search')
        browser['p'] = 'quasinormal muon'
        browser.submit()

        dummy, q = parse_url(browser.geturl())

        for to_drop in ('cc', 'action_search', 'f'):
            if to_drop in q:
                del q[to_drop]

        for bsu in ('quasinormal', 'muon'):
            l = browser.find_link(text=bsu)
            q['p'] = bsu

            if not same_urls_p(l.url, make_url('/search', **q)):
                self.fail(repr((l.url, make_url('/search', **q))))

    def test_similar_authors(self):
        """ websearch - test similar authors box """
        browser = Browser()
        browser.open(make_url(''))

        browser.select_form(name='search')
        browser['p'] = 'Ellis, R K'
        browser['f'] = ['author']
        browser.submit()

        l = browser.find_link(text="Ellis, R S")
        self.failUnless(same_urls_p(l.url, make_url('/search',
                                                    p="Ellis, R S",
                                                    f='author',
                                                    ln='en')))

class WebSearchTestWildcardLimit(unittest.TestCase):
    """Checks if the wildcard limit is correctly passed and that
    users without authorization can not exploit it"""

    def test_wildcard_limit_correctly_passed_when_not_set(self):
        """websearch - wildcard limit is correctly passed when default"""
        self.assertEqual(search_pattern(p='e*', f='author'),
                         search_pattern(p='e*', f='author', wl=1000))

    def test_wildcard_limit_correctly_passed_when_set(self):
        """websearch - wildcard limit is correctly passed when set"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=e*&f=author&of=id&wl=5',
                                               expected_text="[9, 10, 11, 17, 46, 48, 50, 51, 52, 53, 54, 67, 72, 74, 81, 88, 92, 96]"))

    def test_wildcard_limit_correctly_not_active(self):
        """websearch - wildcard limit is not active when there is no wildcard query"""
        self.assertEqual(search_pattern(p='ellis', f='author'),
                         search_pattern(p='ellis', f='author', wl=1))

    def test_wildcard_limit_increased_by_authorized_users(self):
        """websearch - wildcard limit increased by authorized user"""

        browser = Browser()

        #try a search query, with no wildcard limit set by the user
        browser.open(make_url('/search?p=a*&of=id'))
        recid_list_guest_no_limit = browser.response().read() # so the limit is CFG_WEBSEARCH_WILDCARD_LIMIT

        #try a search query, with a wildcard limit imposed by the user
        #wl=1000000 - a very high limit, higher than what the CFG_WEBSEARCH_WILDCARD_LIMIT might be
        browser.open(make_url('/search?p=a*&of=id&wl=1000000'))
        recid_list_guest_with_limit = browser.response().read()

        #same results should be returned for a search without the wildcard limit set by the user
        #and for a search with a large limit set by the user
        #in this way we know that no matter how large the limit is, the wildcard query will be
        #limited by CFG_WEBSEARCH_WILDCARD_LIMIT (for a guest user)
        self.failIf(len(recid_list_guest_no_limit.split(',')) != len(recid_list_guest_with_limit.split(',')))

        ##login as admin
        browser.open(make_surl('/youraccount/login'))
        browser.select_form(nr=0)
        browser['p_un'] = 'admin'
        browser['p_pw'] = ''
        browser.submit()

        #try a search query, with a wildcard limit imposed by an authorized user
        #wl = 10000 a very high limit, higher than what the CFG_WEBSEARCH_WILDCARD_LIMIT might be
        browser.open(make_surl('/search?p=a*&of=id&wl=10000'))
        recid_list_authuser_with_limit = browser.response().read()

        #the authorized user can set whatever limit he might wish
        #so, the results returned for the auth. users should exceed the results returned for unauth. users
        self.failUnless(len(recid_list_guest_no_limit.split(',')) <= len(recid_list_authuser_with_limit.split(',')))

        #logout
        browser.open(make_surl('/youraccount/logout'))
        browser.response().read()
        browser.close()
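
# The wl URL argument caps how many index terms a wildcard like 'a*' may
# expand to.  What the class above relies on, in short (URLs relative to
# CFG_SITE_URL):
#
#     /search?p=a*&of=id             guest: capped at CFG_WEBSEARCH_WILDCARD_LIMIT
#     /search?p=a*&of=id&wl=1000000  guest: still capped at the site-wide limit
#     /search?p=a*&of=id&wl=10000    authorized user: the requested limit is honoured
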
record")) def test_nearest_terms_box_in_unsuccessful_phrase_query(self): """ websearch - nearest terms box for unsuccessful phrase query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%22Ellis%2C+Z%22', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=author%3A%22Enqvist%2C+K%22", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%22ellisz%22&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%22Enqvist%2C+K%22&f=author", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%22elliszà%22&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%22Enqvist%2C+K%22&f=author", expected_link_label='Enqvist, K')) def test_nearest_terms_box_in_unsuccessful_partial_phrase_query(self): """ websearch - nearest terms box for unsuccessful partial phrase query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%27Ellis%2C+Z%27', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=author%3A%27Enqvist%2C+K%27", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%27ellisz%27&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%27Enqvist%2C+K%27&f=author", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%27elliszà%27&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%27Enqvist%2C+K%27&f=author", expected_link_label='Enqvist, K')) def test_nearest_terms_box_in_unsuccessful_partial_phrase_advanced_query(self): """ websearch - nearest terms box for unsuccessful partial phrase advanced search query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p1=aaa&f1=title&m1=p&as=1', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&f1=title&as=1&p1=A+simple+functional+form+for+proton-nucleus+total+reaction+cross+sections&m1=p", expected_link_label='A simple functional form for proton-nucleus total reaction cross sections')) def test_nearest_terms_box_in_unsuccessful_exact_phrase_advanced_query(self): """ websearch - nearest terms box for unsuccessful exact phrase advanced search query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p1=aaa&f1=title&m1=e&as=1', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&f1=title&as=1&p1=A+simple+functional+form+for+proton-nucleus+total+reaction+cross+sections&m1=e", expected_link_label='A simple functional form for proton-nucleus total reaction cross sections')) def test_nearest_terms_box_in_unsuccessful_boolean_query(self): """ websearch - nearest terms box for unsuccessful boolean query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3Aellisz+author%3Aellisz', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aellisz", expected_link_label='energi')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3Aenergi+author%3Aenergie', expected_text="Nearest terms in any collection are", 
expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aenqvist", expected_link_label='enqvist')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&p=title%3Aellisz+author%3Aellisz&f=keyword', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aellisz&f=keyword", expected_link_label='energi')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&p=title%3Aenergi+author%3Aenergie&f=keyword', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aenqvist&f=keyword", expected_link_label='enqvist')) class WebSearchBooleanQueryTest(unittest.TestCase): """Check various boolean queries.""" def test_successful_boolean_query(self): """ websearch - successful boolean query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis+muon', expected_text="records found", expected_link_label="Detailed record")) def test_unsuccessful_boolean_query_where_all_individual_terms_match(self): """ websearch - unsuccessful boolean query where all individual terms match """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis+muon+letter', expected_text="Boolean query returned no hits. Please combine your search terms differently.")) class WebSearchAuthorQueryTest(unittest.TestCase): """Check various author-related queries.""" def test_propose_similar_author_names_box(self): """ websearch - propose similar author names box """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=Ellis%2C+R&f=author', expected_text="See also: similar author names", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=Ellis%2C+R+K&f=author", expected_link_label="Ellis, R K")) def test_do_not_propose_similar_author_names_box(self): """ websearch - do not propose similar author names box """ errmsgs = test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%22Ellis%2C+R%22', expected_link_target=CFG_SITE_URL+"/search?ln=en&p=Ellis%2C+R+K&f=author", expected_link_label="Ellis, R K") if errmsgs[0].find("does not contain link to") > -1: pass else: self.fail("Should not propose similar author names box.") return class WebSearchSearchEnginePythonAPITest(unittest.TestCase): """Check typical search engine Python API calls on the demo data.""" def test_search_engine_python_api_for_failed_query(self): """websearch - search engine Python API for failed query""" self.assertEqual([], perform_request_search(p='aoeuidhtns')) def test_search_engine_python_api_for_successful_query(self): """websearch - search engine Python API for successful query""" self.assertEqual([8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 47], perform_request_search(p='ellis')) def test_search_engine_python_api_for_existing_record(self): """websearch - search engine Python API for existing record""" self.assertEqual([8], perform_request_search(recid=8)) def test_search_engine_python_api_for_nonexisting_record(self): """websearch - search engine Python API for non-existing record""" self.assertEqual([], perform_request_search(recid=16777215)) def test_search_engine_python_api_for_nonexisting_collection(self): """websearch - search engine Python API for non-existing collection""" self.assertEqual([], perform_request_search(c='Foo')) def test_search_engine_python_api_for_range_of_records(self): """websearch - search engine Python API for range of records""" self.assertEqual([1, 2, 3, 4, 5, 6, 7, 8, 9], 
class WebSearchSearchEnginePythonAPITest(unittest.TestCase):
    """Check typical search engine Python API calls on the demo data."""

    def test_search_engine_python_api_for_failed_query(self):
        """websearch - search engine Python API for failed query"""
        self.assertEqual([],
                         perform_request_search(p='aoeuidhtns'))

    def test_search_engine_python_api_for_successful_query(self):
        """websearch - search engine Python API for successful query"""
        self.assertEqual([8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 47],
                         perform_request_search(p='ellis'))

    def test_search_engine_python_api_for_existing_record(self):
        """websearch - search engine Python API for existing record"""
        self.assertEqual([8],
                         perform_request_search(recid=8))

    def test_search_engine_python_api_for_nonexisting_record(self):
        """websearch - search engine Python API for non-existing record"""
        self.assertEqual([],
                         perform_request_search(recid=16777215))

    def test_search_engine_python_api_for_nonexisting_collection(self):
        """websearch - search engine Python API for non-existing collection"""
        self.assertEqual([],
                         perform_request_search(c='Foo'))

    def test_search_engine_python_api_for_range_of_records(self):
        """websearch - search engine Python API for range of records"""
        self.assertEqual([1, 2, 3, 4, 5, 6, 7, 8, 9],
                         perform_request_search(recid=1, recidb=10))

    def test_search_engine_python_api_ranked_by_citation(self):
        """websearch - search engine Python API for citation ranking"""
        self.assertEqual([82, 83, 87, 89],
                         perform_request_search(p='recid:81', rm='citation'))

    def test_search_engine_python_api_textmarc(self):
        """websearch - search engine Python API for Text MARC output"""
        # we are testing example from /help/hacking/search-engine-api
        import cStringIO
        tmp = cStringIO.StringIO()
        perform_request_search(req=tmp, p='higgs', of='tm', ot=['100', '700'])
        out = tmp.getvalue()
        tmp.close()
        self.assertEqual(out, """\
000000085 100__ $$aGirardello, L$$uINFN$$uUniversita di Milano-Bicocca
000000085 700__ $$aPorrati, Massimo
000000085 700__ $$aZaffaroni, A
000000001 100__ $$aPhotolab
""")

class WebSearchSearchEngineWebAPITest(unittest.TestCase):
    """Check typical search engine Web API calls on the demo data."""

    def test_search_engine_web_api_for_failed_query(self):
        """websearch - search engine Web API for failed query"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=aoeuidhtns&of=id',
                                               expected_text="[]"))

    def test_search_engine_web_api_for_successful_query(self):
        """websearch - search engine Web API for successful query"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=ellis&of=id',
                                               expected_text="[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 47]"))

    def test_search_engine_web_api_for_existing_record(self):
        """websearch - search engine Web API for existing record"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?recid=8&of=id',
                                               expected_text="[8]"))

    def test_search_engine_web_api_for_nonexisting_record(self):
        """websearch - search engine Web API for non-existing record"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?recid=123456789&of=id',
                                               expected_text="[]"))

    def test_search_engine_web_api_for_nonexisting_collection(self):
        """websearch - search engine Web API for non-existing collection"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?c=Foo&of=id',
                                               expected_text="[]"))

    def test_search_engine_web_api_for_range_of_records(self):
        """websearch - search engine Web API for range of records"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?recid=1&recidb=10&of=id',
                                               expected_text="[1, 2, 3, 4, 5, 6, 7, 8, 9]"))

class WebSearchRestrictedCollectionTest(unittest.TestCase):
    """Test of the restricted Theses collection behaviour."""

    def test_restricted_collection_interface_page(self):
        """websearch - restricted collection interface page body"""
        # there should be no Latest additions box for restricted collections
        self.assertNotEqual([],
                            test_web_page_content(CFG_SITE_URL + '/collection/Theses',
                                                  expected_text="Latest additions"))

    def test_restricted_search_as_anonymous_guest(self):
        """websearch - restricted collection not searchable by anonymous guest"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?c=Theses')
        response = browser.response().read()
        if response.find("If you think you have right to access it, please authenticate yourself.") > -1:
            pass
        else:
            self.fail("Oops, searching restricted collection without password should have redirected to login dialog.")
        return

    def test_restricted_search_as_authorized_person(self):
        """websearch - restricted collection searchable by authorized person"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?c=Theses')
        browser.select_form(nr=0)
        browser['p_un'] = 'jekyll'
        browser['p_pw'] = 'j123ekyll'
        browser.submit()
        if browser.response().read().find("records found") > -1:
            pass
        else:
            self.fail("Oops, Dr. Jekyll should be able to search Theses collection.")

    def test_restricted_search_as_unauthorized_person(self):
        """websearch - restricted collection not searchable by unauthorized person"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?c=Theses')
        browser.select_form(nr=0)
        browser['p_un'] = 'hyde'
        browser['p_pw'] = 'h123yde'
        browser.submit()
        # Mr. Hyde should not be able to connect:
        if browser.response().read().find("Authorization failure") <= -1:
            # if we got here, things are broken:
            self.fail("Oops, Mr. Hyde should not be able to search Theses collection.")

    def test_restricted_detailed_record_page_as_anonymous_guest(self):
        """websearch - restricted detailed record page not accessible to guests"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/%s/35' % CFG_SITE_RECORD)
        if browser.response().read().find("You can use your nickname or your email address to login.") > -1:
            pass
        else:
            self.fail("Oops, accessing restricted detailed record page without password should have redirected to login dialog.")
        return

    def test_restricted_detailed_record_page_as_authorized_person(self):
        """websearch - restricted detailed record page accessible to authorized person"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/youraccount/login')
        browser.select_form(nr=0)
        browser['p_un'] = 'jekyll'
        browser['p_pw'] = 'j123ekyll'
        browser.submit()
        browser.open(CFG_SITE_URL + '/%s/35' % CFG_SITE_RECORD)
        # Dr. Jekyll should be able to connect
        # (add the pw to the whole CFG_SITE_URL because we shall be
        # redirected to '/reordrestricted/'):
        if browser.response().read().find("A High-performance Video Browsing System") > -1:
            pass
        else:
            self.fail("Oops, Dr. Jekyll should be able to access restricted detailed record page.")

    def test_restricted_detailed_record_page_as_unauthorized_person(self):
        """websearch - restricted detailed record page not accessible to unauthorized person"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/youraccount/login')
        browser.select_form(nr=0)
        browser['p_un'] = 'hyde'
        browser['p_pw'] = 'h123yde'
        browser.submit()
        browser.open(CFG_SITE_URL + '/%s/35' % CFG_SITE_RECORD)
        # Mr. Hyde should not be able to connect:
        if browser.response().read().find('You are not authorized') <= -1:
            # if we got here, things are broken:
            self.fail("Oops, Mr. Hyde should not be able to access restricted detailed record page.")

    def test_collection_restricted_p(self):
        """websearch - collection_restricted_p"""
        self.failUnless(collection_restricted_p('Theses'), True)
        self.failIf(collection_restricted_p('Books & Reports'))

    def test_get_permitted_restricted_collections(self):
        """websearch - get_permitted_restricted_collections"""
        from invenio.webuser import get_uid_from_email, collect_user_info
        self.assertEqual(get_permitted_restricted_collections(collect_user_info(get_uid_from_email('jekyll@cds.cern.ch'))), ['Theses'])
        self.assertEqual(get_permitted_restricted_collections(collect_user_info(get_uid_from_email('hyde@cds.cern.ch'))), [])

class WebSearchRestrictedPicturesTest(unittest.TestCase):
    """
    Check whether restricted pictures on the demo site can be accessed
    well by people who have rights to access them.
    """

    def test_restricted_pictures_guest(self):
        """websearch - restricted pictures not available to guest"""
        error_messages = test_web_page_content(CFG_SITE_URL + '/%s/1/files/0106015_01.jpg' % CFG_SITE_RECORD,
                                               expected_text=['This file is restricted.  If you think you have right to access it, please authenticate yourself.'])
        if error_messages:
            self.fail(merge_error_messages(error_messages))

    def test_restricted_pictures_romeo(self):
        """websearch - restricted pictures available to Romeo"""
        error_messages = test_web_page_content(CFG_SITE_URL + '/%s/1/files/0106015_01.jpg' % CFG_SITE_RECORD,
                                               username='romeo',
                                               password='r123omeo',
                                               expected_text=[],
                                               unexpected_text=['This file is restricted',
                                                                'You are not authorized'])
        if error_messages:
            self.fail(merge_error_messages(error_messages))

    def test_restricted_pictures_hyde(self):
        """websearch - restricted pictures not available to Mr. Hyde"""
        error_messages = test_web_page_content(CFG_SITE_URL + '/%s/1/files/0106015_01.jpg' % CFG_SITE_RECORD,
                                               username='hyde',
                                               password='h123yde',
                                               expected_text=['This file is restricted',
                                                              'You are not authorized'])
        if error_messages:
            self.failUnless("HTTP Error 401: Unauthorized" in merge_error_messages(error_messages))

class WebSearchRSSFeedServiceTest(unittest.TestCase):
    """Test of the RSS feed service."""

    def test_rss_feed_service(self):
        """websearch - RSS feed service"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/rss',
                                               expected_text='<rss version="2.0"'))

class WebSearchResultsOverview(unittest.TestCase):
    """Test of the search results page's results overview box and links."""

    def test_results_overview_split_off(self):
        """websearch - results overview box when split by collection is off"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?p=of&sc=0')
        body = browser.response().read()
        if body.find("Results overview") > -1:
            self.fail("Oops, when split by collection is off, "
                      "results overview should not be present.")
        if body.find('<a name="1"></a>') == -1:
            self.fail("Oops, when split by collection is off, "
                      "Atlantis collection should be found.")
        if body.find('<a name="15"></a>') > -1:
            self.fail("Oops, when split by collection is off, "
                      "Multimedia & Arts should not be found.")
        try:
            browser.find_link(url='#15')
            self.fail("Oops, when split by collection is off, "
                      "a link to Multimedia & Arts should not be found.")
        except LinkNotFoundError:
            pass

    def test_results_overview_split_on(self):
        """websearch - results overview box when split by collection is on"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?p=of&sc=1')
        body = browser.response().read()
        if body.find("Results overview") == -1:
            self.fail("Oops, when split by collection is on, "
                      "results overview should be present.")
        if body.find('<a name="1"></a>') > -1:
            self.fail("Oops, when split by collection is on, "
                      "Atlantis collection should not be found.")
        if body.find('<a name="15"></a>') == -1:
            self.fail("Oops, when split by collection is on, "
                      "Multimedia & Arts should be found.")
        try:
            browser.find_link(url='#15')
        except LinkNotFoundError:
            self.fail("Oops, when split by collection is on, "
                      "a link to Multimedia & Arts should be found.")

class WebSearchSortResultsTest(unittest.TestCase):
    """Test of the search results page's sorting capability."""

    def test_sort_results_default(self):
        """websearch - search results sorting, default method"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1',
                                               expected_text="[TESLA-FEL-99-07]"))

    def test_sort_results_ascending(self):
        """websearch - search results sorting, ascending field"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1&sf=reportnumber&so=a',
                                               expected_text="ISOLTRAP"))

    def test_sort_results_descending(self):
        """websearch - search results sorting, descending field"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1&sf=reportnumber&so=d',
                                               expected_text=" [TESLA-FEL-99-07]"))

    def test_sort_results_sort_pattern(self):
        """websearch - search results sorting, preferential sort pattern"""
        self.assertEqual([],
                         test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1&sf=reportnumber&so=d&sp=cern',
                                               expected_text="[CERN-TH-2002-069]"))

class WebSearchSearchResultsXML(unittest.TestCase):
    """Test search results in various output"""

    def test_search_results_xm_output_split_on(self):
        """ websearch - check document element of search results in xm output (split by collection on)"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?sc=1&of=xm')
        body = browser.response().read()

        num_doc_element = body.count("<collection "
                                     "xmlns=\"http://www.loc.gov/MARC21/slim\">")
        if num_doc_element == 0:
            self.fail("Oops, no document element <collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements <collection> "
                      "found in search results.")

        num_doc_element = body.count("</collection>")
        if num_doc_element == 0:
            self.fail("Oops, no document element </collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements </collection> "
                      "found in search results.")

    def test_search_results_xm_output_split_off(self):
        """ websearch - check document element of search results in xm output (split by collection off)"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?sc=0&of=xm')
        body = browser.response().read()

        num_doc_element = body.count("<collection "
                                     "xmlns=\"http://www.loc.gov/MARC21/slim\">")
        if num_doc_element == 0:
            self.fail("Oops, no document element <collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements <collection> "
                      "found in search results.")

        num_doc_element = body.count("</collection>")
        if num_doc_element == 0:
            self.fail("Oops, no document element </collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements </collection> "
                      "found in search results.")

    def test_search_results_xd_output_split_on(self):
        """ websearch - check document element of search results in xd output (split by collection on)"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?sc=1&of=xd')
        body = browser.response().read()

        num_doc_element = body.count("<collection>")
        if num_doc_element == 0:
            self.fail("Oops, no document element <collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements <collection> "
                      "found in search results.")

        num_doc_element = body.count("</collection>")
        if num_doc_element == 0:
            self.fail("Oops, no document element </collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements </collection> "
                      "found in search results.")

    def test_search_results_xd_output_split_off(self):
        """ websearch - check document element of search results in xd output (split by collection off)"""
        browser = Browser()
        browser.open(CFG_SITE_URL + '/search?sc=0&of=xd')
        body = browser.response().read()

        num_doc_element = body.count("<collection>")
        if num_doc_element == 0:
            self.fail("Oops, no document element <collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements <collection> "
                      "found in search results.")

        num_doc_element = body.count("</collection>")
        if num_doc_element == 0:
            self.fail("Oops, no document element </collection> "
                      "found in search results.")
        elif num_doc_element > 1:
            self.fail("Oops, multiple document elements </collection> "
                      "found in search results.")
expected_text="[76]")) def test_unicode_partial_phrase_query(self): """websearch - Unicode partial phrase query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=title%3A%27%CE%B7%27', expected_text="[76]")) def test_unicode_regexp_query(self): """websearch - Unicode regexp query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=title%3A%2F%CE%B7%2F', expected_text="[76]")) class WebSearchMARCQueryTest(unittest.TestCase): """Test of the search results for queries containing physical MARC tags.""" def test_single_marc_tag_exact_phrase_query(self): """websearch - single MARC tag, exact phrase query (100__a)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=100__a%3A%22Ellis%2C+J%22', expected_text="[9, 14, 18]")) def test_single_marc_tag_partial_phrase_query(self): """websearch - single MARC tag, partial phrase query (245__b)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245__b%3A%27and%27', expected_text="[28]")) def test_many_marc_tags_partial_phrase_query(self): """websearch - many MARC tags, partial phrase query (245)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245%3A%27and%27', expected_text="[1, 8, 9, 14, 15, 20, 22, 24, 28, 33, 47, 48, 49, 51, 53, 64, 69, 71, 79, 82, 83, 85, 91, 96]")) def test_single_marc_tag_regexp_query(self): """websearch - single MARC tag, regexp query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245%3A%2Fand%2F', expected_text="[1, 8, 9, 14, 15, 20, 22, 24, 28, 33, 47, 48, 49, 51, 53, 64, 69, 71, 79, 82, 83, 85, 91, 96]")) class WebSearchExtSysnoQueryTest(unittest.TestCase): """Test of queries using external system numbers.""" def test_existing_sysno_html_output(self): """websearch - external sysno query, existing sysno, HTML output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CER', expected_text="The wall of the cave")) def test_existing_sysno_id_output(self): """websearch - external sysno query, existing sysno, ID output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CER&of=id', expected_text="[95]")) def test_nonexisting_sysno_html_output(self): """websearch - external sysno query, non-existing sysno, HTML output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CERRRR', expected_text="Requested record does not seem to exist.")) def test_nonexisting_sysno_id_output(self): """websearch - external sysno query, non-existing sysno, ID output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CERRRR&of=id', expected_text="[]")) class WebSearchResultsRecordGroupingTest(unittest.TestCase): """Test search results page record grouping (rg).""" def test_search_results_rg_guest(self): """websearch - search results, records in groups of, guest""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?rg=17', expected_text="1 - 17")) def test_search_results_rg_nonguest(self): """websearch - search results, records in groups of, non-guest""" # This test used to fail due to saved user preference fetching # not overridden by URL rg argument. 
self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?rg=17', username='admin', expected_text="1 - 17")) class WebSearchSpecialTermsQueryTest(unittest.TestCase): """Test of the search results for queries containing special terms.""" def test_special_terms_u1(self): """websearch - query for special terms, U(1)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=U%281%29', expected_text="[57, 79, 80, 88]")) def test_special_terms_u1_and_sl(self): """websearch - query for special terms, U(1) SL(2,Z)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=U%281%29+SL%282%2CZ%29', expected_text="[88]")) def test_special_terms_u1_and_sl_or(self): """websearch - query for special terms, U(1) OR SL(2,Z)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=U%281%29+OR+SL%282%2CZ%29', expected_text="[57, 79, 80, 88]")) def test_special_terms_u1_and_sl_or_parens(self): """websearch - query for special terms, (U(1) OR SL(2,Z))""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=%28U%281%29+OR+SL%282%2CZ%29%29', expected_text="[57, 79, 80, 88]")) class WebSearchJournalQueryTest(unittest.TestCase): """Test of the search results for journal pubinfo queries.""" def test_query_journal_title_only(self): """websearch - journal publication info query, title only""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&f=journal&p=Phys.+Lett.+B', expected_text="[77, 78, 85, 87]")) def test_query_journal_full_pubinfo(self): """websearch - journal publication info query, full reference""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&f=journal&p=Phys.+Lett.+B+531+%282002%29+301', expected_text="[78]")) class WebSearchStemmedIndexQueryTest(unittest.TestCase): """Test of the search results for queries using stemmed indexes.""" def test_query_stemmed_lowercase(self): """websearch - stemmed index query, lowercase""" # note that dasse/Dasse is stemmed into dass/Dass, as expected self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=dasse', expected_text="[25, 26]")) def test_query_stemmed_uppercase(self): """websearch - stemmed index query, uppercase""" # ... but note also that DASSE is stemmed into DASSE(!); so the test would fail if the search engine did not lowercase the query term. (Something that is not necessary for non-stemmed indexes.)
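# (Illustrative sketch, assuming Porter-style stemmer behaviour: stem('dasse') == 'dass' matches the stemmed index entry, while stem('DASSE') == 'DASSE' would not; hence the engine has to lowercase the query term before stemming it.)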
self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=DASSE', expected_text="[25, 26]")) class WebSearchSummarizerTest(unittest.TestCase): """Test of the search results summarizer functions.""" def test_most_popular_field_values_singletag(self): """websearch - most popular field values, simple tag""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('PREPRINT', 37), ('ARTICLE', 28), ('BOOK', 14), ('THESIS', 8), ('PICTURE', 7), ('POETRY', 2), ('REPORT', 2), ('ATLANTISTIMESNEWS', 1)), get_most_popular_field_values(range(0,100), '980__a')) def test_most_popular_field_values_singletag_multiexclusion(self): """websearch - most popular field values, simple tag, multiple exclusions""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('PREPRINT', 37), ('ARTICLE', 28), ('BOOK', 14), ('REPORT', 2), ('ATLANTISTIMESNEWS', 1)), get_most_popular_field_values(range(0,100), '980__a', ('THESIS', 'PICTURE', 'POETRY'))) def test_most_popular_field_values_multitag(self): """websearch - most popular field values, multiple tags""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('Ellis, J', 3), ('Enqvist, K', 1), ('Ibanez, L E', 1), ('Nanopoulos, D V', 1), ('Ross, G G', 1)), get_most_popular_field_values((9, 14, 18), ('100__a', '700__a'))) def test_most_popular_field_values_multitag_singleexclusion(self): """websearch - most popular field values, multiple tags, single exclusion""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('Enqvist, K', 1), ('Ibanez, L E', 1), ('Nanopoulos, D V', 1), ('Ross, G G', 1)), get_most_popular_field_values((9, 14, 18), ('100__a', '700__a'), ('Ellis, J'))) def test_most_popular_field_values_multitag_countrepetitive(self): """websearch - most popular field values, multiple tags, counting repetitive occurrences""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('THESIS', 2), ('REPORT', 1)), get_most_popular_field_values((41,), ('690C_a', '980__a'), count_repetitive_values=True)) self.assertEqual((('REPORT', 1), ('THESIS', 1)), get_most_popular_field_values((41,), ('690C_a', '980__a'), count_repetitive_values=False)) def test_ellis_citation_summary(self): """websearch - query ellis, citation summary output format""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis&of=hcs', expected_text="Less known papers (1-9)", expected_link_target=CFG_SITE_URL+"/search?p=ellis%20AND%20cited%3A1-%3E9&rm=citation", expected_link_label='1')) def test_ellis_not_quark_citation_summary_advanced(self): """websearch - ellis and not quark, citation summary format advanced""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&as=1&m1=a&p1=ellis&f1=author&op1=n&m2=a&p2=quark&f2=&op2=a&m3=a&p3=&f3=&action_search=Search&sf=&so=a&rm=&rg=10&sc=1&of=hcs', expected_text="Less known papers (1-9)", expected_link_target=CFG_SITE_URL+'/search?p=author%3Aellis%20and%20not%20quark%20AND%20cited%3A1-%3E9&rm=citation', expected_link_label='1')) def test_ellis_not_quark_citation_summary_regular(self): """websearch - ellis and not quark, citation summary format regular""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&p=author%3Aellis+and+not+quark&f=&action_search=Search&sf=&so=d&rm=&rg=10&sc=0&of=hcs', expected_text="Less known papers (1-9)", expected_link_target=CFG_SITE_URL+'/search?p=author%3Aellis%20and%20not%20quark%20AND%20cited%3A1-%3E9&rm=citation',
expected_link_label='1')) class WebSearchRecordCollectionGuessTest(unittest.TestCase): """Primary collection guessing tests.""" def test_guess_primary_collection_of_a_record(self): """websearch - guess_primary_collection_of_a_record""" self.assertEqual(guess_primary_collection_of_a_record(96), 'Articles') def test_guess_collection_of_a_record(self): """websearch - guess_collection_of_a_record""" self.assertEqual(guess_collection_of_a_record(96), 'Articles') self.assertEqual(guess_collection_of_a_record(96, '%s/collection/Theoretical Physics (TH)?ln=en' % CFG_SITE_URL), 'Articles') self.assertEqual(guess_collection_of_a_record(12, '%s/collection/Theoretical Physics (TH)?ln=en' % CFG_SITE_URL), 'Theoretical Physics (TH)') self.assertEqual(guess_collection_of_a_record(12, '%s/collection/Theoretical%%20Physics%%20%%28TH%%29?ln=en' % CFG_SITE_URL), 'Theoretical Physics (TH)') class WebSearchGetFieldValuesTest(unittest.TestCase): """Testing get_fieldvalues() function.""" def test_get_fieldvalues_001(self): """websearch - get_fieldvalues() for bibxxx-agnostic tags""" self.assertEqual(get_fieldvalues(10, '001___'), ['10']) def test_get_fieldvalues_980(self): """websearch - get_fieldvalues() for bibxxx-powered tags""" self.assertEqual(get_fieldvalues(18, '700__a'), ['Enqvist, K', 'Nanopoulos, D V']) self.assertEqual(get_fieldvalues(18, '909C1u'), ['CERN']) def test_get_fieldvalues_wildcard(self): """websearch - get_fieldvalues() for tag wildcards""" self.assertEqual(get_fieldvalues(18, '%'), []) self.assertEqual(get_fieldvalues(18, '7%'), []) self.assertEqual(get_fieldvalues(18, '700%'), ['Enqvist, K', 'Nanopoulos, D V']) self.assertEqual(get_fieldvalues(18, '909C0%'), ['1985', '13','TH']) def test_get_fieldvalues_recIDs(self): """websearch - get_fieldvalues() for list of recIDs""" self.assertEqual(get_fieldvalues([], '001___'), []) self.assertEqual(get_fieldvalues([], '700__a'), []) self.assertEqual(get_fieldvalues([10, 13], '001___'), ['10', '13']) self.assertEqual(get_fieldvalues([18, 13], '700__a'), ['Dawson, S', 'Ellis, R K', 'Enqvist, K', 'Nanopoulos, D V']) def test_get_fieldvalues_repetitive(self): """websearch - get_fieldvalues() for repetitive values""" self.assertEqual(get_fieldvalues([17, 18], '909C1u'), ['CERN', 'CERN']) self.assertEqual(get_fieldvalues([17, 18], '909C1u', repetitive_values=True), ['CERN', 'CERN']) self.assertEqual(get_fieldvalues([17, 18], '909C1u', repetitive_values=False), ['CERN']) class WebSearchAddToBasketTest(unittest.TestCase): """Test of the add-to-basket presence depending on user rights.""" def test_add_to_basket_guest(self): """websearch - add-to-basket facility allowed for guests""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='Add to basket')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='')) def test_add_to_basket_jekyll(self): """websearch - add-to-basket facility allowed for Dr. Jekyll""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='Add to basket', username='jekyll', password='j123ekyll')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='', username='jekyll', password='j123ekyll')) def test_add_to_basket_hyde(self): """websearch - add-to-basket facility denied to Mr. 
Hyde""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', unexpected_text='Add to basket', username='hyde', password='h123yde')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', unexpected_text='', username='hyde', password='h123yde')) class WebSearchAlertTeaserTest(unittest.TestCase): """Test of the alert teaser presence depending on user rights.""" def test_alert_teaser_guest(self): """websearch - alert teaser allowed for guests""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_link_label='email alert')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='RSS feed')) def test_alert_teaser_jekyll(self): """websearch - alert teaser allowed for Dr. Jekyll""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='email alert', username='jekyll', password='j123ekyll')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='RSS feed', username='jekyll', password='j123ekyll')) def test_alert_teaser_hyde(self): """websearch - alert teaser allowed for Mr. Hyde""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='email alert', username='hyde', password='h123yde')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='RSS feed', username='hyde', password='h123yde')) class WebSearchSpanQueryTest(unittest.TestCase): """Test of span queries.""" def test_span_in_word_index(self): """websearch - span query in a word index""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=year%3A1992-%3E1996&of=id&ap=0', expected_text='[17, 66, 69, 71]')) def test_span_in_phrase_index(self): """websearch - span query in a phrase index""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=year%3A%221992%22-%3E%221996%22&of=id&ap=0', expected_text='[17, 66, 69, 71]')) def test_span_in_bibxxx(self): """websearch - span query in MARC tables""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=909C0y%3A%221992%22-%3E%221996%22&of=id&ap=0', expected_text='[17, 66, 69, 71]')) def test_span_with_spaces(self): """websearch - no span query when a space is around""" # useful for reaction search self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3A%27mu%20--%3E%20e%27&of=id&ap=0', expected_text='[67]')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=245%3A%27mu%20--%3E%20e%27&of=id&ap=0', expected_text='[67]')) def test_span_in_author(self): """websearch - span query in special author index""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%22Ellis,%20K%22-%3E%22Ellis,%20RZ%22&of=id&ap=0', expected_text='[8, 11, 13, 17, 47]')) class WebSearchReferstoCitedbyTest(unittest.TestCase): """Test of refersto/citedby search operators.""" def test_refersto_recid(self): 'websearch - refersto:recid:84' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Arecid%3A84&of=id&ap=0', expected_text='[85, 88, 91]')) def test_refersto_repno(self): 'websearch - refersto:reportnumber:hep-th/0205061' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Areportnumber%3Ahep-th/0205061&of=id&ap=0', expected_text='[91]')) def test_refersto_author_word(self): 'websearch - refersto:author:klebanov' self.assertEqual([], test_web_page_content(CFG_SITE_URL + 
'/search?p=refersto%3Aauthor%3Aklebanov&of=id&ap=0', expected_text='[85, 86, 88, 91]')) def test_refersto_author_phrase(self): 'websearch - refersto:author:"Klebanov, I"' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Aauthor%3A%22Klebanov,%20I%22&of=id&ap=0', expected_text='[85, 86, 88, 91]')) def test_citedby_recid(self): 'websearch - citedby:recid:92' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Arecid%3A92&of=id&ap=0', expected_text='[74, 91]')) def test_citedby_repno(self): 'websearch - citedby:reportnumber:hep-th/0205061' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Areportnumber%3Ahep-th/0205061&of=id&ap=0', expected_text='[78]')) def test_citedby_author_word(self): 'websearch - citedby:author:klebanov' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Aauthor%3Aklebanov&of=id&ap=0', expected_text='[95]')) def test_citedby_author_phrase(self): 'websearch - citedby:author:"Klebanov, I"' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Aauthor%3A%22Klebanov,%20I%22&of=id&ap=0', expected_text='[95]')) def test_refersto_bad_query(self): 'websearch - refersto:title:' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Atitle%3A', expected_text='There are no records referring to title:.')) def test_citedby_bad_query(self): 'websearch - citedby:title:' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Atitle%3A', expected_text='There are no records cited by title:.')) class WebSearchSPIRESSyntaxTest(unittest.TestCase): """Test of SPIRES syntax issues""" def test_and_not_parens(self): 'websearch - find a ellis, j and not a enqvist' self.assertEqual([], test_web_page_content(CFG_SITE_URL +'/search?p=find+a+ellis%2C+j+and+not+a+enqvist&of=id&ap=0', expected_text='[9, 12, 14, 47]')) def test_dadd_search(self): 'websearch - find da > today - 3650' # XXX: assumes we've reinstalled our site in the last 10 years # should return every document in the system self.assertEqual([], test_web_page_content(CFG_SITE_URL +'/search?ln=en&p=find+da+%3E+today+-+3650&f=&of=id', expected_text='[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104]')) class WebSearchDateQueryTest(unittest.TestCase): """Test various date queries.""" def setUp(self): """Establish variables we plan to re-use""" - from invenio.intbitset import intbitset as HitSet - self.empty = HitSet() + from invenio.intbitset import intbitset + self.empty = intbitset() def test_search_unit_hits_for_datecreated_previous_millenia(self): """websearch - search_unit with datecreated returns >0 hits for docs in the last 1000 years""" self.assertNotEqual(self.empty, search_unit('1000-01-01->9999-12-31', 'datecreated')) def test_search_unit_hits_for_datemodified_previous_millenia(self): """websearch - search_unit with datemodified returns >0 hits for docs in the last 1000 years""" self.assertNotEqual(self.empty, search_unit('1000-01-01->9999-12-31', 'datemodified')) def test_search_unit_in_bibrec_for_datecreated_previous_millenia(self): """websearch - search_unit_in_bibrec with creationdate gets >0 hits for past 1000 years""" 
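# Sketch of the helper being exercised (record counts depend on the demo site): search_unit_in_bibrec(d1, d2, type) consults the bibrec table directly and returns an intbitset of recIDs whose creation (or modification) date lies within [d1, d2], e.g.: # hits = search_unit_in_bibrec("1000-01-01", "9999-12-31", 'creationdate') # self.assertNotEqual(self.empty, hits)  # non-empty on a populated site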
self.assertNotEqual(self.empty, search_unit_in_bibrec("1000-01-01", "9999-12-31", 'creationdate')) def test_search_unit_in_bibrec_for_datecreated_next_millenia(self): """websearch - search_unit_in_bibrec with creationdate gets 0 hits for after year 3000""" self.assertEqual(self.empty, search_unit_in_bibrec("3000-01-01", "9999-12-31", 'creationdate')) class WebSearchSynonymQueryTest(unittest.TestCase): """Test of queries using synonyms.""" def test_journal_phrvd(self): """websearch - search-time synonym search, journal title""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=PHRVD&f=journal&of=id', expected_text="[66, 72]")) def test_journal_phrvd_54_1996_4234(self): """websearch - search-time synonym search, journal article""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=PHRVD%2054%20%281996%29%204234&f=journal&of=id', expected_text="[66]")) def test_journal_beta_decay_title(self): """websearch - index-time synonym search, beta decay in title""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta+decay&f=title&of=id', expected_text="[59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2+decay&f=title&of=id', expected_text="[59]")) def test_journal_beta_decay_global(self): """websearch - index-time synonym search, beta decay in any field""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta+decay&of=id', expected_text="[52, 59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2+decay&of=id', expected_text="[52, 59]")) def test_journal_beta_title(self): """websearch - index-time synonym search, beta in title""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta&f=title&of=id', expected_text="[59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2&f=title&of=id', expected_text="[59]")) def test_journal_beta_global(self): """websearch - index-time synonym search, beta in any field""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta&of=id', expected_text="[52, 59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2&of=id', expected_text="[52, 59]")) class WebSearchWashCollectionsTest(unittest.TestCase): """Test if the collection argument is washed correctly""" def test_wash_coll_when_coll_restricted(self): """websearch - washing of restricted daughter collections""" self.assertEqual( sorted(wash_colls(cc='', c=['Books & Reports', 'Theses'])[1]), ['Books & Reports', 'Theses']) self.assertEqual( sorted(wash_colls(cc='', c=['Books & Reports', 'Theses'])[2]), ['Books & Reports', 'Theses']) TEST_SUITE = make_test_suite(WebSearchWebPagesAvailabilityTest, WebSearchTestSearch, WebSearchTestBrowse, WebSearchTestOpenURL, WebSearchTestCollections, WebSearchTestRecord, WebSearchTestLegacyURLs, WebSearchNearestTermsTest, WebSearchBooleanQueryTest, WebSearchAuthorQueryTest, WebSearchSearchEnginePythonAPITest, WebSearchSearchEngineWebAPITest, WebSearchRestrictedCollectionTest, WebSearchRestrictedPicturesTest, WebSearchRSSFeedServiceTest, WebSearchXSSVulnerabilityTest, WebSearchResultsOverview, WebSearchSortResultsTest, WebSearchSearchResultsXML, WebSearchUnicodeQueryTest, WebSearchMARCQueryTest, WebSearchExtSysnoQueryTest, WebSearchResultsRecordGroupingTest, WebSearchSpecialTermsQueryTest, WebSearchJournalQueryTest, WebSearchStemmedIndexQueryTest, WebSearchSummarizerTest, WebSearchRecordCollectionGuessTest, WebSearchGetFieldValuesTest, WebSearchAddToBasketTest, 
WebSearchAlertTeaserTest, WebSearchSpanQueryTest, WebSearchReferstoCitedbyTest, WebSearchSPIRESSyntaxTest, WebSearchDateQueryTest, WebSearchTestWildcardLimit, WebSearchSynonymQueryTest, WebSearchWashCollectionsTest) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/websearch/lib/websearch_webcoll.py b/modules/websearch/lib/websearch_webcoll.py index dd0d22910..5b42c16c3 100644 --- a/modules/websearch/lib/websearch_webcoll.py +++ b/modules/websearch/lib/websearch_webcoll.py @@ -1,1076 +1,1076 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Create Invenio collection cache.""" __revision__ = "$Id$" import calendar import copy import sys import cgi import re import os import string import time from invenio.config import \ CFG_CERN_SITE, \ CFG_WEBSEARCH_INSTANT_BROWSE, \ CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, \ CFG_WEBSEARCH_I18N_LATEST_ADDITIONS, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_LANGS, \ CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE from invenio.messages import gettext_set_language, language_list_long -from invenio.search_engine import HitSet, search_pattern_parenthesised, get_creation_date, get_field_i18nname, collection_restricted_p, sort_records +from invenio.search_engine import search_pattern_parenthesised, get_creation_date, get_field_i18nname, collection_restricted_p, sort_records from invenio.dbquery import run_sql, Error, get_table_update_time from invenio.bibrank_record_sorter import get_bibrank_methods from invenio.dateutils import convert_datestruct_to_dategui from invenio.bibformat import format_record from invenio.shellutils import mymkdir from invenio.intbitset import intbitset from invenio.websearch_external_collections import \ external_collection_load_states, \ dico_collection_external_searches, \ external_collection_sort_engine_by_name from invenio.bibtask import task_init, task_get_option, task_set_option, \ write_message, task_has_option, task_update_progress, \ task_sleep_now_if_required import invenio.template websearch_templates = invenio.template.load('websearch') from invenio.websearch_external_collections_searcher import external_collections_dictionary from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_TIMEOUT from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS ## global vars COLLECTION_HOUSE = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... 
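# Example of the intended memoization (a sketch; get_collection() is defined below, and 'Articles' is just a demo collection name): COLLECTION_HOUSE maps a collection name to its Collection object, so repeated lookups share one instance: # c1 = get_collection('Articles') # c2 = get_collection('Articles') # assert c1 is c2  # second call served from COLLECTION_HOUSE, no re-query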
# CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE -- cache timestamp # tolerance (in seconds), to account for the fact that an admin might # accidentally happen to edit the collection definitions at exactly # the same second when some webcoll process was about to be started. # In order to be safe, let's put an exaggerated timestamp tolerance # value such as 20 seconds: CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE = 20 # CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE -- location of the cache # timestamp file: CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE = "%s/collections/last_updated" % CFG_CACHEDIR # CFG_CACHE_LAST_FAST_UPDATED_TIMESTAMP_FILE -- location of the cache # timestamp file used when running webcoll in fast mode. CFG_CACHE_LAST_FAST_UPDATED_TIMESTAMP_FILE = "%s/collections/last_fast_updated" % CFG_CACHEDIR def get_collection(colname): """Return collection object from the collection house for given colname. If it does not exist, then create it.""" if not COLLECTION_HOUSE.has_key(colname): colobject = Collection(colname) COLLECTION_HOUSE[colname] = colobject return COLLECTION_HOUSE[colname] ## auxiliary functions: def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if var == fld: return ' selected="selected"' else: return "" def get_field(recID, tag): "Gets list of field 'tag' for the record with 'recID' system number." out = [] digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def check_nbrecs_for_all_external_collections(): """Check if any of the external collections have changed their total number of records, aka nbrecs. Return True if any of the total numbers of records have changed and False if they're all the same.""" res = run_sql("SELECT name FROM collection WHERE dbquery LIKE 'hostedcollection:%';") for row in res: coll_name = row[0] if (get_collection(coll_name)).check_nbrecs_for_external_collection(): write_message("External collection %s found updated." % coll_name, verbose=6) return True write_message("All external collections are up to date.", verbose=6) return False class Collection: "Holds the information on collections (id,name,dbquery)." def __init__(self, name=""): "Creates collection instance by querying the DB configuration database about 'name'." self.calculate_reclist_run_already = 0 # to speed things up without much refactoring self.update_reclist_run_already = 0 # to speed things up without much refactoring - self.reclist_with_nonpublic_subcolls = HitSet() + self.reclist_with_nonpublic_subcolls = intbitset() # used to store the temporary result of the calculation of nbrecs of an external collection self.nbrecs_tmp = None if not name: self.name = CFG_SITE_NAME # by default we are working on the home page self.id = 1 self.dbquery = None self.nbrecs = None - self.reclist = HitSet() + self.reclist = intbitset() else: self.name = name try: res = run_sql("""SELECT id,name,dbquery,nbrecs,reclist FROM collection WHERE name=%s""", (name,)) if res: self.id = res[0][0] self.name = res[0][1] self.dbquery = res[0][2] self.nbrecs = res[0][3] try: - self.reclist = HitSet(res[0][4]) + self.reclist = intbitset(res[0][4]) except: - self.reclist = HitSet() + self.reclist = intbitset() else: # collection does not exist!
self.id = None self.dbquery = None self.nbrecs = None - self.reclist = HitSet() + self.reclist = intbitset() except Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) def get_example_search_queries(self): """Returns list of sample search queries for this collection. """ res = run_sql("""SELECT example.body FROM example LEFT JOIN collection_example on example.id=collection_example.id_example WHERE collection_example.id_collection=%s ORDER BY collection_example.score""", (self.id,)) return [query[0] for query in res] def get_name(self, ln=CFG_SITE_LANG, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): """Return nicely formatted collection name for language LN. The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" out = prolog i18name = "" res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) try: i18name += res[0][0] except IndexError: pass if i18name: out += i18name else: out += self.name out += epilog return out def get_ancestors(self): "Returns list of ancestors of the current collection." ancestors = [] ancestors_ids = intbitset() id_son = self.id while 1: query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) res = run_sql(query, None, 1) if res: col_ancestor = get_collection(res[0][1]) # looking for loops if self.id in ancestors_ids: write_message("Loop found in collection %s" % self.name, stream=sys.stderr) raise OverflowError("Loop found in collection %s" % self.name) else: ancestors.append(col_ancestor) ancestors_ids.add(col_ancestor.id) id_son = res[0][0] else: break ancestors.reverse() return ancestors def restricted_p(self): """Predicate to test if the collection is restricted or not. Return the content of the `restricted' column of the collection table (typically an Apache group). Otherwise return None if the collection is public.""" if collection_restricted_p(self.name): return 1 return None def get_sons(self, type='r'): "Returns list of direct sons of type 'type' for the current collection." sons = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type) res = run_sql(query) for row in res: sons.append(get_collection(row[1])) return sons def get_descendants(self, type='r'): "Returns list of all descendants of type 'type' for the current collection." descendants = [] descendant_ids = intbitset() id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type) res = run_sql(query) for row in res: col_desc = get_collection(row[1]) # looking for loops if self.id in descendant_ids: write_message("Loop found in collection %s" % self.name, stream=sys.stderr) raise OverflowError("Loop found in collection %s" % self.name) else: descendants.append(col_desc) descendant_ids.add(col_desc.id) tmp_descendants = col_desc.get_descendants() for descendant in tmp_descendants: descendant_ids.add(descendant.id) descendants += tmp_descendants return descendants def write_cache_file(self, filename='', filebody=''): "Write a file inside collection cache."
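# For instance (a sketch with names taken from update_webpage_cache() below): a call such as write_cache_file('body-as=0-ln=en', body_html) ends up writing CFG_CACHEDIR/collections/<self.id>/body-as=0-ln=en.html, with the directory created on demand via mymkdir().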
# open file: dirname = "%s/collections/%d" % (CFG_CACHEDIR, self.id) mymkdir(dirname) fullfilename = dirname + "/%s.html" % filename try: os.umask(022) f = open(fullfilename, "w") except IOError, v: try: (code, message) = v except: code = 0 message = v print "I/O Error: " + str(message) + " (" + str(code) + ")" sys.exit(1) # print user info: write_message("... creating %s" % fullfilename, verbose=6) sys.stdout.flush() # print page body: f.write(filebody) # close file: f.close() def update_webpage_cache(self): """Create collection page header, navtrail, body (including left and right stripes) and footer, and call write_cache_file() afterwards to update the collection webpage cache.""" ## precalculate latest additions for non-aggregate ## collections (the info is independent of ln and aas) if self.dbquery and not CFG_WEBSEARCH_I18N_LATEST_ADDITIONS: self.create_latest_additions_info() ## do this for each language: for lang, lang_fullname in language_list_long(): # but only if no specific language was requested, or this is one of the requested languages: if lang in task_get_option("language", [lang]): if self.dbquery and CFG_WEBSEARCH_I18N_LATEST_ADDITIONS: self.create_latest_additions_info(ln=lang) # load the right message language _ = gettext_set_language(lang) ## first, update navtrail: for aas in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: self.write_cache_file("navtrail-as=%s-ln=%s" % (aas, lang), self.create_navtrail_links(aas, lang)) ## second, update page body: for aas in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: # do light, simple and advanced search pages: body = websearch_templates.tmpl_webcoll_body( ln=lang, collection=self.name, te_portalbox = self.create_portalbox(lang, 'te'), searchfor = self.create_searchfor(aas, lang), np_portalbox = self.create_portalbox(lang, 'np'), narrowsearch = self.create_narrowsearch(aas, lang, 'r'), focuson = self.create_narrowsearch(aas, lang, "v") + \ self.create_external_collections_box(lang), instantbrowse = self.create_instant_browse(aas=aas, ln=lang), ne_portalbox = self.create_portalbox(lang, 'ne') ) self.write_cache_file("body-as=%s-ln=%s" % (aas, lang), body) ## third, write portalboxes: self.write_cache_file("portalbox-tp-ln=%s" % lang, self.create_portalbox(lang, "tp")) self.write_cache_file("portalbox-te-ln=%s" % lang, self.create_portalbox(lang, "te")) self.write_cache_file("portalbox-lt-ln=%s" % lang, self.create_portalbox(lang, "lt")) self.write_cache_file("portalbox-rt-ln=%s" % lang, self.create_portalbox(lang, "rt")) ## fourth, write 'last updated' information: self.write_cache_file("last-updated-ln=%s" % lang, convert_datestruct_to_dategui(time.localtime(), ln=lang)) return def create_navtrail_links(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in self.get_ancestors(): if dad.name != CFG_SITE_NAME: # exclude Home collection dads.append((dad.name, dad.get_name(ln))) return websearch_templates.tmpl_navtrail_links( aas=aas, ln=ln, dads=dads) def create_portalbox(self, lang=CFG_SITE_LANG, position="rt"): """Creates portalboxes of language CFG_SITE_LANG of the position POSITION by consulting DB configuration database.
The position may be: 'lt'='left top', 'rt'='right top', etc.""" out = "" query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ " ORDER BY cp.score DESC" % (self.id, lang, position) res = run_sql(query) for row in res: title, body = row[0], row[1] if title: out += websearch_templates.tmpl_portalbox(title = title, body = body) else: # no title specified, so print body ``as is'' only: out += body return out def create_narrowsearch(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG, type="r"): """Creates list of collection descendants of type 'type' under title 'title'. If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes.""" # get list of sons and analyse it sons = self.get_sons(type) if not sons: return '' # get descendants descendants = self.get_descendants(type) grandsons = [] if CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS: # load grandsons for each son for son in sons: grandsons.append(son.get_sons()) # return "" return websearch_templates.tmpl_narrowsearch( aas = aas, ln = ln, type = type, father = self, has_grandchildren = len(descendants)>len(sons), sons = sons, display_grandsons = CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, grandsons = grandsons ) def create_external_collections_box(self, ln=CFG_SITE_LANG): external_collection_load_states() if not dico_collection_external_searches.has_key(self.id): return "" engines_list = external_collection_sort_engine_by_name(dico_collection_external_searches[self.id]) return websearch_templates.tmpl_searchalso(ln, engines_list, self.id) def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG): """ Create info about latest additions that will be used for create_instant_browse() later. """ self.latest_additions_info = [] if self.nbrecs and self.reclist: # firstly, get last 'rg' records: recIDs = list(self.reclist) of = 'hb' # CERN hack begins: tweak latest additions for selected collections: if CFG_CERN_SITE: # alter recIDs list for some CERN collections: this_year = time.strftime("%Y", time.localtime()) if self.name in ['CERN Yellow Reports','Videos']: last_year = str(int(this_year) - 1) # detect recIDs only from this and past year: recIDs = list(self.reclist & \ search_pattern_parenthesised(p='year:%s or year:%s' % \ (this_year, last_year))) elif self.name in ['VideosXXX']: # detect recIDs only from this year: recIDs = list(self.reclist & \ search_pattern_parenthesised(p='year:%s' % this_year)) elif self.name == 'CMS Physics Analysis Summaries' and \ 1281585 in self.reclist: # REALLY, REALLY temporary hack recIDs = list(self.reclist) recIDs.remove(1281585) # apply special filters: if self.name in ['Videos']: # select only videos with movies: recIDs = list(intbitset(recIDs) & \ search_pattern_parenthesised(p='collection:"PUBLVIDEOMOVIE"')) of = 'hvp' # sort some CERN collections specially: if self.name in ['Videos', 'Video Clips', 'Video Movies', 'Video News', 'Video Rushes', 'Webcast', 'ATLAS Videos', 'Restricted Video Movies', 'Restricted Video Rushes', 'LHC First Beam Videos', 'CERN openlab Videos']: recIDs = sort_records(None, recIDs, '269__c') # CERN hack ends.
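# Worked example of the window below: with 25 matching records and rg=10, total=25 and to_display=10, so range(total-1, total-to_display-1, -1) yields indices 24, 23, ..., 15, i.e. the last ten recIDs newest-first (on the assumption that higher recIDs correspond to more recent additions).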
total = len(recIDs) to_display = min(rg, total) for idx in range(total-1, total-to_display-1, -1): recid = recIDs[idx] self.latest_additions_info.append({'id': recid, 'format': format_record(recid, of, ln=ln), 'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")}) return def create_instant_browse(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): "Searches database and produces list of last 'rg' records." if self.restricted_p(): return websearch_templates.tmpl_box_restricted_content(ln = ln) if str(self.dbquery).startswith("hostedcollection:"): return websearch_templates.tmpl_box_hosted_collection(ln = ln) if rg == 0: # do not show latest additions box return "" # CERN hack: do not display latest additions for some CERN collections: if CFG_CERN_SITE and self.name in ['Periodicals', 'Electronic Journals', 'Press Office Photo Selection', 'Press Office Video Selection']: return "" try: self.latest_additions_info latest_additions_info_p = True except: latest_additions_info_p = False if latest_additions_info_p: passIDs = [] for idx in range(0, min(len(self.latest_additions_info), rg)): # CERN hack: display the records in a grid layout, so do not show the related links if CFG_CERN_SITE and self.name in ['Videos']: passIDs.append({'id': self.latest_additions_info[idx]['id'], 'body': self.latest_additions_info[idx]['format'], 'date': self.latest_additions_info[idx]['date']}) else: passIDs.append({'id': self.latest_additions_info[idx]['id'], 'body': self.latest_additions_info[idx]['format'] + \ websearch_templates.tmpl_record_links(recid=self.latest_additions_info[idx]['id'], rm='citation', ln=ln), 'date': self.latest_additions_info[idx]['date']}) if self.nbrecs > rg: url = websearch_templates.build_search_url( cc=self.name, jrec=rg+1, ln=ln, aas=aas) else: url = "" # CERN hack: display the records in a grid layout if CFG_CERN_SITE and self.name in ['Videos']: return websearch_templates.tmpl_instant_browse( aas=aas, ln=ln, recids=passIDs, more_link=url, grid_layout=True) return websearch_templates.tmpl_instant_browse( aas=aas, ln=ln, recids=passIDs, more_link=url) return websearch_templates.tmpl_box_no_records(ln=ln) def create_searchoptions(self): "Produces 'Search options' portal box."
box = "" query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id ORDER BY cff.score DESC""" % self.id res = run_sql(query) if res: for row in res: field_id = row[0] field_code = row[1] field_name = row[2] query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) res_bis = run_sql(query_bis) if res_bis: values = [{'value' : '', 'text' : 'any' + ' ' + field_name}] # FIXME: internationalisation of "any" for row_bis in res_bis: values.append({'value' : cgi.escape(row_bis[0], 1), 'text' : row_bis[1]}) box += websearch_templates.tmpl_select( fieldname = field_code, values = values ) return box def create_sortoptions(self, ln=CFG_SITE_LANG): """Produces 'Sort options' portal box.""" # load the right message language _ = gettext_set_language(ln) box = "" query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id values = [{'value' : '', 'text': "- %s -" % _("latest first")}] res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: for tmp in ('title', 'author', 'report number', 'year'): values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) box += websearch_templates.tmpl_select( fieldname = 'so', css_class = 'address', values = [ {'value' : 'a' , 'text' : _("asc.")}, {'value' : 'd' , 'text' : _("desc.")} ] ) return box def create_rankoptions(self, ln=CFG_SITE_LANG): "Produces 'Rank options' portal box." # load the right message language _ = gettext_set_language(ln) values = [{'value' : '', 'text': "- %s %s -" % (string.lower(_("OR")), _("rank by"))}] for (code, name) in get_bibrank_methods(self.id, ln): values.append({'value' : code, 'text': name}) box = websearch_templates.tmpl_select( fieldname = 'rm', css_class = 'address', values = values ) return box def create_displayoptions(self, ln=CFG_SITE_LANG): "Produces 'Display options' portal box." # load the right message language _ = gettext_set_language(ln) values = [] for i in ['10', '25', '50', '100', '250', '500']: values.append({'value' : i, 'text' : i + ' ' + _("results")}) box = websearch_templates.tmpl_select( fieldname = 'rg', css_class = 'address', values = values ) if self.get_sons(): box += websearch_templates.tmpl_select( fieldname = 'sc', css_class = 'address', values = [ {'value' : '1' , 'text' : _("split by collection")}, {'value' : '0' , 'text' : _("single list")} ] ) return box def create_formatoptions(self, ln=CFG_SITE_LANG): "Produces 'Output format options' portal box." 
# load the right message language _ = gettext_set_language(ln) box = "" values = [] query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf WHERE cf.id_collection=%d AND cf.id_format=f.id AND f.visibility='1' ORDER BY cf.score DESC, f.name ASC""" % self.id res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: values.append({'value' : 'hb', 'text' : "HTML %s" % _("brief")}) box = websearch_templates.tmpl_select( fieldname = 'of', css_class = 'address', values = values ) return box def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): """Produces 'search within' selection box for the current collection.""" # get values query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id res = run_sql(query) values = [{'value' : '', 'text' : get_field_i18nname("any field", ln)}] if res: for row in res: values.append({'value' : row[0], 'text' : get_field_i18nname(row[1], ln)}) else: if CFG_CERN_SITE: for tmp in ['title', 'author', 'abstract', 'report number', 'year']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) else: for tmp in ['title', 'author', 'abstract', 'keyword', 'report number', 'journal', 'year', 'fulltext', 'reference']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) return websearch_templates.tmpl_searchwithin_select( fieldname = fieldname, ln = ln, selected = value, values = values ) def create_searchexample(self): "Produces search example(s) for the current collection." out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id return out def create_searchfor(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): "Produces either Simple or Advanced 'Search for' box for the current collection." if aas == 1: return self.create_searchfor_advanced(ln) elif aas == 0: return self.create_searchfor_simple(ln) else: return self.create_searchfor_light(ln) def create_searchfor_light(self, ln=CFG_SITE_LANG): "Produces light 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_light( ln=ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, example_search_queries=self.get_example_search_queries(), ) def create_searchfor_simple(self, ln=CFG_SITE_LANG): "Produces simple 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_simple( ln=ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, middle_option = self.create_searchwithin_selection_box(ln=ln), ) def create_searchfor_advanced(self, ln=CFG_SITE_LANG): "Produces advanced 'Search for' box for the current collection." 
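# The advanced box aggregates three 'search within' selectors (f1, f2, f3) plus the search/sort/rank/display/format option boxes built by the helper methods above; each helper returns a ready HTML fragment that the template merely places.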
return websearch_templates.tmpl_searchfor_advanced( ln = ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, middle_option_1 = self.create_searchwithin_selection_box('f1', ln=ln), middle_option_2 = self.create_searchwithin_selection_box('f2', ln=ln), middle_option_3 = self.create_searchwithin_selection_box('f3', ln=ln), searchoptions = self.create_searchoptions(), sortoptions = self.create_sortoptions(ln), rankoptions = self.create_rankoptions(ln), displayoptions = self.create_displayoptions(ln), formatoptions = self.create_formatoptions(ln) ) def calculate_reclist(self): """Calculate, set and return the (reclist, reclist_with_nonpublic_subcolls) tuple for given collection.""" if self.calculate_reclist_run_already or str(self.dbquery).startswith("hostedcollection:"): # do we have to recalculate? return (self.reclist, self.reclist_with_nonpublic_subcolls) write_message("... calculating reclist of %s" % self.name, verbose=6) - reclist = HitSet() # will hold results for public sons only; good for storing into DB - reclist_with_nonpublic_subcolls = HitSet() # will hold results for both public and nonpublic sons; good for deducing total + reclist = intbitset() # will hold results for public sons only; good for storing into DB + reclist_with_nonpublic_subcolls = intbitset() # will hold results for both public and nonpublic sons; good for deducing total # number of documents if not self.dbquery: # A - collection does not have dbquery, so query recursively all its sons # that are either non-restricted or that have the same restriction rules for coll in self.get_sons(): coll_reclist, coll_reclist_with_nonpublic_subcolls = coll.calculate_reclist() if ((coll.restricted_p() is None) or (coll.restricted_p() == self.restricted_p())): # add this reclist ``for real'' only if it is public reclist.union_update(coll_reclist) reclist_with_nonpublic_subcolls.union_update(coll_reclist_with_nonpublic_subcolls) else: # B - collection does have dbquery, so compute it: # (note: explicitly remove DELETED records) if CFG_CERN_SITE: reclist = search_pattern_parenthesised(None, self.dbquery + \ ' -980__:"DELETED" -980__:"DUMMY"') else: reclist = search_pattern_parenthesised(None, self.dbquery + ' -980__:"DELETED"') reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) # store the results: self.nbrecs = len(reclist_with_nonpublic_subcolls) self.reclist = reclist self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 # return the two sets: return (self.reclist, self.reclist_with_nonpublic_subcolls) def calculate_nbrecs_for_external_collection(self, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT): """Calculate the total number of records, aka nbrecs, for given external collection.""" #if self.calculate_reclist_run_already: # do we have to recalculate? #return self.nbrecs #write_message("... calculating nbrecs of external collection %s" % self.name, verbose=6) if external_collections_dictionary.has_key(self.name): engine = external_collections_dictionary[self.name] if engine.parser: self.nbrecs_tmp = engine.parser.parse_nbrecs(timeout) if self.nbrecs_tmp >= 0: return self.nbrecs_tmp # the parse_nbrecs() function returns negative values for some specific cases # maybe we can handle these specific cases, some warnings or something # for now the total number of records remains silently the same else: return self.nbrecs else: write_message("External collection %s does not have a parser!" 
% self.name, verbose=6) else: write_message("External collection %s not found!" % self.name, verbose=6) return 0 # last but not least, update the speed-up flag: #self.calculate_reclist_run_already = 1 def check_nbrecs_for_external_collection(self): """Check if the external collection has changed its total number of records, aka nbrecs. Returns True if the total number of records has changed and False if it's the same.""" write_message("*** self.nbrecs = %s / self.cal...ion = %s ***" % (str(self.nbrecs), str(self.calculate_nbrecs_for_external_collection())), verbose=6) write_message("*** self.nbrecs != self.cal...ion = %s ***" % (str(self.nbrecs != self.calculate_nbrecs_for_external_collection()),), verbose=6) return self.nbrecs != self.calculate_nbrecs_for_external_collection(CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS) def set_nbrecs_for_external_collection(self): """Set this external collection's total number of records, aka nbrecs.""" if self.calculate_reclist_run_already: # do we have to recalculate? return write_message("... calculating nbrecs of external collection %s" % self.name, verbose=6) if self.nbrecs_tmp: self.nbrecs = self.nbrecs_tmp else: self.nbrecs = self.calculate_nbrecs_for_external_collection(CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS) # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 def update_reclist(self): "Update the record universe for given collection; nbrecs, reclist of the collection table." if self.update_reclist_run_already: # do we have to reupdate? return 0 write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs), verbose=6) sys.stdout.flush() try: run_sql("UPDATE collection SET nbrecs=%s, reclist=%s WHERE id=%s", (self.nbrecs, self.reclist.fastdump(), self.id)) self.reclist_updated_since_start = 1 except Error, e: print "Database Query Error %d: %s." % (e.args[0], e.args[1]) sys.exit(1) # last but not least, update the speed-up flag: self.update_reclist_run_already = 1 return 0 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def get_current_time_timestamp(): """Return timestamp corresponding to the current time.""" return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) def compare_timestamps_with_tolerance(timestamp1, timestamp2, tolerance=0): """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2 minus TOLERANCE, 0 if they are equal within TOLERANCE limit, and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE.
""" # remove any trailing .00 in timestamps: timestamp1 = re.sub(r'\.[0-9]+$', '', timestamp1) timestamp2 = re.sub(r'\.[0-9]+$', '', timestamp2) # first convert timestamps to Unix epoch seconds: timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S")) timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S")) # now compare them: if timestamp1_seconds < timestamp2_seconds - tolerance: return -1 elif timestamp1_seconds > timestamp2_seconds + tolerance: return 1 else: return 0 def get_database_last_updated_timestamp(): """Return last updated timestamp for collection-related and record-related database tables. """ database_tables_timestamps = [] database_tables_timestamps.append(get_table_update_time('bibrec')) database_tables_timestamps.append(get_table_update_time('bibfmt')) try: database_tables_timestamps.append(get_table_update_time('idxWORD%')) except ValueError: # There are no indexes in the database. That's OK. pass database_tables_timestamps.append(get_table_update_time('collection%')) database_tables_timestamps.append(get_table_update_time('portalbox')) database_tables_timestamps.append(get_table_update_time('field%')) database_tables_timestamps.append(get_table_update_time('format%')) database_tables_timestamps.append(get_table_update_time('rnkMETHODNAME')) return max(database_tables_timestamps) def get_cache_last_updated_timestamp(): """Return last updated cache timestamp.""" try: f = open(CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE, "r") except: return "1970-01-01 00:00:00" timestamp = f.read() f.close() return timestamp def set_cache_last_updated_timestamp(timestamp): """Set last updated cache timestamp to TIMESTAMP.""" try: f = open(CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE, "w") except: pass f.write(timestamp) f.close() return timestamp def main(): """Main that construct all the bibtask.""" task_init(authorization_action="runwebcoll", authorization_msg="WebColl Task Submission", description="""Description: webcoll updates the collection cache (record universe for a given collection plus web page elements) based on invenio.conf and DB configuration parameters. If the collection name is passed as an argument, only this collection's cache will be updated. If the recursive option is set as well, the collection's descendants will also be updated.\n""", help_specific_usage=" -c, --collection\t Update cache for the given " "collection only. [all]\n" " -r, --recursive\t Update cache for the given collection and all its\n" "\t\t\t descendants (to be used in combination with -c). [no]\n" " -f, --force\t\t Force update even if cache is up to date. [no]\n" " -p, --part\t\t Update only certain cache parts (1=reclist," " 2=webpage). [both]\n" " -l, --language\t Update pages in only certain language" " (e.g. fr,it,...). [all]\n", version=__revision__, specific_params=("c:rfp:l:", [ "collection=", "recursive", "force", "part=", "language=" ]), task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_submit_check_options_fnc=task_submit_check_options, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. 
eg: if key in ['-n', '--number']: self.options['number'] = value return True return False """ if key in ("-c", "--collection"): task_set_option("collection", value) elif key in ("-r", "--recursive"): task_set_option("recursive", 1) elif key in ("-f", "--force"): task_set_option("force", 1) elif key in ("-p", "--part"): task_set_option("part", int(value)) elif key in ("-l", "--language"): languages = task_get_option("language", []) languages += value.split(',') for ln in languages: if ln not in CFG_SITE_LANGS: print 'ERROR: "%s" is not a recognized language code' % ln return False task_set_option("language", languages) else: return False return True def task_submit_check_options(): if task_has_option('collection'): coll = get_collection(task_get_option("collection")) if coll.id is None: print 'ERROR: Collection "%s" does not exist' % coll.name return False return True def task_run_core(): """ Reimplement to add the body of the task.""" ## ## ------->--->time--->------> ## (-1) | ( 0) | ( 1) ## | | | ## [T.db] | [T.fc] | [T.db] ## | | | ## |<-tol|tol->| ## ## the above is the compare_timestamps_with_tolerance result "diagram" ## [T.db] stands for the database timestamp and [T.fc] for the file cache timestamp ## ( -1, 0, 1) stand for the returned values ## tol stands for the tolerance in seconds ## ## When a record has been added to or deleted from one of the collections, T.db becomes greater than T.fc, ## and when webcoll runs it runs in full. It recalculates the reclists and nbrecs, and since it updates the ## collections db table it also updates T.db. T.fc is set to the moment the task started running, thus ## slightly before T.db (practically the time distance between the start of the task and the last call of ## update_reclist). Therefore when webcoll runs again, even if no database changes have taken place in the ## meanwhile, it runs in full (because compare_timestamps_with_tolerance returns 0). This time though, if ## no database changes have taken place, T.db remains the same while T.fc is updated, and as a result the ## next webcoll run will not be a full one. ## task_run_start_timestamp = get_current_time_timestamp() colls = [] # decide whether we need to run or not, by comparing last updated timestamps: write_message("Database timestamp is %s." % get_database_last_updated_timestamp(), verbose=3) write_message("Collection cache timestamp is %s." % get_cache_last_updated_timestamp(), verbose=3) if task_has_option("part"): write_message("Running cache update part %s only."
    if task_has_option("part"):
        write_message("Running cache update part %s only."
                % task_get_option("part"), verbose=3)
    if check_nbrecs_for_all_external_collections() or task_has_option("force") or \
    compare_timestamps_with_tolerance(get_database_last_updated_timestamp(),
                                      get_cache_last_updated_timestamp(),
                                      CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE) >= 0:
        ## either forced update was requested or cache is not up to date, so recreate it:
        # firstly, decide which collections to do:
        if task_has_option("collection"):
            coll = get_collection(task_get_option("collection"))
            colls.append(coll)
            if task_has_option("recursive"):
                r_type_descendants = coll.get_descendants(type='r')
                colls += r_type_descendants
                v_type_descendants = coll.get_descendants(type='v')
                colls += v_type_descendants
        else:
            res = run_sql("SELECT name FROM collection ORDER BY id")
            for row in res:
                colls.append(get_collection(row[0]))
        # secondly, update collection reclist cache:
        if task_get_option('part', 1) == 1:
            i = 0
            for coll in colls:
                i += 1
                write_message("%s / reclist cache update" % coll.name)
                if str(coll.dbquery).startswith("hostedcollection:"):
                    coll.set_nbrecs_for_external_collection()
                else:
                    coll.calculate_reclist()
                task_sleep_now_if_required()
                coll.update_reclist()
                task_update_progress("Part 1/2: done %d/%d" % (i, len(colls)))
                task_sleep_now_if_required(can_stop_too=True)
        # thirdly, update collection webpage cache:
        if task_get_option("part", 2) == 2:
            i = 0
            for coll in colls:
                i += 1
                write_message("%s / webpage cache update" % coll.name)
                coll.update_webpage_cache()
                task_update_progress("Part 2/2: done %d/%d" % (i, len(colls)))
                task_sleep_now_if_required(can_stop_too=True)
        # finally update the cache last updated timestamp:
        # (but only when all collections were updated, not when only
        # some of them were forced-updated as per admin's demand)
        if not task_has_option("collection"):
            set_cache_last_updated_timestamp(task_run_start_timestamp)
            write_message("Collection cache timestamp is set to %s." % get_cache_last_updated_timestamp(), verbose=3)
    else:
        ## cache up to date, we don't have to run
        write_message("Collection cache is up to date, no need to run.")
    ## we are done:
    return True

### okay, here we go:
if __name__ == '__main__':
    main()
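## (Note on the -p option above, illustrative: task_get_option('part', 1)
## returns the default 1 when -p was not given, and likewise for part 2,
## so with no -p option both the reclist and the webpage parts run; with
## "-p 1" only the reclist part runs, with "-p 2" only the webpage part.)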
diff --git a/modules/webstyle/lib/webinterface_handler_wsgi.py b/modules/webstyle/lib/webinterface_handler_wsgi.py
index 5c535d22c..67b3ca78a 100644
--- a/modules/webstyle/lib/webinterface_handler_wsgi.py
+++ b/modules/webstyle/lib/webinterface_handler_wsgi.py
@@ -1,638 +1,648 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""mod_python->WSGI Framework"""

import sys
import os
import re
import cgi
import inspect
from fnmatch import fnmatch
from urlparse import urlparse, urlunparse
from wsgiref.validate import validator
from wsgiref.util import FileWrapper, guess_scheme

if __name__ != "__main__":
    # Chances are that we are inside mod_wsgi.
    ## You can't write to stdout in mod_wsgi, but some of our
    ## dependencies do this! (e.g. 4Suite)
    sys.stdout = sys.stderr

from invenio.session import get_session
from invenio.webinterface_handler import CFG_HAS_HTTPS_SUPPORT, CFG_FULL_HTTPS
from invenio.webinterface_layout import invenio_handler
from invenio.webinterface_handler_wsgi_utils import table, FieldStorage
from invenio.webinterface_handler_config import \
    HTTP_STATUS_MAP, SERVER_RETURN, OK, DONE, \
    HTTP_NOT_FOUND, HTTP_INTERNAL_SERVER_ERROR
from invenio.config import CFG_WEBDIR, CFG_SITE_LANG, \
    CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST, CFG_DEVEL_SITE, CFG_SITE_URL, \
    CFG_SITE_SECURE_URL
from invenio.errorlib import register_exception, get_pretty_traceback

## Static files are usually handled directly by the webserver (e.g. Apache)
## However in case WSGI is required to handle static files too (such
## as when running wsgiref simple server), then this flag can be
## turned on (it is done automatically by wsgi_handler_test).
CFG_WSGI_SERVE_STATIC_FILES = False

## Magic regexp to search for usage of CFG_SITE_URL within src/href or
## any src usage of an external website
_RE_HTTPS_REPLACES = re.compile(r"\b((?:src\s*=|url\s*\()\s*[\"']?)http\://", re.I)

def _http_replace_func(match):
    ## src external_site -> CFG_SITE_SECURE_URL/sslredirect/external_site
    return match.group(1) + CFG_SITE_SECURE_URL + '/sslredirect/'

_ESCAPED_CFG_SITE_URL = cgi.escape(CFG_SITE_URL, True)
_ESCAPED_CFG_SITE_SECURE_URL = cgi.escape(CFG_SITE_SECURE_URL, True)

def https_replace(html):
    html = html.replace(_ESCAPED_CFG_SITE_URL, _ESCAPED_CFG_SITE_SECURE_URL)
    return _RE_HTTPS_REPLACES.sub(_http_replace_func, html)
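## For instance (hypothetical values), with CFG_SITE_URL = "http://example.org"
## and CFG_SITE_SECURE_URL = "https://example.org", https_replace() turns
##   <img src="http://other.site/logo.png">
## into
##   <img src="https://example.org/sslredirect/other.site/logo.png">
## so that HTTPS pages do not trigger mixed-content warnings.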
class InputProcessed(object):
    """
    Auxiliary class used when reading input.
    @see: .
    """
    def read(self, *args):
        raise EOFError('The wsgi.input stream has already been consumed')
    readline = readlines = __iter__ = read

class SimulatedModPythonRequest(object):
    """
    mod_python-like request object.
    Minimal and cleaned-up implementation to make moving out of
    mod_python easy.
    @see:
    """
    def __init__(self, environ, start_response):
        self.__environ = environ
        self.__start_response = start_response
        self.__response_sent_p = False
        self.__buffer = ''
        self.__low_level_headers = []
        self.__headers = table(self.__low_level_headers)
        self.__headers.add = self.__headers.add_header
        self.__status = "200 OK"
        self.__filename = None
        self.__disposition_type = None
        self.__bytes_sent = 0
        self.__allowed_methods = []
        self.__cleanups = []
        self.headers_out = self.__headers
        ## See:
        self.__write = None
        self.__write_error = False
        self.__errors = environ['wsgi.errors']
        self.__headers_in = table([])
        self.__tainted = False
        self.__is_https = int(guess_scheme(self.__environ) == 'https')
        self.__replace_https = False
-
+        self.track_writings = False
+        self.__what_was_written = ""
        for key, value in environ.iteritems():
            if key.startswith('HTTP_'):
                self.__headers_in[key[len('HTTP_'):].replace('_', '-')] = value
        if environ.get('CONTENT_LENGTH'):
            self.__headers_in['content-length'] = environ['CONTENT_LENGTH']
        if environ.get('CONTENT_TYPE'):
            self.__headers_in['content-type'] = environ['CONTENT_TYPE']

    def get_wsgi_environ(self):
        return self.__environ

    def get_post_form(self):
        self.__tainted = True
        post_form = self.__environ.get('wsgi.post_form')
        input = self.__environ['wsgi.input']
        if (post_form is not None and post_form[0] is input):
            return post_form[2]
        # This must be done to avoid a bug in cgi.FieldStorage
        self.__environ.setdefault('QUERY_STRING', '')
        ## Video handler hack:
        uri = self.__environ['PATH_INFO']
        if uri.endswith("upload_video"):
            tmp_shared = True
        else:
            tmp_shared = False
        fs = FieldStorage(self, keep_blank_values=1, to_tmp_shared=tmp_shared)
        if fs.wsgi_input_consumed:
            new_input = InputProcessed()
            post_form = (new_input, input, fs)
            self.__environ['wsgi.post_form'] = post_form
            self.__environ['wsgi.input'] = new_input
        else:
            post_form = (input, None, fs)
            self.__environ['wsgi.post_form'] = post_form
        return fs

    def get_response_sent_p(self):
        return self.__response_sent_p

    def get_low_level_headers(self):
        return self.__low_level_headers

    def get_buffer(self):
        return self.__buffer

    def write(self, string, flush=1):
        if isinstance(string, unicode):
            self.__buffer += string.encode('utf8')
        else:
            self.__buffer += string
        if flush:
            self.flush()
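    # (Note, added for clarity: write() only appends to the internal buffer;
    # the data reaches the WSGI layer when flush() below runs, which also
    # sends the HTTP headers on first use.  When track_writings is enabled,
    # flush() additionally keeps a copy of everything written in
    # __what_was_written, exposed via the what_was_written property.)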
    def flush(self):
        self.send_http_header()
        if self.__buffer:
            self.__bytes_sent += len(self.__buffer)
            try:
                if not self.__write_error:
                    if self.__replace_https:
                        self.__write(https_replace(self.__buffer))
                    else:
                        self.__write(self.__buffer)
+                    if self.track_writings:
+                        if self.__replace_https:
+                            self.__what_was_written += https_replace(self.__buffer)
+                        else:
+                            self.__what_was_written += self.__buffer
            except IOError, err:
                if "failed to write data" in str(err) or "client connection closed" in str(err):
                    ## Let's just log this exception without alerting the admin:
                    register_exception(req=self)
                    self.__write_error = True ## This flag is there just
                    ## to not report later other errors to the admin.
                else:
                    raise
            self.__buffer = ''

    def set_content_type(self, content_type):
        self.__headers['content-type'] = content_type
        if self.__is_https:
            if content_type.startswith("text/html") or content_type.startswith("application/rss+xml"):
                self.__replace_https = True

    def get_content_type(self):
        return self.__headers['content-type']

    def send_http_header(self):
        if not self.__response_sent_p:
            self.__tainted = True
            if self.__allowed_methods and self.__status.startswith('405 ') or self.__status.startswith('501 '):
                self.__headers['Allow'] = ', '.join(self.__allowed_methods)
            ## See:
            #print self.__low_level_headers
            self.__write = self.__start_response(self.__status, self.__low_level_headers)
            self.__response_sent_p = True
            #print "Response sent: %s" % self.__headers

    def get_unparsed_uri(self):
        return '?'.join([self.__environ['PATH_INFO'], self.__environ['QUERY_STRING']])

    def get_uri(self):
        return self.__environ['PATH_INFO']

    def get_headers_in(self):
        return self.__headers_in

    def get_subprocess_env(self):
        return self.__environ

    def add_common_vars(self):
        pass

    def get_args(self):
        return self.__environ['QUERY_STRING']

    def get_remote_ip(self):
        if 'X-FORWARDED-FOR' in self.__headers_in and \
           self.__headers_in.get('X-FORWARDED-SERVER', '') == \
           self.__headers_in.get('X-FORWARDED-HOST', '') == \
           urlparse(CFG_SITE_URL)[1]:
            ip = self.__headers_in['X-FORWARDED-FOR'].split(',')[0]
            if ip:
                return ip
        return self.__environ.get('REMOTE_ADDR')

    def get_remote_host(self):
        return self.__environ.get('REMOTE_HOST')

    def get_header_only(self):
        return self.__environ['REQUEST_METHOD'] == 'HEAD'

    def set_status(self, status):
        self.__status = '%s %s' % (status, HTTP_STATUS_MAP.get(int(status), 'Explanation not available'))

    def get_status(self):
        return int(self.__status.split(' ')[0])

    def get_wsgi_status(self):
        return self.__status

    def sendfile(self, path, offset=0, the_len=-1):
        try:
            self.send_http_header()
            file_to_send = open(path)
            file_to_send.seek(offset)
            file_wrapper = FileWrapper(file_to_send)
            count = 0
            if the_len < 0:
                for chunk in file_wrapper:
                    count += len(chunk)
                    self.__bytes_sent += len(chunk)
                    self.__write(chunk)
            else:
                for chunk in file_wrapper:
                    if the_len >= len(chunk):
                        the_len -= len(chunk)
                        count += len(chunk)
                        self.__bytes_sent += len(chunk)
                        self.__write(chunk)
                    else:
                        count += the_len
                        self.__bytes_sent += the_len
                        self.__write(chunk[:the_len])
                        break
        except IOError, err:
            if "failed to write data" in str(err) or "client connection closed" in str(err):
                ## Let's just log this exception without alerting the admin:
                register_exception(req=self)
            else:
                raise
        return self.__bytes_sent

    def set_content_length(self, content_length):
        if content_length is not None:
            self.__headers['content-length'] = str(content_length)
        else:
            del self.__headers['content-length']

    def is_https(self):
        return self.__is_https

    def get_method(self):
        return self.__environ['REQUEST_METHOD']

    def get_hostname(self):
        return self.__environ.get('HTTP_HOST', '')

    def set_filename(self, filename):
        self.__filename = filename
        if self.__disposition_type is None:
            self.__disposition_type = 'inline'
        self.__headers['content-disposition'] = '%s; filename=%s' % (self.__disposition_type, self.__filename)

    def set_encoding(self, encoding):
        if encoding:
            self.__headers['content-encoding'] = str(encoding)
        else:
            del self.__headers['content-encoding']

    def get_bytes_sent(self):
        return self.__bytes_sent

    def log_error(self, message):
        self.__errors.write(message.strip() + '\n')

    def get_content_type_set_p(self):
        return bool(self.__headers['content-type'])

    def allow_methods(self, methods, reset=0):
        if reset:
            self.__allowed_methods = []
        self.__allowed_methods += [method.upper().strip() for method in methods]
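    # (Added note: allow_methods() feeds the Allow response header; when the
    # status is later set to 405 or 501, send_http_header() above emits e.g.
    # "Allow: GET, POST" from the methods registered here.)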
    def get_allowed_methods(self):
        return self.__allowed_methods

    def readline(self, hint=None):
        try:
            return self.__environ['wsgi.input'].readline(hint)
        except TypeError:
            ## the hint param is not part of the WSGI PEP, although
            ## it's great to exploit it when reading FORMs
            ## with large files, in order to avoid filling up the memory.
            ## Too bad it's not there :-(
            return self.__environ['wsgi.input'].readline()

    def readlines(self, hint=None):
        return self.__environ['wsgi.input'].readlines(hint)

    def read(self, hint=None):
        return self.__environ['wsgi.input'].read(hint)

    def register_cleanup(self, callback, data=None):
        self.__cleanups.append((callback, data))

    def get_cleanups(self):
        return self.__cleanups

    def get_referer(self):
        return self.headers_in.get('referer')

+    def get_what_was_written(self):
+        return self.__what_was_written
+
    def __str__(self):
        from pprint import pformat
        out = ""
        for key in dir(self):
            try:
                if not callable(getattr(self, key)) and not key.startswith("_SimulatedModPythonRequest") and not key.startswith('__'):
                    out += 'req.%s: %s\n' % (key, pformat(getattr(self, key)))
            except:
                pass
        return out

    def get_original_wsgi_environment(self):
        """
        Return the original WSGI environment used to initialize this request
        object.
        @return: environ, start_response
        @raise AssertionError: in case the environment has been altered, i.e.
            either the input has been consumed or something has already been
            written to the output.
        """
        assert not self.__tainted, "The original WSGI environment is tainted since at least req.write or req.form has been used."
        return self.__environ, self.__start_response

    content_type = property(get_content_type, set_content_type)
    unparsed_uri = property(get_unparsed_uri)
    uri = property(get_uri)
    headers_in = property(get_headers_in)
    subprocess_env = property(get_subprocess_env)
    args = property(get_args)
    header_only = property(get_header_only)
    status = property(get_status, set_status)
    method = property(get_method)
    hostname = property(get_hostname)
    filename = property(fset=set_filename)
    encoding = property(fset=set_encoding)
    bytes_sent = property(get_bytes_sent)
    content_type_set_p = property(get_content_type_set_p)
    allowed_methods = property(get_allowed_methods)
    response_sent_p = property(get_response_sent_p)
    form = property(get_post_form)
    remote_ip = property(get_remote_ip)
    remote_host = property(get_remote_host)
    referer = property(get_referer)
+    what_was_written = property(get_what_was_written)

def alert_admin_for_server_status_p(status, referer):
    """
    Check the configuration variable
    CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST to see if the exception should
    be registered and the admin should be alerted.
    """
    status = str(status)
    for pattern in CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST:
        pattern = pattern.lower()
        must_have_referer = False
        if pattern.endswith('r'):
            ## e.g. "404 r"
            must_have_referer = True
            pattern = pattern[:-1].strip() ## -> "404"
        if fnmatch(status, pattern) and (not must_have_referer or referer):
            return True
    return False
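## (Illustrative: with CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST set to, say,
## ['404 r', '5*'], a 404 alerts the admin only when the request carries a
## referer, while any 5xx status always does.)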
""" ## Needed for mod_wsgi, see: req = SimulatedModPythonRequest(environ, start_response) #print 'Starting mod_python simulation' try: try: possible_module, possible_handler = is_mp_legacy_publisher_path(environ['PATH_INFO']) if possible_module is not None: mp_legacy_publisher(req, possible_module, possible_handler) elif CFG_WSGI_SERVE_STATIC_FILES: possible_static_path = is_static_path(environ['PATH_INFO']) if possible_static_path is not None: from invenio.bibdocfile import stream_file stream_file(req, possible_static_path) else: ret = invenio_handler(req) else: ret = invenio_handler(req) req.flush() except SERVER_RETURN, status: status = int(str(status)) if status not in (OK, DONE): req.status = status req.headers_out['content-type'] = 'text/html' admin_to_be_alerted = alert_admin_for_server_status_p(status, req.headers_in.get('referer')) if admin_to_be_alerted: register_exception(req=req, alert_admin=True) if not req.response_sent_p: start_response(req.get_wsgi_status(), req.get_low_level_headers(), sys.exc_info()) return generate_error_page(req, admin_to_be_alerted) else: req.flush() except: register_exception(req=req, alert_admin=True) if not req.response_sent_p: req.status = HTTP_INTERNAL_SERVER_ERROR req.headers_out['content-type'] = 'text/html' start_response(req.get_wsgi_status(), req.get_low_level_headers(), sys.exc_info()) if CFG_DEVEL_SITE: return ["

    %s
    " % cgi.escape(get_pretty_traceback(req=req, exc_info=sys.exc_info()))] from cgitb import html return [html(sys.exc_info())] return generate_error_page(req) else: return generate_error_page(req, page_already_started=True) finally: for (callback, data) in req.get_cleanups(): callback(data) return [] def generate_error_page(req, admin_was_alerted=True, page_already_started=False): """ Returns an iterable with the error page to be sent to the user browser. """ from invenio.webpage import page from invenio import template webstyle_templates = template.load('webstyle') ln = req.form.get('ln', CFG_SITE_LANG) if page_already_started: return [webstyle_templates.tmpl_error_page(status=req.get_wsgi_status(), ln=ln, admin_was_alerted=admin_was_alerted)] else: return [page(title=req.get_wsgi_status(), body=webstyle_templates.tmpl_error_page(status=req.get_wsgi_status(), ln=ln, admin_was_alerted=admin_was_alerted), language=ln, req=req)] def is_static_path(path): """ Returns True if path corresponds to an exsting file under CFG_WEBDIR. @param path: the path. @type path: string @return: True if path corresponds to an exsting file under CFG_WEBDIR. @rtype: bool """ path = os.path.abspath(CFG_WEBDIR + path) if path.startswith(CFG_WEBDIR) and os.path.isfile(path): return path return None def is_mp_legacy_publisher_path(path): """ Checks path corresponds to an exsting Python file under CFG_WEBDIR. @param path: the path. @type path: string @return: the path of the module to load and the function to call there. @rtype: tuple """ path = path.split('/') for index, component in enumerate(path): if component.endswith('.py'): possible_module = os.path.abspath(CFG_WEBDIR + os.path.sep + os.path.sep.join(path[:index + 1])) possible_handler = '/'.join(path[index + 1:]).strip() if possible_handler.startswith('_'): return None, None if not possible_handler: possible_handler = 'index' if os.path.exists(possible_module) and possible_module.startswith(CFG_WEBDIR): return (possible_module, possible_handler) else: return None, None def mp_legacy_publisher(req, possible_module, possible_handler): """ mod_python legacy publisher minimum implementation. """ the_module = open(possible_module).read() module_globals = {} exec(the_module, module_globals) if possible_handler in module_globals and callable(module_globals[possible_handler]): from invenio.webinterface_handler import _check_result ## req is the required first parameter of any handler expected_args = list(inspect.getargspec(module_globals[possible_handler])[0]) if not expected_args or 'req' != expected_args[0]: ## req was not the first argument. Too bad! raise SERVER_RETURN, HTTP_NOT_FOUND ## the req.form must be casted to dict because of Python 2.4 and earlier ## otherwise any object exposing the mapping interface can be ## used with the magic ** form = dict(req.form) for key, value in form.items(): ## FIXME: this is a backward compatibility workaround ## because most of the old administration web handler ## expect parameters to be of type str. ## When legacy publisher will be removed all this ## pain will go away anyway :-) if isinstance(value, str): form[key] = str(value) else: ## NOTE: this is a workaround for e.g. legacy webupload ## that is still using legacy publisher and expect to ## have a file (Field) instance instead of a string. 
def mp_legacy_publisher(req, possible_module, possible_handler):
    """
    mod_python legacy publisher minimum implementation.
    """
    the_module = open(possible_module).read()
    module_globals = {}
    exec(the_module, module_globals)
    if possible_handler in module_globals and callable(module_globals[possible_handler]):
        from invenio.webinterface_handler import _check_result
        ## req is the required first parameter of any handler
        expected_args = list(inspect.getargspec(module_globals[possible_handler])[0])
        if not expected_args or 'req' != expected_args[0]:
            ## req was not the first argument. Too bad!
            raise SERVER_RETURN, HTTP_NOT_FOUND
        ## the req.form must be casted to dict because of Python 2.4 and earlier,
        ## otherwise any object exposing the mapping interface could be
        ## used with the magic **
        form = dict(req.form)
        for key, value in form.items():
            ## FIXME: this is a backward compatibility workaround
            ## because most of the old administration web handlers
            ## expect parameters to be of type str.
            ## When the legacy publisher is removed all this
            ## pain will go away anyway :-)
            if isinstance(value, str):
                form[key] = str(value)
            else:
                ## NOTE: this is a workaround for e.g. legacy webupload
                ## that is still using the legacy publisher and expects to
                ## have a file (Field) instance instead of a string.
                form[key] = value

        if (CFG_FULL_HTTPS or CFG_HAS_HTTPS_SUPPORT and get_session(req).need_https) and not req.is_https():
            from invenio.urlutils import redirect_to_url
            # We need to isolate the part of the URI that is after
            # CFG_SITE_URL, and append that to our CFG_SITE_SECURE_URL.
            original_parts = urlparse(req.unparsed_uri)
            plain_prefix_parts = urlparse(CFG_SITE_URL)
            secure_prefix_parts = urlparse(CFG_SITE_SECURE_URL)

            # Compute the new path
            plain_path = original_parts[2]
            plain_path = secure_prefix_parts[2] + \
                         plain_path[len(plain_prefix_parts[2]):]

            # ...and recompose the complete URL
            final_parts = list(secure_prefix_parts)
            final_parts[2] = plain_path
            final_parts[-3:] = original_parts[-3:]

            target = urlunparse(final_parts)
            redirect_to_url(req, target)

        try:
            return _check_result(req, module_globals[possible_handler](req, **form))
        except TypeError, err:
            if ("%s() got an unexpected keyword argument" % possible_handler) in str(err) or ('%s() takes at least' % possible_handler) in str(err):
                inspected_args = inspect.getargspec(module_globals[possible_handler])
                expected_args = list(inspected_args[0])
                expected_defaults = list(inspected_args[3])
                expected_args.reverse()
                expected_defaults.reverse()
                register_exception(req=req, prefix="Wrong GET parameter set in calling a legacy publisher handler for %s: expected_args=%s, found_args=%s" % (possible_handler, repr(expected_args), repr(req.form.keys())), alert_admin=CFG_DEVEL_SITE)
                cleaned_form = {}
                for index, arg in enumerate(expected_args):
                    if arg == 'req':
                        continue
                    if index < len(expected_defaults):
                        cleaned_form[arg] = form.get(arg, expected_defaults[index])
                    else:
                        cleaned_form[arg] = form.get(arg, None)
                return _check_result(req, module_globals[possible_handler](req, **cleaned_form))
            else:
                raise
    else:
        raise SERVER_RETURN, HTTP_NOT_FOUND

def check_wsgiref_testing_feasability():
    """
    In order to use wsgiref for running Invenio, CFG_SITE_URL and
    CFG_SITE_SECURE_URL must not use HTTPS because SSL is not supported.
    """
    if CFG_SITE_URL.lower().startswith('https'):
        print >> sys.stderr, """
ERROR: SSL is not supported by the wsgiref simple server implementation.
Please set CFG_SITE_URL not to start with "https".
Currently CFG_SITE_URL is set to: "%s".""" % CFG_SITE_URL
        sys.exit(1)
    if CFG_SITE_SECURE_URL.lower().startswith('https'):
        print >> sys.stderr, """
ERROR: SSL is not supported by the wsgiref simple server implementation.
Please set CFG_SITE_SECURE_URL not to start with "https".
Currently CFG_SITE_SECURE_URL is set to: "%s".""" % CFG_SITE_SECURE_URL
        sys.exit(1)

def wsgi_handler_test(port=80):
    """
    Simple WSGI testing environment based on wsgiref.
    """
    from wsgiref.simple_server import make_server
    global CFG_WSGI_SERVE_STATIC_FILES
    CFG_WSGI_SERVE_STATIC_FILES = True
    check_wsgiref_testing_feasability()
    validator_app = validator(application)
    httpd = make_server('', port, validator_app)
    print "Serving on port %s..." % port
    httpd.serve_forever()

def main():
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-t', '--test', action='store_true',
                      dest='test', default=False,
                      help="Run a WSGI test server via wsgiref (not using Apache).")
    parser.add_option('-p', '--port', type='int', dest='port', default='80',
                      help="The port where the WSGI test server will listen. [80]")
    (options, args) = parser.parse_args()
    if options.test:
        wsgi_handler_test(options.port)
    else:
        parser.print_help()

if __name__ == "__main__":
    main()
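## (Example, illustrative: to try Invenio without Apache one can run
##   $ python webinterface_handler_wsgi.py --test --port 8080
## which serves the application, static files included, via wsgiref;
## check_wsgiref_testing_feasability() refuses HTTPS site URLs first.)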
