diff --git a/config/invenio-autotools.conf.in b/config/invenio-autotools.conf.in
index 0cb0f8f3f..0caf11342 100644
--- a/config/invenio-autotools.conf.in
+++ b/config/invenio-autotools.conf.in
@@ -1,76 +1,77 @@
 ## $Id$
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 ## DO NOT EDIT THIS FILE.
 
 ## YOU SHOULD NOT EDIT THESE VALUES.  THEY WERE AUTOMATICALLY
 ## CALCULATED BY AUTOTOOLS DURING THE "CONFIGURE" STAGE.
 
 [Invenio]
 
 ## Invenio version:
 CFG_VERSION = @VERSION@
 
 ## directories detected from 'configure --prefix ...' parameters:
 CFG_PREFIX = @prefix@
 CFG_BINDIR = @prefix@/bin
 CFG_PYLIBDIR = @prefix@/lib/python
 CFG_LOGDIR = @localstatedir@/log
 CFG_ETCDIR = @prefix@/etc
 CFG_LOCALEDIR = @prefix@/share/locale
 CFG_TMPDIR = @localstatedir@/tmp
 CFG_CACHEDIR = @localstatedir@/cache
 CFG_WEBDIR = @localstatedir@/www
 
 ## path to interesting programs:
 CFG_PATH_PHP = @PHP@
 CFG_PATH_ACROREAD = @ACROREAD@
 CFG_PATH_GZIP = @GZIP@
 CFG_PATH_GUNZIP = @GUNZIP@
 CFG_PATH_TAR = @TAR@
 CFG_PATH_DISTILLER = @PS2PDF@
 CFG_PATH_GFILE = @FILE@
 CFG_PATH_CONVERT = @CONVERT@
 CFG_PATH_PDFTOTEXT = @PDFTOTEXT@
 CFG_PATH_PDFTK = @PDFTK@
 CFG_PATH_PDF2PS = @PDF2PS@
 CFG_PATH_PSTOTEXT = @PSTOTEXT@
 CFG_PATH_PSTOASCII = @PSTOASCII@
 CFG_PATH_ANTIWORD = @ANTIWORD@
 CFG_PATH_CATDOC = @CATDOC@
 CFG_PATH_WVTEXT = @WVTEXT@
 CFG_PATH_PPTHTML = @PPTHTML@
 CFG_PATH_XLHTML = @XLHTML@
 CFG_PATH_HTMLTOTEXT = @HTMLTOTEXT@
 CFG_PATH_WGET = @WGET@
+CFG_PATH_MD5SUM = @MD5SUM@
 
 ## CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE -- path to the stopwords file.  You
 ## probably don't want to change this path, although you may want to
 ## change the content of that file.  Note that the file is used by the
 ## rank engine internally, so it should be given even if stopword
 ## removal in the indexes is not used.
 CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE = @prefix@/etc/bibrank/stopwords.kb
 
 ## helper style of variables for WebSubmit:
 CFG_WEBSUBMIT_COUNTERSDIR = @localstatedir@/data/submit/counters
 CFG_WEBSUBMIT_STORAGEDIR = @localstatedir@/data/submit/storage
 CFG_WEBSUBMIT_FILEDIR = @localstatedir@/data/files
 CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR = @prefix@/etc/bibconvert/config
 
 ## - end of file -
\ No newline at end of file
diff --git a/configure.ac b/configure.ac
index 520def50a..a5a395472 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,674 +1,685 @@
 ## $Id$
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 ## This is CDS Invenio main configure.ac file.  If you change this
 ## file, then please run "autoreconf" to regenerate the "configure"
 ## script.
 
 ## Initialize autoconf and automake:
 AC_INIT(cds-invenio, 0.99.0.20080629, cds.support@cern.ch)
 AM_INIT_AUTOMAKE(cds-invenio, 0.99.0.20080629)
 
 ## By default we shall install into /opt/cds-invenio.  (Do not use
 ## AC_PREFIX_DEFAULT for this, because it would not work well with
 ## the localstatedir hack below.)
 test "${prefix}" = NONE && prefix=/opt/cds-invenio
 
 ## Check for install:
 AC_PROG_INSTALL
 
 ## Check for gettext support:
 AM_GNU_GETTEXT(external)
 AM_GNU_GETTEXT_VERSION(0.14.4)
 
 ## Check for MySQL client:
 AC_MSG_CHECKING(for mysql)
 AC_ARG_WITH(mysql, AC_HELP_STRING([--with-mysql], [path to a specific MySQL binary (optional)]), MYSQL=${withval})
 if test -n "$MYSQL"; then
    AC_MSG_RESULT($MYSQL)
 else
    AC_PATH_PROG(MYSQL, mysql)
    if test -z "$MYSQL"; then
       AC_MSG_ERROR([
-      MySQL command-line client was not found in your PATH.  
+      MySQL command-line client was not found in your PATH.
       Please install it first.
       Available from <http://mysql.com/>.])
    fi
 fi
 
 ## Check for Python:
 AC_MSG_CHECKING(for python)
 AC_ARG_WITH(python, AC_HELP_STRING([--with-python], [path to a specific Python binary (optional)]), PYTHON=${withval})
 if test -n "$PYTHON"; then
    AC_MSG_RESULT($PYTHON)
 else
    AC_PATH_PROG(PYTHON, python)
    if test -z "$PYTHON"; then
       AC_MSG_ERROR([
       Python was not found in your PATH.  Please either install it
       in your PATH or specify --with-python configure option.
       Python is available from <http://python.org/>.])
    fi
 fi
 
 ## Check for Python version and modules:
 AC_MSG_CHECKING(for required Python modules)
 $PYTHON ${srcdir}/configure-tests.py
 if test $? -ne 0; then
    AC_MSG_ERROR([Please fix the above Python problem before continuing.])
 fi
 AC_MSG_RESULT(found)
 
 ## Check for PHP:
 AC_PATH_PROG(PHP, php)
 
 ## Check for Acrobat Reader:
 AC_PATH_PROG(ACROREAD, acroread)
 if test -z "$ACROREAD"; then
    AC_MSG_WARN([
    Acrobat Reader was not found in your PATH.  It is used in
    the WebSubmit module for automatic conversion of submitted documents.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  Acrobat Reader is available from
    <http://www.adobe.com/products/acrobat/readstep.html>.])
 fi
 
 ## Check for gzip:
 AC_PATH_PROG(GZIP, gzip)
 if test -z "$GZIP"; then
    AC_MSG_WARN([
    Gzip was not found in your PATH.  It is used in
    the WebSubmit module to compress the data submitted in an archive.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  Gzip is available from
    <http://www.gzip.org/>.])
 fi
 
 ## Check for gunzip:
 AC_PATH_PROG(GUNZIP, gunzip)
 if test -z "$GUNZIP"; then
    AC_MSG_WARN([
    Gunzip was not found in your PATH.  It is used in
    the WebSubmit module to correctly deal with submitted compressed
    files.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  Gunzip is available from
    <http://www.gzip.org/>.])
 fi
 
 ## Check for tar:
 AC_PATH_PROG(TAR, tar)
 if test -z "$TAR"; then
    AC_MSG_WARN([
    Tar was not found in your PATH.  It is used in
    the WebSubmit module to pack the submitted data into an archive.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  Tar is available from
    <ftp://prep.ai.mit.edu/pub/gnu/tar/>.])
 fi
 
 ## Check for wget:
 AC_PATH_PROG(WGET, wget)
 if test -z "$WGET"; then
    AC_MSG_WARN([
    wget was not found in your PATH.  It is used for the fulltext file
    retrieval.
-   You can continue without it but we recomend you to install it first
+   You can continue without it but we recommend you to install it first
    and to rerun the configure script.  wget is available from
    <http://www.gnu.org/software/wget/>.])
 fi
 
+## Check for md5sum:
+AC_PATH_PROG(MD5SUM, md5sum)
+if test -z "$MD5SUM"; then
+   AC_MSG_WARN([
+   md5sum was not found in your PATH.  It is used for the fulltext file
+   checksumming.
+   You can continue without it but we recommend you to install it first
+   and to rerun the configure script.  md5sum is available from
+   <http://www.gnu.org/software/coreutils/>.])
+fi
+
 ## Check for ps2pdf:
 AC_PATH_PROG(PS2PDF, ps2pdf)
 if test -z "$PS2PDF"; then
    AC_MSG_WARN([
    ps2pdf was not found in your PATH.  It is used in
    the WebSubmit module to convert submitted PostScripts into PDF.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  ps2pdf is available from
    <http://www.cs.wisc.edu/~ghost/doc/AFPL/>.])
 fi
 
 ## Check for pdftotext:
 AC_PATH_PROG(PDFTOTEXT, pdftotext)
 if test -z "$PDFTOTEXT"; then
    AC_MSG_WARN([
    pdftotext was not found in your PATH.  It is used for the fulltext indexation
    of PDF files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  pdftotext is available from <http://www.foolabs.com/xpdf/home.html>.
    ])
 fi
 
 ## Check for pdftk:
 AC_PATH_PROG(PDFTK, pdftk)
 if test -z "$PDFTK"; then
    AC_MSG_WARN([
    pdftk was not found in your PATH.  It is used for the fulltext file stamping.
-   You can continue without it but you may miss this feature of CDS Invenio.  
+   You can continue without it but you may miss this feature of CDS Invenio.
-   We recomend you to install it first and to rerun the configure
+   We recommend you to install it first and to rerun the configure
    script.  pdftk is available from <http://www.accesspdf.com/pdftk/>.
    ])
 fi
 
 ## Check for pdf2ps:
 AC_PATH_PROG(PDF2PS, pdf2ps)
 if test -z "$PDF2PS"; then
    AC_MSG_WARN([
    pdf2ps was not found in your PATH.  It is used in
    the WebSubmit module to convert submitted PDFs into PostScript.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  pdf2ps is available from
    <http://www.cs.wisc.edu/~ghost/doc/AFPL/>.])
 fi
 
 ## Check for pstotext:
 AC_PATH_PROG(PSTOTEXT, pstotext)
 if test -z "$PSTOTEXT"; then
    AC_MSG_WARN([
    pstotext was not found in your PATH.  It is used for the fulltext indexation
    of PDF and PostScript files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  pstotext is available from <http://www.cs.wisc.edu/~ghost/doc/AFPL/>.
    ])
 fi
 
 ## Check for ps2ascii:
 AC_PATH_PROG(PSTOASCII, ps2ascii)
 if test -z "$PSTOASCII"; then
    AC_MSG_WARN([
    ps2ascii was not found in your PATH.  It is used for the fulltext indexation
    of PostScript files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  ps2ascii is available from <http://www.cs.wisc.edu/~ghost/doc/AFPL/>.
    ])
 fi
 
 ## Check for antiword:
 AC_PATH_PROG(ANTIWORD, antiword)
 if test -z "$ANTIWORD"; then
    AC_MSG_WARN([
    antiword was not found in your PATH.  It is used for the fulltext indexation
    of Microsoft Word files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  antiword is available from <http://www.winfield.demon.nl/index.html>.
    ])
 fi
 
 ## Check for catdoc:
 AC_PATH_PROG(CATDOC, catdoc)
 if test -z "$CATDOC"; then
    AC_MSG_WARN([
    catdoc was not found in your PATH.  It is used for the fulltext indexation
    of Microsoft Word files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  catdoc is available from <http://www.ice.ru/~vitus/catdoc/index.html>.
    ])
 fi
 
 ## Check for wvText:
 AC_PATH_PROG(WVTEXT, wvText)
 if test -z "$WVTEXT"; then
    AC_MSG_WARN([
    wvText was not found in your PATH.  It is used for the fulltext indexation
    of Microsoft Word files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  wvText is available from <http://sourceforge.net/projects/wvware>.
    ])
 fi
 
 ## Check for ppthtml:
 AC_PATH_PROG(PPTHTML, ppthtml)
 if test -z "$PPTHTML"; then
    AC_MSG_WARN([
-   ppthtml was found in your PATH. It is used for the fulltext indexation
+   ppthtml was not found in your PATH.  It is used for the fulltext indexation
    of Microsoft PowerPoint files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  ppthtml is available from <http://www.xlhtml.org/>.
    ])
 fi
 
 ## Check for xlhtml:
 AC_PATH_PROG(XLHTML, xlhtml)
 if test -z "$XLHTML"; then
    AC_MSG_WARN([
-   xlhtml was found in your PATH. It is used for the fulltext indexation
+   xlhtml was not found in your PATH.  It is used for the fulltext indexation
    of Microsoft Excel files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  xlhtml is available from <http://chicago.sourceforge.net/xlhtml/>.
    ])
 fi
 
 ## Check for html2text:
 AC_PATH_PROG(HTMLTOTEXT, html2text)
 if test -z "$HTMLTOTEXT"; then
    AC_MSG_WARN([
-   html2text was found in your PATH. It is used for the fulltext indexation
+   html2text was not found in your PATH.  It is used for the fulltext indexation
    of Microsoft PowerPoint and Excel files.
    You can continue without it but you may miss fulltext searching capability
-   of CDS Invenio.  We recomend you to install it first and to rerun the configure
+   of CDS Invenio.  We recommend you to install it first and to rerun the configure
    script.  html2text is available from <http://userpage.fu-berlin.de/~mbayer/tools/html2text.html>.
    ])
 fi
 
 ## Check for Giftext:
 AC_PATH_PROG(GIFTEXT, giftext)
 if test -z "$GIFTEXT"; then
    AC_MSG_WARN([
    Giftext was not found in your PATH.  It is used in
    the WebSubmit module to create an icon from a submitted picture.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  Giftext is available from
    <http://prtr-13.ucsc.edu/~badger/software/libungif/getting.shtml>.])
 fi
 
 ## Check for file:
 AC_PATH_PROG(FILE, file)
 if test -z "$FILE"; then
    AC_MSG_WARN([
    File was not found in your PATH.  It is used in
    the WebSubmit module to check the validity of the submitted files.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  File is available from
    <ftp://ftp.astron.com/pub/file/>.])
 fi
 
 ## Check for convert:
 AC_PATH_PROG(CONVERT, convert)
 if test -z "$CONVERT"; then
    AC_MSG_WARN([
    Convert was not found in your PATH.  It is used in
    the WebSubmit module to create an icon from a submitted picture.
    You can continue without it but you will miss some CDS Invenio
    functionality.  We recommend you to install it first and to rerun
    the configure script.  Convert is available from
    <http://www.imagemagick.org/>.])
 fi
 
 ## Check for CLISP:
 AC_MSG_CHECKING(for clisp)
 AC_ARG_WITH(clisp, AC_HELP_STRING([--with-clisp], [path to a specific CLISP binary (optional)]), CLISP=${withval})
 if test -n "$CLISP"; then
    AC_MSG_RESULT($CLISP)
 else
    AC_PATH_PROG(CLISP, clisp)
    if test -z "$CLISP"; then
       AC_MSG_WARN([
       GNU CLISP was not found in your PATH.  It is used by the WebStat
       module to produce statistics about CDS Invenio usage.  (Alternatively,
       SBCL or CMUCL can be used instead of CLISP.)
       You can continue without it but you will miss this feature.  We
-      recommend you to install it first (if you don't have neither CMUCL
+      recommend you to install it first (if you have neither CMUCL
       nor SBCL) and to rerun the configure script.
       GNU CLISP is available from <http://clisp.cons.org/>.])
    fi
 fi
 
 ## Check for CMUCL:
 AC_MSG_CHECKING(for cmucl)
 AC_ARG_WITH(cmucl, AC_HELP_STRING([--with-cmucl], [path to a specific CMUCL binary (optional)]), CMUCL=${withval})
 if test -n "$CMUCL"; then
    AC_MSG_RESULT($CMUCL)
 else
    AC_PATH_PROG(CMUCL, cmucl)
    if test -z "$CMUCL"; then
       AC_MSG_CHECKING(for lisp) # CMUCL can also be installed under `lisp' exec name
       AC_PATH_PROG(CMUCL, lisp)
    fi
    if test -z "$CMUCL"; then
       AC_MSG_WARN([
       CMUCL was not found in your PATH.  It is used by the WebStat
       module to produce statistics about CDS Invenio usage.  (Alternatively,
       CLISP or SBCL can be used instead of CMUCL.)
       You can continue without it but you will miss this feature.  We
-      recommend you to install it first (if you don't have neither CLISP
+      recommend you to install it first (if you have neither CLISP
       nor SBCL) and to rerun the configure script.
       CMUCL is available from <http://www.cons.org/cmucl/>.])
    fi
 fi
 
 ## Check for SBCL:
 AC_MSG_CHECKING(for sbcl)
 AC_ARG_WITH(sbcl, AC_HELP_STRING([--with-sbcl], [path to a specific SBCL binary (optional)]), SBCL=${withval})
 if test -n "$SBCL"; then
    AC_MSG_RESULT($SBCL)
 else
    AC_PATH_PROG(SBCL, sbcl)
    if test -z "$SBCL"; then
       AC_MSG_WARN([
       SBCL was not found in your PATH.  It is used by the WebStat
       module to produce statistics about CDS Invenio usage.  (Alternatively,
       CLISP or CMUCL can be used instead of SBCL.)
       You can continue without it but you will miss this feature.  We
-      recommend you to install it first (if you don't have neither CLISP
+      recommend you to install it first (if you have neither CLISP
       nor CMUCL) and to rerun the configure script.
       SBCL is available from <http://sbcl.sourceforge.net/>.])
    fi
 fi
 
 ## Check for gnuplot:
 AC_PATH_PROG(GNUPLOT, gnuplot)
 if test -z "$GNUPLOT"; then
    AC_MSG_WARN([
    Gnuplot was not found in your PATH.  It is used by the BibRank
    module to produce graphs about download and citation history.
    You can continue without it but you will miss these graphs.  We
    recommend you to install it first and to rerun the configure script.
    Gnuplot is available from <http://www.gnuplot.info/>.])
 fi
 
 ## Substitute variables:
 AC_SUBST(VERSION)
 AC_SUBST(MYSQL)
 AC_SUBST(PHP)
 AC_SUBST(PYTHON)
 AC_SUBST(CLIDIR)
 AC_SUBST(PDFTOTEXT)
 AC_SUBST(PDFTK)
 AC_SUBST(PDF2PS)
 AC_SUBST(PSTOTEXT)
 AC_SUBST(PSTOASCII)
 AC_SUBST(ANTIWORD)
 AC_SUBST(CATDOC)
 AC_SUBST(WVTEXT)
 AC_SUBST(PPTHTML)
 AC_SUBST(XLHTML)
 AC_SUBST(HTMLTOTEXT)
 AC_SUBST(localstatedir, `eval echo "${localstatedir}"`)
 AC_SUBST(CACHEDIR)
 AC_SUBST(CLISP)
 AC_SUBST(CMUCL)
 AC_SUBST(SBCL)
 AC_SUBST(GNUPLOT)
 AC_SUBST(DJPEG)
 AC_SUBST(CONVERT)
 AC_SUBST(GIFTEXT)
 AC_SUBST(JPEGSIZE)
 AC_SUBST(PNMSCALE)
 AC_SUBST(PPMQUANT)
 AC_SUBST(PPMTOGIF)
 AC_SUBST(GIFINTER)
 AC_SUBST(GIFRSIZE)
 
 ## Define output files:
 AC_CONFIG_FILES([config.nice  \
 	  Makefile \
           po/Makefile.in \
 	  config/Makefile \
 	  config/invenio-autotools.conf \
 	  modules/Makefile \
 	  modules/bibconvert/Makefile \
 	  modules/bibconvert/bin/Makefile \
 	  modules/bibconvert/bin/bibconvert \
 	  modules/bibconvert/doc/Makefile \
 	  modules/bibconvert/doc/admin/Makefile \
 	  modules/bibconvert/doc/hacking/Makefile \
 	  modules/bibconvert/etc/Makefile \
 	  modules/bibconvert/lib/Makefile \
 	  modules/bibmatch/Makefile \
 	  modules/bibmatch/bin/Makefile \
 	  modules/bibmatch/bin/bibmatch \
 	  modules/bibmatch/doc/Makefile \
 	  modules/bibmatch/doc/admin/Makefile \
 	  modules/bibmatch/etc/Makefile \
 	  modules/bibmatch/lib/Makefile \
 	  modules/bibedit/Makefile \
 	  modules/bibedit/bin/Makefile \
 	  modules/bibedit/bin/refextract \
 	  modules/bibedit/bin/xmlmarclint \
 	  modules/bibedit/bin/xmlmarc2textmarc \
 	  modules/bibedit/bin/bibedit \
 	  modules/bibedit/doc/Makefile \
 	  modules/bibedit/doc/admin/Makefile \
 	  modules/bibedit/doc/hacking/Makefile \
 	  modules/bibedit/etc/Makefile \
 	  modules/bibedit/lib/Makefile \
 	  modules/bibedit/web/Makefile \
 	  modules/bibedit/web/admin/Makefile \
 	  modules/bibformat/Makefile \
 	  modules/bibformat/bin/Makefile \
 	  modules/bibformat/bin/bibreformat \
 	  modules/bibformat/doc/Makefile \
 	  modules/bibformat/doc/admin/Makefile \
 	  modules/bibformat/doc/hacking/Makefile \
 	  modules/bibformat/etc/Makefile \
 	  modules/bibformat/etc/output_formats/Makefile \
 	  modules/bibformat/etc/format_templates/Makefile \
 	  modules/bibformat/lib/Makefile \
 	  modules/bibformat/lib/elements/Makefile \
 	  modules/bibformat/web/Makefile \
 	  modules/bibformat/web/admin/Makefile \
 	  modules/bibharvest/Makefile \
 	  modules/bibharvest/bin/Makefile \
 	  modules/bibharvest/bin/bibharvest \
 	  modules/bibharvest/bin/oaiharvest \
 	  modules/bibharvest/bin/oaiarchive \
 	  modules/bibharvest/doc/Makefile \
 	  modules/bibharvest/doc/admin/Makefile \
 	  modules/bibharvest/doc/hacking/Makefile \
 	  modules/bibharvest/lib/Makefile \
 	  modules/bibharvest/web/Makefile \
 	  modules/bibharvest/web/admin/Makefile \
 	  modules/bibclassify/Makefile \
 	  modules/bibclassify/bin/Makefile \
 	  modules/bibclassify/bin/bibclassify \
 	  modules/bibclassify/bin/bibclassifyd \
 	  modules/bibclassify/doc/Makefile \
 	  modules/bibclassify/doc/admin/Makefile \
 	  modules/bibclassify/doc/hacking/Makefile \
 	  modules/bibclassify/etc/Makefile
 	  modules/bibclassify/lib/Makefile
 	  modules/bibindex/Makefile \
 	  modules/bibindex/bin/Makefile \
 	  modules/bibindex/bin/bibindex \
 	  modules/bibindex/bin/bibstat \
 	  modules/bibindex/doc/Makefile \
 	  modules/bibindex/doc/admin/Makefile \
 	  modules/bibindex/doc/hacking/Makefile \
 	  modules/bibindex/lib/Makefile \
 	  modules/bibindex/web/Makefile \
 	  modules/bibindex/web/admin/Makefile \
 	  modules/bibrank/Makefile \
 	  modules/bibrank/bin/Makefile \
 	  modules/bibrank/bin/bibrank \
 	  modules/bibrank/bin/bibrankgkb \
 	  modules/bibrank/doc/Makefile \
 	  modules/bibrank/doc/admin/Makefile \
 	  modules/bibrank/doc/hacking/Makefile \
 	  modules/bibrank/etc/Makefile \
 	  modules/bibrank/etc/bibrankgkb.cfg \
 	  modules/bibrank/etc/demo_jif.cfg \
 	  modules/bibrank/etc/template_single_tag_rank_method.cfg \
 	  modules/bibrank/lib/Makefile \
 	  modules/bibrank/web/Makefile \
 	  modules/bibrank/web/admin/Makefile \
 	  modules/bibsched/Makefile \
 	  modules/bibsched/bin/Makefile \
 	  modules/bibsched/bin/bibsched \
 	  modules/bibsched/bin/bibtaskex \
 	  modules/bibsched/doc/Makefile \
 	  modules/bibsched/doc/admin/Makefile \
 	  modules/bibsched/doc/hacking/Makefile \
 	  modules/bibsched/lib/Makefile \
 	  modules/bibupload/Makefile \
 	  modules/bibupload/bin/Makefile \
 	  modules/bibupload/bin/bibupload \
 	  modules/bibupload/doc/Makefile \
 	  modules/bibupload/doc/admin/Makefile \
 	  modules/bibupload/doc/hacking/Makefile \
 	  modules/bibupload/lib/Makefile \
 	  modules/elmsubmit/Makefile \
 	  modules/elmsubmit/bin/Makefile \
 	  modules/elmsubmit/bin/elmsubmit \
 	  modules/elmsubmit/doc/Makefile \
 	  modules/elmsubmit/doc/admin/Makefile \
 	  modules/elmsubmit/doc/hacking/Makefile \
 	  modules/elmsubmit/etc/Makefile \
 	  modules/elmsubmit/etc/elmsubmit.cfg \
 	  modules/elmsubmit/lib/Makefile \
 	  modules/miscutil/Makefile \
 	  modules/miscutil/bin/Makefile \
 	  modules/miscutil/bin/dbexec \
 	  modules/miscutil/bin/inveniocfg \
 	  modules/miscutil/lib/Makefile \
 	  modules/miscutil/demo/Makefile \
 	  modules/miscutil/sql/Makefile \
 	  modules/miscutil/web/Makefile \
 	  modules/miscutil/doc/Makefile \
 	  modules/miscutil/doc/hacking/Makefile \
 	  modules/webaccess/Makefile \
 	  modules/webaccess/bin/Makefile \
 	  modules/webaccess/bin/authaction \
 	  modules/webaccess/bin/webaccessadmin \
 	  modules/webaccess/doc/Makefile \
 	  modules/webaccess/doc/admin/Makefile \
 	  modules/webaccess/doc/hacking/Makefile \
 	  modules/webaccess/lib/Makefile \
 	  modules/webaccess/web/Makefile \
 	  modules/webaccess/web/admin/Makefile \
 	  modules/webalert/Makefile \
 	  modules/webalert/bin/Makefile \
 	  modules/webalert/bin/alertengine \
 	  modules/webalert/doc/Makefile \
 	  modules/webalert/doc/admin/Makefile \
 	  modules/webalert/doc/hacking/Makefile \
 	  modules/webalert/lib/Makefile \
 	  modules/webalert/web/Makefile \
 	  modules/webhelp/Makefile \
 	  modules/webhelp/web/Makefile \
 	  modules/webhelp/web/hacking/Makefile \
 	  modules/webhelp/web/admin/Makefile \
 	  modules/webhelp/web/admin/howto/Makefile \
 	  modules/websearch/Makefile \
 	  modules/websearch/bin/Makefile \
 	  modules/websearch/bin/webcoll \
 	  modules/websearch/doc/Makefile \
 	  modules/websearch/doc/admin/Makefile \
 	  modules/websearch/doc/hacking/Makefile \
 	  modules/websearch/lib/Makefile \
 	  modules/websearch/web/Makefile \
 	  modules/websearch/web/admin/Makefile \
 	  modules/websession/Makefile \
 	  modules/websession/bin/Makefile \
 	  modules/websession/bin/inveniogc \
 	  modules/websession/doc/Makefile \
 	  modules/websession/doc/admin/Makefile \
 	  modules/websession/doc/hacking/Makefile \
 	  modules/websession/lib/Makefile \
 	  modules/websession/web/Makefile \
 	  modules/webstat/Makefile \
 	  modules/webstat/bin/Makefile \
 	  modules/webstat/bin/webstat \
 	  modules/webstat/bin/webstatadmin \
 	  modules/webstat/doc/Makefile \
 	  modules/webstat/doc/admin/Makefile \
 	  modules/webstat/doc/hacking/Makefile \
 	  modules/webstat/etc/Makefile \
 	  modules/webstat/lib/Makefile \
 	  modules/webstyle/Makefile \
 	  modules/webstyle/bin/Makefile \
 	  modules/webstyle/bin/webdoc \
 	  modules/webstyle/css/Makefile \
 	  modules/webstyle/doc/Makefile \
 	  modules/webstyle/etc/Makefile \
 	  modules/webstyle/doc/admin/Makefile \
 	  modules/webstyle/doc/hacking/Makefile \
 	  modules/webstyle/img/Makefile \
 	  modules/webstyle/lib/Makefile \
 	  modules/webcomment/Makefile \
 	  modules/webcomment/doc/Makefile \
 	  modules/webcomment/doc/admin/Makefile \
 	  modules/webcomment/doc/hacking/Makefile \
 	  modules/webcomment/lib/Makefile \
 	  modules/webcomment/web/Makefile \
 	  modules/webcomment/web/admin/Makefile \
 	  modules/webbasket/Makefile \
 	  modules/webbasket/doc/Makefile \
 	  modules/webbasket/doc/admin/Makefile \
 	  modules/webbasket/doc/hacking/Makefile \
 	  modules/webbasket/lib/Makefile \
 	  modules/webbasket/web/Makefile \
 	  modules/webjournal/Makefile \
 	  modules/webjournal/doc/Makefile \
 	  modules/webjournal/doc/admin/Makefile \
 	  modules/webjournal/doc/hacking/Makefile \
 	  modules/webjournal/lib/Makefile \
 	  modules/webjournal/lib/widgets/Makefile \
 	  modules/webjournal/web/Makefile \
 	  modules/webjournal/web/admin/Makefile \
 	  modules/webmessage/Makefile \
 	  modules/webmessage/bin/Makefile \
 	  modules/webmessage/bin/webmessageadmin \
 	  modules/webmessage/doc/Makefile \
 	  modules/webmessage/doc/admin/Makefile \
 	  modules/webmessage/doc/hacking/Makefile \
 	  modules/webmessage/lib/Makefile \
 	  modules/webmessage/web/Makefile \
 	  modules/websubmit/Makefile \
 	  modules/websubmit/bin/Makefile \
 	  modules/websubmit/bin/bibdocfile \
 	  modules/websubmit/etc/Makefile \
 	  modules/websubmit/doc/Makefile \
 	  modules/websubmit/doc/admin/Makefile \
 	  modules/websubmit/doc/hacking/Makefile \
 	  modules/websubmit/lib/Makefile \
 	  modules/websubmit/lib/functions/Makefile \
 	  modules/websubmit/web/Makefile \
 	  modules/websubmit/web/admin/Makefile \
 	  ])
 
 ## Finally, write output files:
 AC_OUTPUT
 
 ## Write help:
 AC_MSG_RESULT([****************************************************************************])
 AC_MSG_RESULT([** Your CDS Invenio installation is now ready for building.               **])
 AC_MSG_RESULT([** You have entered the following parameters:                             **])
 AC_MSG_RESULT([**   - CDS Invenio main install directory: ${prefix}])
 AC_MSG_RESULT([**   - Python executable: $PYTHON])
 AC_MSG_RESULT([**   - MySQL client executable: $MYSQL])
 AC_MSG_RESULT([**   - CLISP executable: $CLISP])
 AC_MSG_RESULT([**   - CMUCL executable: $CMUCL])
 AC_MSG_RESULT([**   - SBCL executable: $SBCL])
 AC_MSG_RESULT([** Here are the steps to continue the building process:                   **])
 AC_MSG_RESULT([**   1) Type 'make' to build your CDS Invenio system.                     **])
 AC_MSG_RESULT([**   2) Type 'make install' to install your CDS Invenio system.           **])
 AC_MSG_RESULT([** After that you can start customizing your installation as documented   **])
 AC_MSG_RESULT([** in the INSTALL file (i.e. edit invenio.conf, run inveniocfg, etc).     **])
 AC_MSG_RESULT([** Good luck, and thanks for choosing CDS Invenio.                        **])
 AC_MSG_RESULT([**              -- CDS Development Group <cds.support@cern.ch>            **])
 AC_MSG_RESULT([****************************************************************************])
 
 ## end of file
diff --git a/modules/bibupload/lib/bibupload.py b/modules/bibupload/lib/bibupload.py
index 6bc709670..1b98a468f 100644
--- a/modules/bibupload/lib/bibupload.py
+++ b/modules/bibupload/lib/bibupload.py
@@ -1,1711 +1,1751 @@
 # -*- coding: utf-8 -*-
 ##
 ## $Id$
 ##
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """
 BibUpload: Receive MARC XML file and update the appropriate database
 tables according to options.
 
     Usage: bibupload [options] input.xml
     Examples:
       $ bibupload -i input.xml
 
     Options:
      -a, --append            new fields are appended to the existing record
      -c, --correct           fields are replaced by the new ones in the
         existing record
      -f, --format            takes only the FMT fields into account.
         Does not update
      -i, --insert            insert the new record in the database
      -r, --replace           the existing record is entirely replaced
         by the new one
      -z, --reference         update references (update only 999 fields)
      -s, --stage=STAGE       stage to start from in the algorithm
         (0: always done; 1: FMT tags;
          2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update)
      -n,  --notimechange     do not change record last modification date
         when updating
 
     Scheduling options:
      -u, --user=USER         user name to store task, password needed
 
     General options:
      -h, --help              print this help and exit
      -v, --verbose=LEVEL     verbose level (from 0 to 9, default 1)
      -V  --version           print the script version
 """
 
 __revision__ = "$Id$"
 
 import os
 import sys
 import time
 from zlib import compress
 import urllib2
 import urllib
 import socket
 
 from invenio.config import CFG_OAI_ID_FIELD, CFG_SITE_URL, \
      CFG_SITE_SECURE_URL, \
      CFG_BIBUPLOAD_REFERENCE_TAG, \
      CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \
      CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \
      CFG_BIBUPLOAD_STRONG_TAGS
 
 from invenio.bibupload_config import CFG_BIBUPLOAD_CONTROLFIELD_TAGS, \
     CFG_BIBUPLOAD_SPECIAL_TAGS
 from invenio.dbquery import run_sql, \
                             Error
 from invenio.bibrecord import create_records, \
                               create_record, \
                               record_add_field, \
                               record_delete_field, \
                               record_xml_output, \
                               record_get_field_instances, \
                               record_get_field_values, \
                               field_get_subfield_values
 from invenio.dateutils import convert_datestruct_to_datetext
 from invenio.errorlib import register_exception
 from invenio.bibformat import format_record
 from invenio.config import CFG_WEBSUBMIT_FILEDIR
 from invenio.bibtask import task_init, write_message, \
     task_set_option, task_get_option, task_get_task_param, task_update_status, \
     task_update_progress, task_sleep_now_if_required, fix_argv_paths
 from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, \
     get_docname_from_url, get_format_from_url, check_valid_url, download_url, \
-    KEEP_OLD_VALUE
+    KEEP_OLD_VALUE, decompose_bibdocfile_old_url, decompose_bibdocfile_url
 
 #Statistic variables
 stat = {}
 stat['nb_records_to_upload'] = 0
 stat['nb_records_updated'] = 0
 stat['nb_records_inserted'] = 0
 stat['nb_errors'] = 0
 stat['exectime'] = time.localtime()
 
 ## Let's set a reasonable timeout for URL request (e.g. FFT)
 socket.setdefaulttimeout(40)
 
 ### bibupload engine functions:
 def bibupload(record, opt_tag=None, opt_mode=None,
         opt_stage_to_start_from=1, opt_notimechange=0):
     """Main function: process a record and fit it in the tables
     bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record
     metadata.
 
     Return (error_code, recID) of the processed record.
     """
     assert(opt_mode in ('insert', 'replace', 'replace_or_insert', 'reference',
         'correct', 'append', 'format'))
     error = None
     # If there are special tags to proceed check if it exists in the record
     if opt_tag is not None and not(record.has_key(opt_tag)):
         write_message("    Failed: Tag not found, enter a valid tag to update.",
                     verbose=1, stream=sys.stderr)
         return (1, -1)
 
     # Extraction of the Record Id from 001, SYSNO or OAIID tags:
     rec_id = retrieve_rec_id(record, opt_mode)
     if rec_id == -1:
         return (1, -1)
     elif rec_id > 0:
         write_message("   -Retrieve record ID (found %s): DONE." % rec_id, verbose=2)
         if not record.has_key('001'):
             # Found record ID by means of SYSNO or OAIID, and the
             # input MARCXML buffer does not have this 001 tag, so we
             # should add it now:
             error = record_add_field(record, '001', '', '', rec_id, [], 0)
             if error is None:
                 write_message("   Failed: " \
                                             "Error during adding the 001 controlfield "  \
                                             "to the record", verbose=1, stream=sys.stderr)
                 return (1, int(rec_id))
             else:
                 error = None
             write_message("   -Added tag 001: DONE.", verbose=2)
     write_message("   -Check if the xml marc file is already in the database: DONE" , verbose=2)
 
     # Reference mode check if there are reference tag
     if opt_mode == 'reference':
         error = extract_tag_from_record(record, CFG_BIBUPLOAD_REFERENCE_TAG)
         if error is None:
             write_message("   Failed: No reference tags has been found...",
                         verbose=1, stream=sys.stderr)
             return (1, -1)
         else:
             error = None
             write_message("   -Check if reference tags exist: DONE", verbose=2)
 
     if opt_mode == 'insert' or \
     (opt_mode == 'replace_or_insert' and rec_id is None):
         insert_mode_p = True
         # Insert the record into the bibrec databases to have a recordId
         rec_id = create_new_record()
         write_message("   -Creation of a new record id (%d): DONE" % rec_id, verbose=2)
 
         # we add the record Id control field to the record
         error = record_add_field(record, '001', '', '', rec_id, [], 0)
         if error is None:
             write_message("   Failed: " \
                                         "Error during adding the 001 controlfield "  \
                                         "to the record", verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         else:
             error = None
 
     elif opt_mode != 'insert' and opt_mode != 'format' and \
             opt_stage_to_start_from != 5:
         insert_mode_p = False
         # Update Mode
         # Retrieve the old record to update
         rec_old = create_record(format_record(int(rec_id), 'xm'), 2)[0]
         if rec_old is None:
             write_message("   Failed during the creation of the old record!",
                         verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         else:
             write_message("   -Retrieve the old record to update: DONE", verbose=2)
 
         # In Replace mode, take over old strong tags if applicable:
         if opt_mode == 'replace' or \
             opt_mode == 'replace_or_insert':
             copy_strong_tags_from_old_record(record, rec_old)
 
         # Delete tags to correct in the record
         if opt_mode == 'correct' or opt_mode == 'reference':
             delete_tags_to_correct(record, rec_old, opt_tag)
             write_message("   -Delete the old tags to correct in the old record: DONE",
                         verbose=2)
 
         # Append new tag to the old record and update the new record with the old_record modified
         if opt_mode == 'append' or opt_mode == 'correct' or \
             opt_mode == 'reference':
             record = append_new_tag_to_old_record(record, rec_old,
                 opt_tag, opt_mode)
             write_message("   -Append new tags to the old record: DONE", verbose=2)
 
         # now we clear all the rows from bibrec_bibxxx from the old
         # record (they will be populated later (if needed) during
         # stage 4 below):
         delete_bibrec_bibxxx(rec_old, rec_id)
         write_message("   -Clean bibrec_bibxxx: DONE", verbose=2)
     write_message("   -Stage COMPLETED", verbose=2)
 
     # Have a look if we have FMT tags
     write_message("Stage 1: Start (Insert of FMT tags if exist).", verbose=2)
     if opt_stage_to_start_from <= 1 and \
         extract_tag_from_record(record, 'FMT') is not None:
         record = insert_fmt_tags(record, rec_id, opt_mode)
         if record is None:
             write_message("   Stage 1 failed: Error while inserting FMT tags",
                         verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         elif record == 0:
             # Mode format finished
             stat['nb_records_updated'] += 1
             return (0, int(rec_id))
         write_message("   -Stage COMPLETED", verbose=2)
     else:
         write_message("   -Stage NOT NEEDED", verbose=2)
 
     # Have a look if we have FFT tags
     write_message("Stage 2: Start (Process FFT tags if exist).", verbose=2)
     record_had_FFT = False
     if opt_stage_to_start_from <= 2 and \
         extract_tag_from_record(record, 'FFT') is not None:
         record_had_FFT = True
         if not writing_rights_p():
             write_message("   Stage 2 failed: Error no rights to write fulltext files",
                 verbose=1, stream=sys.stderr)
             task_update_status("ERROR")
             sys.exit(1)
         try:
             record = elaborate_fft_tags(record, rec_id, opt_mode)
         except Exception, e:
             register_exception()
             write_message("   Stage 2 failed: Error while elaborating FFT tags: %s" % e,
                 verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         if record is None:
             write_message("   Stage 2 failed: Error while elaborating FFT tags",
                         verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         write_message("   -Stage COMPLETED", verbose=2)
     else:
         write_message("   -Stage NOT NEEDED", verbose=2)
 
     # Have a look if we have FFT tags
     write_message("Stage 2B: Start (Synchronize 8564 tags).", verbose=2)
     if opt_stage_to_start_from <= 2 and (record_had_FFT or extract_tag_from_record(record, '856') is not None):
         try:
-            record = synchronize_8564(rec_id, record)
+            record = synchronize_8564(rec_id, record, record_had_FFT)
         except Exception, e:
             register_exception()
             write_message("   Stage 2B failed: Error while synchronizing 8564 tags: %s" % e,
                 verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         if record is None:
             write_message("   Stage 2B failed: Error while synchronizing 8564 tags",
                         verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         write_message("   -Stage COMPLETED", verbose=2)
     else:
         write_message("   -Stage NOT NEEDED", verbose=2)
 
     # Update of the BibFmt
     write_message("Stage 3: Start (Update bibfmt).", verbose=2)
     if opt_stage_to_start_from <= 3:
         # format the single record as xml
         rec_xml_new = record_xml_output(record)
         # Update bibfmt with the format xm of this record
         if opt_mode != 'format':
             error = update_bibfmt_format(rec_id, rec_xml_new, 'xm')
         if error == 1:
             write_message("   Failed: error during update_bibfmt_format",
                         verbose=1, stream=sys.stderr)
             return (1, int(rec_id))
         # archive MARCXML format of this record for version history purposes:
         if opt_mode != 'format':
             error = archive_marcxml_for_history(rec_id)
             if error == 1:
                 write_message("   Failed to archive MARCXML for history",
                               verbose=1, stream=sys.stderr)
                 return (1, int(rec_id))
             else:
                 write_message("   -Archived MARCXML for history : DONE", verbose=2)
         write_message("   -Stage COMPLETED", verbose=2)
 
     # Update the database MetaData
     write_message("Stage 4: Start (Update the database with the metadata).",
                 verbose=2)
     if opt_stage_to_start_from <= 4:
         if opt_mode == 'insert' or \
         opt_mode == 'replace' or \
         opt_mode == 'replace_or_insert' or \
         opt_mode == 'append' or \
         opt_mode == 'correct' or \
         opt_mode == 'reference':
             update_database_with_metadata(record, rec_id)
         else:
             write_message("   -Stage NOT NEEDED in mode %s" % opt_mode,
                         verbose=2)
         write_message("   -Stage COMPLETED", verbose=2)
     else:
         write_message("   -Stage NOT NEEDED", verbose=2)
 
     # Finally we update the bibrec table with the current date
     write_message("Stage 5: Start (Update bibrec table with current date).",
                 verbose=2)
     if opt_stage_to_start_from <= 5 and \
     opt_notimechange == 0 and \
     not insert_mode_p:
         now = convert_datestruct_to_datetext(time.localtime())
         write_message("   -Retrieved current localtime: DONE", verbose=2)
         update_bibrec_modif_date(now, rec_id)
         write_message("   -Stage COMPLETED", verbose=2)
     else:
         write_message("   -Stage NOT NEEDED", verbose=2)
 
     # Increase statistics
     if insert_mode_p:
         stat['nb_records_inserted'] += 1
     else:
         stat['nb_records_updated'] += 1
 
     # Upload of this record finish
     write_message("Record "+str(rec_id)+" DONE", verbose=1)
     return (0, int(rec_id))
 
 def print_out_bibupload_statistics():
     """Log a one-line summary of the counters kept in the module-level
     stat dictionary, plus the time elapsed since stat['exectime']."""
     template = "Task stats: %(nb_input)d input records, %(nb_updated)d updated, " \
                "%(nb_inserted)d inserted, %(nb_errors)d errors.  " \
                "Time %(nb_sec).2f sec."
     figures = {}
     figures['nb_input'] = stat['nb_records_to_upload']
     figures['nb_updated'] = stat['nb_records_updated']
     figures['nb_inserted'] = stat['nb_records_inserted']
     figures['nb_errors'] = stat['nb_errors']
     # stat['exectime'] is a time.localtime() struct, hence the mktime().
     figures['nb_sec'] = time.time() - time.mktime(stat['exectime'])
     write_message(template % figures)
 
 def open_marc_file(path):
     """Open a file and return the data"""
     try:
         # open the file containing the marc document
         marc_file = open(path,'r')
         marc = marc_file.read()
         marc_file.close()
     except IOError, erro:
         write_message("Error: %s" % erro, verbose=1, stream=sys.stderr)
         write_message("Exiting.", sys.stderr)
         task_update_status("ERROR")
         sys.exit(1)
     return marc
 
 def xml_marc_to_records(xml_marc):
     """Parse the MARCXML string xml_marc and return the list of records.
 
     When nothing can be parsed, or when the first parsed entry holds no
     record, the problem is logged, the task status is set to ERROR and
     the process exits with status 1.
     """
     parsed = create_records(xml_marc, 1, 1)
     if parsed != [] and parsed[0][0] is not None:
         # Each parsed entry is a sequence whose first item is the record.
         return [entry[0] for entry in parsed]
 
     if parsed == []:
         write_message("Error: Cannot parse MARCXML file.", verbose=1, stream=sys.stderr)
     else:
         write_message("Error: MARCXML file has wrong format: %s" % parsed,
             verbose=1, stream=sys.stderr)
     write_message("Exiting.", sys.stderr)
     task_update_status("ERROR")
     sys.exit(1)
 
 def find_record_format(rec_id, format):
     """Count the rows of the bibfmt table holding record REC_ID in
        format FORMAT.
 
        Return that count: 0 when the record is not formatted in FORMAT,
        1 when it is, more when it is stored several times (should never
        occur).  0 is also returned when the query raises a database
        Error (the error is logged).
     """
     try:
         rows = run_sql("""SELECT COUNT(id) FROM bibfmt WHERE id_bibrec=%s AND format=%s""",
                        (rec_id, format))
         return rows[0][0]
     except Error, error:
         write_message("   Error during find_record_format() : %s " % error, verbose=1, stream=sys.stderr)
         return 0
 
 def find_record_from_recid(rec_id):
     """
     Try to find record in the database from the REC_ID number.
     Return record ID if found, None otherwise.
 
     A database Error is logged and then re-raised: "not found" and
     "could not look it up" must not be confused by the caller.
     """
     try:
         res = run_sql("SELECT id FROM bibrec WHERE id=%s",
                       (rec_id,))
     except Error, error:
         write_message("   Error during find_record_from_recid() : %s "
             % error, verbose=1, stream=sys.stderr)
         # Previously execution fell through to "if res:" with res never
         # bound, masking the real problem with an UnboundLocalError.
         raise
     if res:
         return res[0][0]
     else:
         return None
 
 def find_record_from_sysno(sysno):
     """
     Try to find record in the database from the external SYSNO number.
     Return record ID if found, None otherwise.
 
     The tables queried are derived from the first two characters of
     CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG (bibNNx and bibrec_bibNNx).
     A database Error is logged and then re-raised.
     """
     bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2]+'x'
     bibrec_bibxxx = 'bibrec_' + bibxxx
     try:
         res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
             %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
             AND bb.id_bibxxx=b.id""" % \
                       {'bibxxx': bibxxx,
                        'bibrec_bibxxx': bibrec_bibxxx},
                       (CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, sysno,))
     except Error, error:
         write_message("   Error during find_record_from_sysno(): %s " % error,
                       verbose=1, stream=sys.stderr)
         # Previously execution fell through to "if res:" with res never
         # bound, masking the real problem with an UnboundLocalError.
         raise
     if res:
         return res[0][0]
     else:
         return None
 
 def find_record_from_extoaiid(extoaiid):
     """
     Try to find record in the database from the external EXTOAIID number.
     Return record ID if found, None otherwise.
 
     The tables queried are derived from the first two characters of
     CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG (bibNNx and bibrec_bibNNx).
     A database Error is logged and then re-raised.
     """
     bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:2]+'x'
     bibrec_bibxxx = 'bibrec_' + bibxxx
     try:
         res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
             %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
             AND bb.id_bibxxx=b.id""" % \
                       {'bibxxx': bibxxx,
                        'bibrec_bibxxx': bibrec_bibxxx},
                       (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, extoaiid,))
     except Error, error:
         write_message("   Error during find_record_from_extoaiid(): %s "
             % error, verbose=1, stream=sys.stderr)
         # Previously execution fell through to "if res:" with res never
         # bound, masking the real problem with an UnboundLocalError.
         raise
     if res:
         return res[0][0]
     else:
         return None
 
 def find_record_from_oaiid(oaiid):
     """
     Try to find record in the database from the OAI ID number.
     Return record ID if found, None otherwise.
 
     The tables queried are derived from the first two characters of
     CFG_OAI_ID_FIELD (bibNNx and bibrec_bibNNx).
     A database Error is logged and then re-raised.
     """
     bibxxx = 'bib'+CFG_OAI_ID_FIELD[0:2]+'x'
     bibrec_bibxxx = 'bibrec_' + bibxxx
     try:
         res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
             %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
             AND bb.id_bibxxx=b.id""" % \
                       {'bibxxx': bibxxx,
                        'bibrec_bibxxx': bibrec_bibxxx},
                       (CFG_OAI_ID_FIELD, oaiid,))
     except Error, error:
         write_message("   Error during find_record_from_oaiid(): %s " % error,
                       verbose=1, stream=sys.stderr)
         # Previously execution fell through to "if res:" with res never
         # bound, masking the real problem with an UnboundLocalError.
         raise
     if res:
         return res[0][0]
     else:
         return None
 
 def extract_tag_from_record(record, tag_number):
     """Return what record stores under the key tag_number.
 
     None is returned when record is empty (or None) or when it has no
     such key.
     """
     if not record:
         return None
     return record.get(tag_number, None)
 
 def retrieve_rec_id(record, opt_mode):
     """Retrieve the record Id from a record by using tag 001 or SYSNO or OAI ID
     tag. opt_mod is the desired mode.
 
     The identifiers are tried in this order, stopping at the first one
     that matches a record in the database: tag 001, external SYSNO,
     external OAI ID, local OAI ID.
 
     @param record: the record structure to identify
     @param opt_mode: the upload mode
     @return: the record ID when one is found; None when none is found and
         opt_mode is 'insert' or 'replace_or_insert'; -1 on failure (a
         record is found, or tag 001 is present, in 'insert' mode; no
         record is found in any mode other than 'insert' and
         'replace_or_insert')
     """
 
     rec_id = None
 
     # 1st step: we look for the tag 001
     tag_001 = extract_tag_from_record(record, '001')
     if tag_001 is not None:
         # We extract the record ID from the tag
         rec_id = tag_001[0][3]
         # if we are in insert mode => error
         if opt_mode == 'insert':
             write_message("   Failed : Error tag 001 found in the xml" \
                           " submitted, you should use the option replace," \
                           " correct or append to replace an existing" \
                           " record. (-h for help)",
                           verbose=1, stream=sys.stderr)
             return -1
         else:
             # we found the rec id and we are not in insert mode => continue
             # we try to match rec_id against the database:
             if find_record_from_recid(rec_id) is not None:
                 # okay, 001 corresponds to some known record
                 return rec_id
             else:
                 # The record doesn't exist yet. We shall have try to check
                 # the SYSNO or OAI id later.
                 write_message("   -Tag 001 value not found in database.",
                               verbose=9)
                 rec_id = None
     else:
         write_message("   -Tag 001 not found in the xml marc file.", verbose=9)
 
     # In the three lookups below the configured tag string is sliced into
     # [0:3], [3:4], [4:5] and [5:6] (presumably tag, the two indicators
     # and the subfield code); a "_" in position 3 or 4 is passed as "".
     if rec_id is None:
         # 2nd step we look for the SYSNO
         sysnos = record_get_field_values(record,
             CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3],
             CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \
             CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or "",
             CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \
             CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or "",
             CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6])
         if sysnos:
             sysno = sysnos[0] # there should be only one external SYSNO
             write_message("   -Checking if SYSNO " + sysno + \
                           " exists in the database", verbose=9)
             # try to find the corresponding rec id from the database
             rec_id = find_record_from_sysno(sysno)
             if rec_id is None:
                 # The record doesn't exist yet. We will try to check
                 # external and internal OAI ids later.
                 write_message("   -Tag SYSNO value not found in database.",
                               verbose=9)
         else:
             write_message("   -Tag SYSNO not found in the xml marc file.",
                 verbose=9)
 
     if rec_id is None:
         # 3rd step we look for the external OAIID
         extoaiids = record_get_field_values(record,
             CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3],
             CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \
             CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or "",
             CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \
             CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or "",
             CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6])
         if extoaiids:
             extoaiid = extoaiids[0] # there should be only one external OAI ID
             write_message("   -Checking if EXTOAIID " + extoaiid + \
                           " exists in the database", verbose=9)
             # try to find the corresponding rec id from the database
             rec_id = find_record_from_extoaiid(extoaiid)
             if rec_id is None:
                 # The record doesn't exist yet. We will try to check
                 # OAI id later.
                 write_message("   -Tag EXTOAIID value not found in database.",
                               verbose=9)
         else:
             write_message("   -Tag EXTOAIID not found in the xml marc file.", verbose=9)
 
     if rec_id is None:
         # 4th step we look for the OAI ID
         oaiidvalues = record_get_field_values(record,
             CFG_OAI_ID_FIELD[0:3],
             CFG_OAI_ID_FIELD[3:4] != "_" and \
             CFG_OAI_ID_FIELD[3:4] or "",
             CFG_OAI_ID_FIELD[4:5] != "_" and \
             CFG_OAI_ID_FIELD[4:5] or "",
             CFG_OAI_ID_FIELD[5:6])
         if oaiidvalues:
             oaiid = oaiidvalues[0] # there should be only one OAI ID
             write_message("   -Check if local OAI ID " + oaiid + \
                           " exist in the database", verbose=9)
 
             # try to find the corresponding rec id from the database
             rec_id = find_record_from_oaiid(oaiid)
             if rec_id is None:
                 write_message("   -Tag OAI ID value not found in database.",
                               verbose=9)
         else:
             # This message used to say "Tag SYSNO" (copy-paste from the
             # 2nd step), which made verbose logs misleading.
             write_message("   -Tag OAI ID not found in the xml marc file.",
                 verbose=9)
 
     # Now we should have detected rec_id from SYSNO or OAIID
     # tags.  (None otherwise.)
     if rec_id:
         if opt_mode == 'insert':
             write_message("   Failed : Record found in the database," \
                           " you should use the option replace," \
                           " correct or append to replace an existing" \
                           " record. (-h for help)",
                           verbose=1, stream=sys.stderr)
             return -1
     else:
         if opt_mode != 'insert' and \
            opt_mode != 'replace_or_insert':
             write_message("   Failed : Record not found in the database."\
                           " Please insert the file before updating it."\
                           " (-h for help)", verbose=1, stream=sys.stderr)
             return -1
 
     return rec_id
 
 ### Insert functions
 
 def create_new_record():
     """Insert a new row in the bibrec table, with both creation and
     modification dates set to the current local time.
 
     Return what run_sql gives back for the INSERT (used by the caller as
     the new record ID), or None when a database Error occurs (the error
     is logged).
     """
     timestamp = convert_datestruct_to_datetext(time.localtime())
     query = """INSERT INTO bibrec (creation_date, modification_date)
                 VALUES (%s, %s)"""
     try:
         new_rec_id = run_sql(query, (timestamp, timestamp))
     except Error, error:
         write_message("   Error during the creation_new_record function : %s "
             % error, verbose=1, stream=sys.stderr)
         return None
     return new_rec_id
 
 def insert_bibfmt(id_bibrec, marc, format, modification_date='1970-01-01 00:00:00'):
     """Store the zlib-compressed value marc for record id_bibrec and
     format in the bibfmt table.
 
     A modification_date that does not parse as "%Y-%m-%d %H:%M:%S" is
     replaced by '1970-01-01 00:00:00'.  Return what run_sql gives back
     for the INSERT, or None when a database Error occurs (the error is
     logged).
     """
     # compress first, exactly as before, so failures surface in the same order
     compressed_value = compress(marc)
 
     date_layout = "%Y-%m-%d %H:%M:%S"
     try:
         time.strptime(modification_date, date_layout)
     except ValueError:
         modification_date = '1970-01-01 00:00:00'
 
     query = """INSERT INTO  bibfmt (id_bibrec, format, last_updated, value)
         VALUES (%s, %s, %s, %s)"""
     values = (id_bibrec, format, modification_date, compressed_value)
     try:
         return run_sql(query, values)
     except Error, error:
         write_message("   Error during the insert_bibfmt function : %s "
             % error, verbose=1, stream=sys.stderr)
         return None
 
 def insert_record_bibxxx(tag, value):
     """Make sure the (tag, value) pair exists in the proper bibxxx table.
 
     The table name is 'bib' + the first two characters of tag + 'x'.
     An existing row is reused when its value is strictly equal to value;
     otherwise a new row is inserted.
 
     A database Error is logged and then re-raised.
 
     @param tag: the tag to store
     @param value: the value to store
     @return: tuple (table_name, row_id) of the existing or new row
     """
     # determine into which table one should insert the record
     table_name = 'bib'+tag[0:2]+'x'
 
     # check if the tag, value combination exists in the table
     query = """SELECT id,value FROM %s """ % table_name
     query += """ WHERE tag=%s AND value=%s"""
     params = (tag, value)
     try:
         res = run_sql(query, params)
     except Error, error:
         write_message("   Error during the insert_record_bibxxx function : %s "
             % error, verbose=1, stream=sys.stderr)
         # Previously execution went on to iterate over res, which was
         # never bound, masking the real error with an UnboundLocalError.
         raise
 
     # Note: compare now the found values one by one and look for
     # string binary equality (e.g. to respect lowercase/uppercase
     # match), regardless of the charset etc settings.  Ideally we
     # could use a BINARY operator in the above SELECT statement, but
     # we would have to check compatibility on various MySQLdb versions
     # etc; this approach checks all matched values in Python, not in
     # MySQL, which is less cool, but more conservative, so it should
     # work better on most setups.
     for row in res:
         row_id = row[0]
         row_value = row[1]
         if row_value == value:
             return (table_name, row_id)
 
     # We got here only when the tag,value combination was not found,
     # so it is now necessary to insert the tag,value combination into
     # bibxxx table as new.
     query = """INSERT INTO %s """ % table_name
     query += """ (tag, value) values (%s , %s)"""
     params = (tag, value)
     try:
         row_id = run_sql(query, params)
     except Error, error:
         write_message("   Error during the insert_record_bibxxx function : %s "
             % error, verbose=1, stream=sys.stderr)
         # Do not fall through: row_id would be unbound, or worse, be the
         # id of a non-matching row left over from the loop above.
         raise
     return (table_name, row_id)
 
 def insert_record_bibrec_bibxxx(table_name, id_bibxxx,
         field_number, id_bibrec):
     """Link record id_bibrec to row id_bibxxx of table_name by inserting
     a row into the 'bibrec_' + table_name table.
 
     A database Error is logged and then re-raised.
 
     @param table_name: name of the bibxxx table holding id_bibxxx
     @param id_bibxxx: id of the row in table_name
     @param field_number: value stored in the field_number column
     @param id_bibrec: id of the record
     @return: what run_sql gives back for the INSERT
     """
     # determine into which table one should insert the record
     full_table_name = 'bibrec_'+ table_name
 
     # insert the proper row into the table
     query = """INSERT INTO %s """ % full_table_name
     query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)"""
     params = (id_bibrec, id_bibxxx, field_number)
     try:
         res = run_sql(query, params)
     except Error, error:
         write_message("   Error during the insert_record_bibrec_bibxxx"
             " function 2nd query : %s " % error, verbose=1, stream=sys.stderr)
         # Previously "return res" ran with res never bound, masking the
         # real error with an UnboundLocalError.
         raise
     return res
 
-def synchronize_8564(rec_id, record):
+def synchronize_8564(rec_id, record, record_had_FFT):
     """Sinchronize the 8564 tags for record with actual files. descriptions
     should be a dictionary docname:description for the new description to be
-    inserted."""
+    inserted.
+    If record_had_FFT the algorithm assume that every fulltext operation
+    has been performed through FFT, hence it discard current 8564 local tags,
+    and rebuild them after bibdocfile tables. Otherwise it first import
+    from 8564 tags the $y and $z subfields corresponding to local files and
+    merge them into bibdocfile tables.
+    """
+    def merge_marc_into_bibdocfile(field):
+        """Given the 8564 tag it retrieve the corresponding bibdoc and
+        merge the $y and $z subfields."""
+        url = field_get_subfield_values(field, 'u')[:1]
+        description = field_get_subfield_values(field, 'y')[:1]
+        comment = field_get_subfield_values(field, 'z')[:1]
+        if url:
+            url = url[0]
+        else:
+            return
+        if description:
+            description = description[0]
+        if comment:
+            comment = comment[0]
+        if url.endswith('/files/'):
+            ## Old url /record/123/files/ only
+            ## We import comment and description for all the bibdocs
+            recid = decompose_bibdocfile_old_url(url)
+            bibrecdocs = BibRecDocs(recid)
+            for bibdoc in bibrecdocs.list_bibdocs():
+                if comment:
+                    bibdoc.set_description(description, format)
+                if description:
+                    bibdoc.set_comment(comment, format)
+        else:
+            recid, docname, format = decompose_bibdocfile_url(url)
+            bibdoc = BibRecDocs(recid).get_bibdoc(docname)
+            if description:
+                bibdoc.set_description(description, format)
+            if comment:
+                bibdoc.set_comment(comment, format)
+
     write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record), verbose=9)
     tags8564s = record_get_field_instances(record, '856', '4', ' ')
     filtered_tags8564s = []
 
     # Let's discover all the previous internal urls, in order to rebuild them!
     for field in tags8564s:
         to_be_removed = False
         for value in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'):
             if value.startswith('%s/record/%s/files/' % (CFG_SITE_URL, rec_id)) or \
                 value.startswith('%s/record/%s/files/' % (CFG_SITE_SECURE_URL, rec_id)):
+                if not record_had_FFT:
+                    merge_marc_into_bibdocfile(field)
                 to_be_removed = True
         if not to_be_removed:
             filtered_tags8564s.append(field)
 
     # Let's keep in the record only external 8564
     record_delete_field(record, '856', '4', ' ') # First we delete 8564
     for field in filtered_tags8564s: # Then we readd external ones
         record_add_field(record, '856', '4', ' ', '', field[0])
 
     # Now we refresh with existing internal 8564
     bibrecdocs = BibRecDocs(rec_id)
     latest_files = bibrecdocs.list_latest_files()
     for afile in latest_files:
         url = afile.get_url()
         description = afile.get_description()
         comment = afile.get_comment()
         new_subfield = [('u', url)]
         if description:
             new_subfield.append(('y', description))
         if comment:
             new_subfield.append(('z', comment))
         record_add_field(record, '856', '4', ' ', '', new_subfield)
 
     # Let'handle all the icons
     for bibdoc in bibrecdocs.list_bibdocs():
         icon = bibdoc.get_icon()
         if icon:
             icon = icon.list_all_files()
             if icon:
                 url = icon[0].get_url() ## The 1st format found should be ok
                 new_subfield = [('q', url)]
                 new_subfield.append(('x', 'icon'))
                 record_add_field(record, '856', '4', ' ', '', new_subfield)
     return record
 
 def elaborate_fft_tags(record, rec_id, mode):
     """
     Process FFT tags that should contain $a with file pathes or URLs
     to get the fulltext from.  This function enriches record with
     proper 8564 URL tags, downloads fulltext files and stores them
     into var/data structure where appropriate.
 
     CFG_BIBUPLOAD_WGET_SLEEP_TIME defines time to sleep in seconds in
     between URL downloads.
 
     Note: if an FFT tag contains multiple $a subfields, we upload them
     into different 856 URL tags in the metadata.  See regression test
     case test_multiple_fft_insert_via_http().
     """
 
     # Let's define some handy sub procedure.
     def _add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment):
         """Adds a new format for a given bibdoc. Returns True when everything's fine."""
         write_message('Add new format to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment), verbose=9)
         try:
             if not url: # Not requesting a new url. Just updating comment & description
                 return _update_description_and_comment(bibdoc, docname, format, description, comment)
             tmpurl = download_url(url, format)
             try:
                 try:
                     bibdoc.add_file_new_format(tmpurl, description=description, comment=comment)
                 except StandardError, e:
                     write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because format already exists (%s)." % (url, format, docname, doctype, newname, description, comment, e), stream=sys.stderr)
                     raise
             finally:
                 os.remove(tmpurl)
         except Exception, e:
             write_message("Error in downloading '%s' because of: %s" % (url, e), stream=sys.stderr)
             raise
         return True
 
     def _add_new_version(bibdoc, url, format, docname, doctype, newname, description, comment):
         """Adds a new version for a given bibdoc. Returns True when everything's fine."""
         write_message('Add new version to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment))
         try:
             if not url:
                 return _update_description_and_comment(bibdoc, docname, format, description, comment)
             tmpurl = download_url(url, format)
             try:
                 try:
                     bibdoc.add_file_new_version(tmpurl, description=description, comment=comment)
                 except StandardError, e:
                     write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because '%s'." % (url, format, docname, doctype, newname, description, comment, e), stream=sys.stderr)
                     raise
             finally:
                 os.remove(tmpurl)
         except Exception, e:
             write_message("Error in downloading '%s' because of: %s" % (url, e), stream=sys.stderr)
             raise
         return True
 
     def _update_description_and_comment(bibdoc, docname, format, description, comment):
         """Directly update comments and descriptions."""
         write_message('Just updating description and comment for %s with format %s with description %s and comment %s' % (docname, format, description, comment), verbose=9)
         try:
             bibdoc.set_description(description, format)
             bibdoc.set_comment(comment, format)
         except StandardError, e:
             write_message("('%s', '%s', '%s', '%s') description and comment not updated because '%s'." % (docname, format, description, comment, e))
             raise
         return True
 
     def _add_new_icon(bibdoc, url, restriction):
         """Adds a new icon to an existing bibdoc, replacing the previous one if it exists. If url is empty, just remove the current icon."""
         if not url:
             bibdoc.delete_icon()
         else:
             try:
                 path = urllib2.urlparse.urlsplit(url)[2]
                 filename = os.path.split(path)[-1]
                 format = filename[len(file_strip_ext(filename)):]
                 tmpurl = download_url(url, format)
                 try:
                     try:
                         icondoc = bibdoc.add_icon(tmpurl, 'icon-%s' % bibdoc.get_docname())
                         if restriction and restriction != KEEP_OLD_VALUE:
                             icondoc.set_status(restriction)
                     except StandardError, e:
                         write_message("('%s', '%s') icon not added because '%s'." % (url, format, e), stream=sys.stderr)
                         raise
                 finally:
                     os.remove(tmpurl)
             except Exception, e:
                 write_message("Error in downloading '%s' because of: %s" % (url, e), stream=sys.stderr)
                 raise
         return True
 
     tuple_list = extract_tag_from_record(record, 'FFT')
     if tuple_list: # FFT Tags analysis
         write_message("FFTs: "+str(tuple_list), verbose=9)
         docs = {} # docnames and their data
 
         for fft in record_get_field_instances(record, 'FFT', ' ', ' '):
             # Let's discover the type of the document
             # This is a legacy field and will not be enforced any particular
             # check on it.
             doctype = field_get_subfield_values(fft, 't')
             if doctype:
                 doctype = doctype[0]
             else: # Default is Main
                 doctype = 'Main'
 
             # Let's discover the url.
             url = field_get_subfield_values(fft, 'a')
             if url:
                 url = url[0]
                 try:
                     check_valid_url(url)
                 except StandardError, e:
                     raise StandardError, "fft '%s' specify an url ('%s') with problems: %s" % (fft, url, e)
             else:
                 url = ''
 
             # Let's discover the description
             description = field_get_subfield_values(fft, 'd')
             if description != []:
                 description = description[0]
             else:
                 if mode == 'correct':
                     ## If the user require to correct, and do not specify
                     ## a description this means she really want to
                     ## modify the description.
                     description = ''
                 else:
                     description = KEEP_OLD_VALUE
 
             # Let's discover the desired docname to be created/altered
             name = field_get_subfield_values(fft, 'n')
             if name:
                 name = file_strip_ext(name[0])
             else:
                 if url:
                     name = get_docname_from_url(url)
                 else:
                     raise StandardError, "fft '%s' doesn't specifies neither a url nor a name" % str(fft)
 
             # Let's discover the desired new docname in case we want to change it
             newname = field_get_subfield_values(fft, 'm')
             if newname:
                 newname = file_strip_ext(newname[0])
             else:
                 newname = name
 
             # Let's discover the desired format
             format = field_get_subfield_values(fft, 'f')
             if format:
                 format = format[0]
             else:
                 if url:
                     format = get_format_from_url(url)
                 else:
                     format = ''
 
             format = normalize_format(format)
 
             # Let's discover the icon
             icon = field_get_subfield_values(fft, 'x')
             if icon != []:
                 icon = icon[0]
                 if icon != KEEP_OLD_VALUE:
                     try:
                         check_valid_url(icon)
                     except StandardError, e:
                         raise StandardError, "fft '%s' specify an icon ('%s') with problems: %s" % (fft, icon, e)
             else:
                 if mode == 'correct':
                     ## See comment on description
                     icon = ''
                 else:
                     icon = KEEP_OLD_VALUE
 
             # Let's discover the comment
             comment = field_get_subfield_values(fft, 'z')
             if comment != []:
                 comment = comment[0]
             else:
                 if mode == 'correct':
                     ## See comment on description
                     comment = ''
                 else:
                     comment = KEEP_OLD_VALUE
 
             # Let's discover the restriction
             restriction = field_get_subfield_values(fft, 'r')
             if restriction != []:
                 restriction = restriction[0]
             else:
                 if mode == 'correct':
                     ## See comment on description
                     restriction = ''
                 else:
                     restriction = KEEP_OLD_VALUE
 
             version = field_get_subfield_values(fft, 'v')
             if version:
                 version = version[0]
             else:
                 version = ''
 
             if docs.has_key(name): # new format considered
                 (doctype2, newname2, restriction2, icon2, version2, urls) = docs[name]
                 if doctype2 != doctype:
                     raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name)
                 if newname2 != newname:
                     raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name)
                 if restriction2 != restriction:
                     raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name)
                 if icon2 != icon:
                     raise StandardError, "fft '%x' specifies a different icon than the previous fft with docname '%s'" % (str(fft), name)
                 if version2 != version:
                     raise StandardError, "fft '%x' specifies a different version than the previous fft with docname '%s'" % (str(fft), name)
                 for (url2, format2, description2, comment2) in urls:
                     if format == format2:
                         raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, format, name)
                 if url or format:
                     urls.append((url, format, description, comment))
             else:
                 if url or format:
                     docs[name] = (doctype, newname, restriction, icon, version, [(url, format, description, comment)])
                 else:
                     docs[name] = (doctype, newname, restriction, icon, version, [])
 
         write_message('Result of FFT analysis:\n\tDocs: %s' % (docs,), verbose=9)
 
         # Let's remove all FFT tags
         record_delete_field(record, 'FFT', ' ', ' ')
 
         # Preprocessed data elaboration
         bibrecdocs = BibRecDocs(rec_id)
 
         if mode == 'replace': # First we erase previous bibdocs
             for bibdoc in bibrecdocs.list_bibdocs():
                 bibdoc.delete()
             bibrecdocs.build_bibdoc_list()
 
         for docname, (doctype, newname, restriction, icon, version, urls) in docs.iteritems():
             write_message("Elaborating olddocname: '%s', newdocname: '%s', doctype: '%s', restriction: '%s', icon: '%s', urls: '%s', mode: '%s'" % (docname, newname, doctype, restriction, icon, urls, mode), verbose=9)
             if mode in ('insert', 'replace'): # new bibdocs, new docnames, new marc
                 if newname in bibrecdocs.get_bibdoc_names():
                     write_message("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr)
                     raise StandardError
                 try:
                     bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
                     bibdoc.set_status(restriction)
                 except Exception, e:
                     write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
                     raise StandardError
                 for (url, format, description, comment) in urls:
                     assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                 if icon and not icon == KEEP_OLD_VALUE:
                     assert(_add_new_icon(bibdoc, icon, restriction))
             elif mode == 'replace_or_insert': # to be thought as correct_or_insert
                 for bibdoc in bibrecdocs.list_bibdocs():
                     if bibdoc.get_docname() == docname:
                         if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
                             if newname != docname:
                                 try:
                                     bibdoc.change_name(newname)
                                     icon = bibdoc.get_icon()
                                     if icon:
                                         icon.change_name('icon-%s' % newname)
                                 except StandardError, e:
                                     write_message(e, stream=sys.stderr)
                                     raise
                 found_bibdoc = False
                 for bibdoc in bibrecdocs.list_bibdocs():
                     if bibdoc.get_docname() == newname:
                         found_bibdoc = True
                         if doctype == 'PURGE':
                             bibdoc.purge()
                         elif doctype == 'DELETE':
                             bibdoc.delete()
                         elif doctype == 'EXPUNGE':
                             bibdoc.expunge()
                         elif doctype == 'FIX-ALL':
                             bibrecdocs.fix(docname)
                         elif doctype == 'FIX-MARC':
                             pass
                         elif doctype == 'DELETE-FILE':
                             if urls:
                                 for (url, format, description, comment) in urls:
                                     bibdoc.delete_file(format, version)
                         elif doctype == 'REVERT':
                             try:
                                 bibdoc.revert(version)
                             except Exception, e:
                                 write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr)
                                 raise
                         else:
                             if restriction != KEEP_OLD_VALUE:
                                 bibdoc.set_status(restriction)
                             # Since the docname already existed we have to first
                             # bump the version by pushing the first new file
                             # then pushing the other files.
                             if urls:
                                 (first_url, first_format, first_description, first_comment) = urls[0]
                                 other_urls = urls[1:]
                                 assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment))
                                 for (url, format, description, comment) in other_urls:
                                     assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                         if icon != KEEP_OLD_VALUE:
                             assert(_add_new_icon(bibdoc, icon, restriction))
                 if not found_bibdoc:
                     bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
                     for (url, format, description, comment) in urls:
                         assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                     if icon and not icon == KEEP_OLD_VALUE:
                         assert(_add_new_icon(bibdoc, icon, restriction))
             elif mode == 'correct':
                 for bibdoc in bibrecdocs.list_bibdocs():
                     if bibdoc.get_docname() == docname:
                         if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
                             if newname != docname:
                                 try:
                                     bibdoc.change_name(newname)
                                     icon = bibdoc.get_icon()
                                     if icon:
                                         icon.change_name('icon-%s' % newname)
                                 except StandardError, e:
                                     write_message('Error in renaming %s to %s: %s' % (docname, newname, e), stream=sys.stderr)
                                     raise
                 found_bibdoc = False
                 for bibdoc in bibrecdocs.list_bibdocs():
                     if bibdoc.get_docname() == newname:
                         found_bibdoc = True
                         if doctype == 'PURGE':
                             bibdoc.purge()
                         elif doctype == 'DELETE':
                             bibdoc.delete()
                         elif doctype == 'EXPUNGE':
                             bibdoc.expunge()
                         elif doctype == 'FIX-ALL':
                             bibrecdocs.fix(newname)
                         elif doctype == 'FIX-MARC':
                             pass
                         elif doctype == 'DELETE-FILE':
                             if urls:
                                 for (url, format, description, comment) in urls:
                                     bibdoc.delete_file(format, version)
                         elif doctype == 'REVERT':
                             try:
                                 bibdoc.revert(version)
                             except Exception, e:
                                 write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr)
                                 raise
                         else:
                             if restriction != KEEP_OLD_VALUE:
                                 bibdoc.set_status(restriction)
                             if urls:
                                 (first_url, first_format, first_description, first_comment) = urls[0]
                                 other_urls = urls[1:]
                                 assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment))
                                 for (url, format, description, comment) in other_urls:
                                     assert(_add_new_format(bibdoc, url, format, docname, description, doctype, newname, description, comment))
                         if icon != KEEP_OLD_VALUE:
                             _add_new_icon(bibdoc, icon, restriction)
                 if not found_bibdoc:
                     write_message("('%s', '%s', '%s') not added because '%s' docname didn't existed." % (doctype, newname, urls, docname), stream=sys.stderr)
                     raise StandardError
             elif mode == 'append':
                 try:
                     found_bibdoc = False
                     for bibdoc in bibrecdocs.list_bibdocs():
                         if bibdoc.get_docname() == docname:
                             found_bibdoc = True
                             for (url, format, description, comment) in urls:
                                 assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                             if icon not in ('', KEEP_OLD_VALUE):
                                 assert(_add_new_icon(bibdoc, icon, restriction))
                     if not found_bibdoc:
                         try:
                             bibdoc = bibrecdocs.add_bibdoc(doctype, docname)
                             bibdoc.set_status(restriction)
                             for (url, format, description, comment) in urls:
                                 assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment))
                             if icon and not icon == KEEP_OLD_VALUE:
                                 assert(_add_new_icon(bibdoc, icon, restriction))
                         except Exception, e:
                             register_exception()
                             write_message("('%s', '%s', '%s') not appended because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
                             raise
                 except:
                     register_exception()
                     raise
     return record
 
 def insert_fmt_tags(record, rec_id, opt_mode):
     """Store the FMT tags of record in bibfmt and strip them from the record.
 
     Each FMT field supplies $d (date), $f (format name) and $g (format
     value); a missing subfield counts as an empty string.
 
     Returns 0 when opt_mode is 'format' and FMT tags were processed, None
     when opt_mode is 'format' but the record carries no FMT tag, and the
     record (without its FMT tags) in every other case.
     """
     def _first_value(field, code):
         """Return the first value of subfield code, or "" if it is absent."""
         try:
             return field_get_subfield_values(field, code)[0]
         except IndexError:
             return ""
 
     fmt_fields = record_get_field_instances(record, 'FMT')
 
     if not fmt_fields:
         # Nothing to store: that is a failure only in pure format mode.
         if opt_mode == 'format':
             write_message("   Failed: Format updated failed : No tag FMT found", verbose=1, stream=sys.stderr)
             return None
         return record
 
     for fmt_field in fmt_fields:
         date_value = _first_value(fmt_field, "d")
         name_value = _first_value(fmt_field, "f")
         content_value = _first_value(fmt_field, "g")
         # Update the format
         if update_bibfmt_format(rec_id, content_value, name_value, date_value) == 1:
             write_message("   Failed: Error during update_bibfmt", verbose=1, stream=sys.stderr)
 
     # In format mode only the FMT tags matter: the record is left untouched.
     if opt_mode == 'format':
         return 0
 
     # Otherwise the FMT tags are dropped from the record
     record_delete_field(record, 'FMT')
     write_message("   -Delete field FMT from record : DONE", verbose=2)
     return record
 
 
 ### Update functions
 
 def update_bibrec_modif_date(now, bibrec_id):
     """Update the date of the record in bibrec table """
     query = """UPDATE bibrec SET modification_date=%s WHERE id=%s"""
     params = (now, bibrec_id)
     try:
         run_sql(query, params)
         write_message("   -Update record modification date : DONE" , verbose=2)
     except Error, error:
         write_message("   Error during update_bibrec_modif_date function : %s" % error,
                       verbose=1, stream=sys.stderr)
 
 def update_bibfmt_format(id_bibrec, format_value, format_name, modification_date=None):
     """Update, or insert when missing, a format of a record in table bibfmt.
 
     id_bibrec is the record identifier, format_value the content to store
     (it is compressed before being written), format_name the name of the
     format.  modification_date must look like 'YYYY-MM-DD HH:MM:SS': when
     None the current time is used, when not parsable it is replaced by
     '1970-01-01 00:00:00'.
 
     Returns 0 on success and 1 on failure (database error, or the same
     format stored several times for the record).
     """
     if modification_date is None:
         modification_date = time.strftime('%Y-%m-%d %H:%M:%S')
     else:
         try:
             time.strptime(modification_date, "%Y-%m-%d %H:%M:%S")
         except ValueError:
             modification_date = '1970-01-01 00:00:00'
 
     # We check if the format is already in bibFmt
     nb_found = find_record_format(id_bibrec, format_name)
     if nb_found == 1:
         # we are going to update the format
         # compress the format_value value
         pickled_format_value =  compress(format_value)
         # update the format:
         query = """UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s"""
         params = (modification_date, pickled_format_value, id_bibrec, format_name)
         try:
             row_id  = run_sql(query, params)
             if row_id is None:
                 write_message("   Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr)
                 return 1
             else:
                 write_message("   -Update the format %s in bibfmt : DONE" % format_name , verbose=2)
                 return 0
         except Error, error:
             write_message("   Error during the update_bibfmt_format function : %s " % error, verbose=1, stream=sys.stderr)
             # Report the failure like every other error path does, instead
             # of silently falling through and returning None.
             return 1
 
     elif nb_found > 1:
         write_message("   Failed: Same format %s found several time in bibfmt for the same record." % format_name, verbose=1, stream=sys.stderr)
         return 1
     else:
         # Insert the format information in BibFMT
         res = insert_bibfmt(id_bibrec, format_value, format_name, modification_date)
         if res is None:
             write_message("   Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr)
             return 1
         else:
             write_message("   -Insert the format %s in bibfmt : DONE" % format_name , verbose=2)
             return 0
 
 def archive_marcxml_for_history(recID):
     """
     Archive current MARCXML format of record RECID from BIBFMT table
     into hstRECORD table.  Useful to keep MARCXML history of records.
 
     Return 0 if everything went fine.  Return 1 otherwise.
     """
     try:
         res = run_sql("SELECT id_bibrec, value, last_updated FROM bibfmt WHERE format='xm' AND id_bibrec=%s",
                       (recID,))
         if res:
             run_sql("""INSERT INTO hstRECORD (id_bibrec, marcxml, job_id, job_name, job_person, job_date, job_details)
                                       VALUES (%s,%s,%s,%s,%s,%s,%s)""",
                     (res[0][0], res[0][1], task_get_task_param('task_id', 0), 'bibupload', task_get_task_param('user','UNKNOWN'), res[0][2],
                      'mode: ' + task_get_option('mode','UNKNOWN') + '; file: ' + task_get_option('file_path','UNKNOWN') + '.'))
     except Error, error:
         write_message("   Error during archive_marcxml_for_history: %s " % error,
                       verbose=1, stream=sys.stderr)
         return 1
     return 0
 
 def update_database_with_metadata(record, rec_id):
     """Store every non special tag of record in the bibxxx tables and link
     the stored values to record rec_id.
 
     The full tag written is tag + ind1 + ind2 (an empty or blank indicator
     becomes '_'), followed by the subfield code for datafields.  Control
     fields other than 001 are stored with their own value; tags listed in
     CFG_BIBUPLOAD_SPECIAL_TAGS are skipped.  Failures are only reported on
     stderr.
     """
     def _indicator(ind):
         """Return '_' for an empty or blank indicator, the indicator otherwise."""
         if ind in ('', ' '):
             return '_'
         return ind
 
     def _store(full_tag, value, field_number):
         """Insert (full_tag, value) into bibxxx and connect it to the record."""
         write_message("   insertion of the tag "+full_tag+" with the value "+value, verbose=9)
         # insert the tag and value into bibxxx
         (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value)
         if table_name is None or bibxxx_row_id is None:
             write_message("   Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr)
         # connect bibxxx and bibrec with the table bibrec_bibxxx
         if insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, field_number, rec_id) is None:
             write_message("   Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr)
 
     for tag in record.keys():
         # special tags (FFT, FMT) are not stored here
         if tag in CFG_BIBUPLOAD_SPECIAL_TAGS:
             continue
         is_controlfield = tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001"
         # each field is a tuple (subfields, ind1, ind2, value, field number)
         for field in record[tag]:
             subfield_list = field[0]
             tag_prefix = ''.join([tag, _indicator(field[1]), _indicator(field[2])])
             datafield_number = field[4]
             if is_controlfield:
                 _store(tag_prefix, field[3], datafield_number)
             else:
                 # one bibxxx entry per subfield, keyed by tag + indicators + code
                 for subfield in subfield_list:
                     _store(''.join([tag_prefix, subfield[0]]), subfield[1], datafield_number)
     write_message("   -Update the database with metadata : DONE", verbose=2)
 
 def append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode):
     """
     Append fields of RECORD to REC_OLD (through record_add_field) and
     return REC_OLD.  Which fields are appended depends on the arguments:
       - OPT_TAG is not None: only the fields of that tag (nothing is
         appended when RECORD does not carry that tag);
       - OPT_TAG is None and OPT_MODE is 'reference': only the fields of
         CFG_BIBUPLOAD_REFERENCE_TAG, always handled as datafields;
       - otherwise: the fields of every tag found in RECORD.
     Tags listed in CFG_BIBUPLOAD_CONTROLFIELD_TAGS are appended as
     controlfields, except '001' which is never appended.
     """
     def _report_failure(tag):
         """Log that record_add_field() returned None for TAG."""
         write_message("   Error when adding the field " + tag, verbose=1, stream=sys.stderr)
     def _append_controlfields(tag):
         """Append to REC_OLD every controlfield instance of TAG in RECORD."""
         for single_tuple in record.get(tag, []):
             # the controlfield value is the fourth item of a field tuple
             controlfield_value = single_tuple[3]
             if record_add_field(rec_old, tag, "", "", controlfield_value) is None:
                 _report_failure(tag)
     def _append_datafields(tag):
         """Append to REC_OLD every datafield instance of TAG in RECORD."""
         for single_tuple in record.get(tag, []):
             subfield_list = single_tuple[0]
             ind1 = single_tuple[1]
             ind2 = single_tuple[2]
             write_message("      Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9)
             if record_add_field(rec_old, tag, ind1, ind2, "", subfield_list) is None:
                 _report_failure(tag)
     def _append_tag(tag):
         """Dispatch TAG to the controlfield or datafield appender."""
         if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS:
             # the record identifier must never be duplicated
             if tag != '001':
                 _append_controlfields(tag)
         else:
             _append_datafields(tag)
     if opt_tag is not None:
         _append_tag(opt_tag)
     elif opt_mode == 'reference':
         # reference mode appends only the reference tag
         _append_datafields(CFG_BIBUPLOAD_REFERENCE_TAG)
     else:
         for tag in record.keys():
             _append_tag(tag)
     return rec_old
 
 def copy_strong_tags_from_old_record(record, rec_old):
     """
     For every tag of CFG_BIBUPLOAD_STRONG_TAGS that has no field
     instance in RECORD, copy over the field instances that REC_OLD has
     for that tag.  RECORD is changed on the spot through
     record_add_field(); nothing is returned.
     """
     for strong_tag in CFG_BIBUPLOAD_STRONG_TAGS:
         if record_get_field_instances(record, strong_tag):
             # RECORD comes with its own value for this tag: leave it alone
             continue
         old_instances = record_get_field_instances(rec_old, strong_tag)
         if not old_instances:
             continue
         for old_instance in old_instances:
             # field instance layout: subfields, ind1, ind2, controlfield value, field number
             sf_vals, fi_ind1, fi_ind2, controlfield, dummy = old_instance
             record_add_field(record, strong_tag, fi_ind1, fi_ind2, controlfield, sf_vals)
 
 ### Delete functions
 
 def delete_tags_to_correct(record, rec_old, opt_tag):
     """
     Remove from REC_OLD the fields that RECORD is about to replace.
     A field of REC_OLD is deleted when RECORD has a field with the same
     tag and the same pair of indicators, so that fields sharing only
     the tag survive.  When OPT_TAG is not None only that tag is
     considered.  Tag '001' is never deleted.
     """
     for tag in record.keys():
         # restrict the work to OPT_TAG when one was requested
         if opt_tag is not None and opt_tag != tag:
             continue
         # nothing to delete if the old record lacks the tag
         if not rec_old.has_key(tag) or tag == '001':
             continue
         for field_instance in record[tag]:
             dummy_sf_vals, ind1, ind2, dummy_cf, dummy_field_number = field_instance
             write_message("      Delete tag: " + tag + " ind1=" + ind1 + " ind2=" + ind2, verbose=9)
             record_delete_field(rec_old, tag, ind1, ind2)
 
 def delete_bibrec_bibxxx(record, id_bibrec):
     """
     Delete the bibrec_bibXXx rows that link the record ID_BIBREC to its
     field values.  One DELETE is issued per tag of RECORD (special tags
     listed in CFG_BIBUPLOAD_SPECIAL_TAGS are skipped), against the
     table named after the first two characters of the tag.
     Database errors are logged and do not interrupt the loop.
     """
     for tag in record.keys():
         if tag in CFG_BIBUPLOAD_SPECIAL_TAGS:
             continue
         table_name = 'bibrec_bib' + tag[0:2] + 'x'
         # A table name cannot be a bound parameter, so only the name is
         # formatted into the statement; the record id is handed to
         # run_sql() separately so that the driver does the quoting.
         query = "DELETE FROM `%s` WHERE id_bibrec=%%s" % table_name
         try:
             run_sql(query, (id_bibrec,))
         except Error, error:
             write_message("   Error during the delete_bibrec_bibxxx function : %s " % error, verbose=1, stream=sys.stderr)
 
 def wipe_out_record_from_all_tables(recid):
     """
     Remove the record RECID and its traces from the database: its
     bibdocs are expunged, then its rows are deleted from bibrec, from
     every bibrec_bibXXx table, from bibfmt and from bibrec_bibdoc, and
     bibXXx values no longer linked to any record are dropped.
     Useful for the time being for test cases.
     """
     link_delete = "DELETE FROM %(bibrec_bibxxx)s WHERE id_bibrec=%%s"
     orphan_delete = "DELETE %(bibxxx)s FROM %(bibxxx)s " \
                     " LEFT JOIN %(bibrec_bibxxx)s " \
                     " ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx " \
                     " WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL"
     # the two-digit table prefixes 00..99, in increasing order
     prefixes = ["%i%i" % (tens, units) for tens in range(0, 10) for units in range(0, 10)]
     # delete all the linked bibdocs
     for bibdoc in BibRecDocs(recid).list_bibdocs():
         bibdoc.expunge()
     # delete from bibrec:
     run_sql("DELETE FROM bibrec WHERE id=%s", (recid,))
     # delete from bibrec_bibxxx:
     for prefix in prefixes:
         run_sql(link_delete % {'bibrec_bibxxx': "bibrec_bib%sx" % prefix}, (recid,))
     # delete all unused bibxxx values:
     for prefix in prefixes:
         table_names = {'bibxxx': "bib%sx" % prefix,
                        'bibrec_bibxxx': "bibrec_bib%sx" % prefix}
         run_sql(orphan_delete % table_names)
     # delete from bibfmt:
     run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s", (recid,))
     # delete from bibrec_bibdoc:
     run_sql("DELETE FROM bibrec_bibdoc WHERE id_bibrec=%s", (recid,))
 
 def delete_bibdoc(id_bibrec):
     """Set the status of every bibdoc linked to the record ID_BIBREC
     (through bibrec_bibdoc) to 'DELETED'.  Database errors are logged,
     not propagated."""
     try:
         run_sql("""UPDATE bibdoc SET status='DELETED'
                 WHERE id IN (SELECT id_bibdoc FROM bibrec_bibdoc
                               WHERE id_bibrec=%s)""", (id_bibrec,))
     except Error, error:
         message = "   Error during the delete_bibdoc function : %s " % error
         write_message(message, verbose=1, stream=sys.stderr)
 
 def delete_bibrec_bibdoc(id_bibrec):
     """Delete from bibrec_bibdoc every row that belongs to the record
     ID_BIBREC.  Database errors are logged, not propagated."""
     try:
         run_sql("""DELETE FROM bibrec_bibdoc WHERE id_bibrec=%s""", (id_bibrec,))
     except Error, error:
         message = "   Error during the delete_bibrec_bibdoc function : %s " % error
         write_message(message, verbose=1, stream=sys.stderr)
 def main():
     """Entry point: describe the bibupload task (authorization, help
     texts, command line switches, callbacks) and hand it to task_init().
     NOTE(review): task_submit_check_options() defined in this file is
     not passed to task_init() here -- confirm whether that is intended."""
     description = """Receive MARC XML file and update appropriate database
 tables according to options.
 Examples:
     $ bibupload -i input.xml
 """
     usage = """  -a, --append\t\tnew fields are appended to the existing record
   -c, --correct\t\tfields are replaced by the new ones in the existing record
   -f, --format\t\ttakes only the FMT fields into account. Does not update
   -i, --insert\t\tinsert the new record in the database
   -r, --replace\t\tthe existing record is entirely replaced by the new one
   -z, --reference\tupdate references (update only 999 fields)
   -S, --stage=STAGE\tstage to start from in the algorithm (0: always done; 1: FMT tags;
 \t\t\t2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update)
   -n, --notimechange\tdo not change record last modification date when updating
 """
     # short switches first, then the matching long options
     short_options = "ircazS:fn"
     long_options = ["insert", "replace", "correct", "append",
                     "reference", "stage=", "format", "notimechange"]
     task_init(authorization_action='runbibupload',
               authorization_msg="BibUpload Task Submission",
               description=description,
               help_specific_usage=usage,
               version=__revision__,
               specific_params=(short_options, long_options),
               task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
               task_run_fnc=task_run_core)
 
 def task_submit_elaborate_specific_parameter(key, value, opts, args):
     """Interpret one command line switch KEY (with its VALUE, if any)
     and store the outcome with task_set_option().
     Return True when KEY was recognised and accepted, False when KEY is
     unknown or when the value given to --stage is not an integer
     between 0 and 5.  Every mode switch also records ARGS[0] as the
     input file path.  Combining --insert and --replace yields the
     'replace_or_insert' mode.
     """
     def _select_mode(mode):
         """Store MODE together with the input file taken from ARGS[0]."""
         task_set_option('mode', mode)
         fix_argv_paths([args[0]])
         task_set_option('file_path', os.path.realpath(args[0]))
         return True
     # switches that simply select a mode
     plain_modes = ((("-c", "--correct"), 'correct'),
                    (("-a", "--append"), 'append'),
                    (("-z", "--reference"), 'reference'),
                    (("-f", "--format"), 'format'))
     # No time change option
     if key in ("-n", "--notimechange"):
         task_set_option('notimechange', 1)
         return True
     # Insert mode option: becomes replace_or_insert if replace was seen
     if key in ("-i", "--insert"):
         if task_get_option('mode') == 'replace':
             return _select_mode('replace_or_insert')
         return _select_mode('insert')
     # Replace mode option: becomes replace_or_insert if insert was seen
     if key in ("-r", "--replace"):
         if task_get_option('mode') == 'insert':
             return _select_mode('replace_or_insert')
         return _select_mode('replace')
     for switches, mode in plain_modes:
         if key in switches:
             return _select_mode(mode)
     # Stage
     if key in ("-S", "--stage"):
         try:
             stage = int(value)
         except ValueError:
             print >> sys.stderr, """The value specified for --stage must be a valid integer, not %s""" % value
             return False
         if stage < 0 or stage > 5:
             print >> sys.stderr, """The value specified for --stage must be comprised between 0 and 5"""
             return False
         task_set_option('stage_to_start_from', stage)
         return True
     return False
 
 
 def task_submit_check_options():
     """Check the options before the task is submitted.
     Return False, after writing a message, when no upload mode or no
     input file path has been set; return True otherwise.
     """
     mandatory_options = (
         ('mode', "Please specify at least one update/insert mode!"),
         ('file_path', "Missing filename! -h for help."),
     )
     for option_name, complaint in mandatory_options:
         if task_get_option(option_name) is None:
             write_message(complaint)
             return False
     return True
 
 def writing_rights_p():
     """Return True in case bibupload has the proper rights to write in the
     fulltext file folder.
     The check creates CFG_WEBSUBMIT_FILEDIR if needed, then writes,
     reads back and removes a file named 'test.txt' in it.  Any failure
     is registered with register_exception() and reported as False.
     """
     filename = os.path.join(CFG_WEBSUBMIT_FILEDIR, 'test.txt')
     try:
         if not os.path.exists(CFG_WEBSUBMIT_FILEDIR):
             os.makedirs(CFG_WEBSUBMIT_FILEDIR)
         # close the handles explicitly instead of relying on garbage
         # collection, so that the data is flushed before being read back
         probe = open(filename, 'w')
         try:
             probe.write('TEST')
         finally:
             probe.close()
         probe = open(filename)
         try:
             content = probe.read()
         finally:
             probe.close()
         os.remove(filename)
         # explicit check: an assert statement would vanish under python -O
         if content != 'TEST':
             raise IOError("Unexpected content read back from %s" % filename)
     except Exception:
         # KeyboardInterrupt/SystemExit are deliberately not swallowed
         register_exception()
         return False
     return True
 
 def task_run_core():
     """Body of the task: parse the MARCXML file named by the 'file_path'
     option and call bibupload() on each record found in it.
     The XML of a record whose upload returned error code 1 or 2 is
     written to stderr; only code 1 increments stat['nb_errors'].
     Return True when stat['nb_errors'] is below 1 at the end, False
     otherwise.
     """
     write_message("Input file '%s', input mode '%s'." %
             (task_get_option('file_path'), task_get_option('mode')))
     write_message("STAGE 0:", verbose=2)
     if task_get_option('file_path') is not None:
         recs = xml_marc_to_records(open_marc_file(task_get_option('file_path')))
         write_message("   -Open XML marc: DONE", verbose=2)
         task_sleep_now_if_required(can_stop_too=True)
         if recs is not None:
             # Count the records only once we know there are some:
             # len(None) would raise TypeError and the "No record found"
             # branch below could never be reached.
             stat['nb_records_to_upload'] = len(recs)
             # We proceed each record by record
             for record in recs:
                 task_sleep_now_if_required(can_stop_too=True)
                 error = bibupload(
                     record,
                     opt_tag=task_get_option('tag'),
                     opt_mode=task_get_option('mode'),
                     opt_stage_to_start_from=task_get_option('stage_to_start_from'),
                     opt_notimechange=task_get_option('notimechange'))
                 if error[0] in (1, 2):
                     # both error codes dump the offending record
                     if record:
                         write_message(record_xml_output(record),
                             stream=sys.stderr)
                     else:
                         write_message("Record could not have been parsed",
                             stream=sys.stderr)
                 if error[0] == 1:
                     stat['nb_errors'] += 1
                 task_update_progress("Done %d out of %d." % \
                                     (stat['nb_records_inserted'] + \
                                     stat['nb_records_updated'],
                                     stat['nb_records_to_upload']))
         else:
             write_message("   Error bibupload failed: No record found",
                         verbose=1, stream=sys.stderr)
     if task_get_task_param('verbose') >= 1:
         # Print out the statistics
         print_out_bibupload_statistics()
     # Check if they were errors
     return not (stat['nb_errors'] >= 1)
 
 # Start the task only when this module is executed as a script.
 if __name__ == "__main__":
     main()
diff --git a/modules/websubmit/lib/bibdocfile.py b/modules/websubmit/lib/bibdocfile.py
index 5e65a733e..0689771ca 100644
--- a/modules/websubmit/lib/bibdocfile.py
+++ b/modules/websubmit/lib/bibdocfile.py
@@ -1,1875 +1,1899 @@
 ## $Id$
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 __revision__ = "$Id$"
 
 import os
 import re
 import shutil
 import md5
 import filecmp
 import time
 import socket
 import urllib2
 import urllib
 import tempfile
 import cPickle
 from datetime import datetime
 from xml.sax.saxutils import quoteattr
 from mimetypes import MimeTypes
 
 ## Let's set a reasonable timeout for URL request (e.g. FFT)
 socket.setdefaulttimeout(40)
 
 try:
     set
 except NameError:
     from sets import Set as set
 
 from invenio.shellutils import run_shell_command, escape_shell_arg
 from invenio.dbquery import run_sql, DatabaseError, blob_to_string
 from invenio.errorlib import register_exception
 from invenio.bibrecord import create_record, record_get_field_instances, \
     field_get_subfield_values, field_get_subfield_instances, \
     encode_for_xml
 from invenio.access_control_engine import acc_authorize_action
 from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, CFG_SITE_URL, \
     CFG_WEBDIR, CFG_WEBSUBMIT_FILEDIR,\
     CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \
     CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL, \
     CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS, \
-    CFG_TMPDIR, CFG_PATH_WGET
+    CFG_TMPDIR, CFG_PATH_WGET, CFG_PATH_MD5SUM
 from invenio.bibformat import format_record
 import invenio.template
 websubmit_templates = invenio.template.load('websubmit')
 websearch_templates = invenio.template.load('websearch')
 
 CFG_BIBDOCFILE_MD5_THRESHOLD = 256 * 1024
 CFG_BIBDOCFILE_MD5_BUFFER = 1024 * 1024
-CFG_BIBDOCFILE_MD5SUM_EXISTS = os.system('which md5sum 2>&1 > /dev/null') == 0
+CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION = False
 
 KEEP_OLD_VALUE = 'KEEP-OLD-VALUE'
 
 # Build the collection of known file extensions: everything the standard
 # mimetypes tables know about, two extra bzip2 mappings and the site's
 # additional extensions.
 _mimes = MimeTypes()
 _mimes.suffix_map.update({'.tbz2' : '.tar.bz2'})
 _mimes.encodings_map.update({'.bz2' : 'bzip2'})
 _extensions = _mimes.encodings_map.keys() + \
               _mimes.suffix_map.keys() + \
               _mimes.types_map[1].keys() + \
               CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS
 # NOTE(review): the ordering produced by sort()/reverse() is not kept,
 # since the list is turned into an (unordered) set right below.
 _extensions.sort()
 _extensions.reverse()
 # extensions are stored lower-cased
 _extensions = set([ext.lower() for ext in _extensions])
 
 class InvenioWebSubmitFileError(Exception):
     """Error raised by the bibdocfile classes, e.g. when a docname or a
     docid is not connected with a record."""
 
 def file_strip_ext(afile):
     """Return AFILE without its trailing known extension(s).
     Extensions are looked up in _extensions case-insensitively and
     stripped repeatedly, until the name no longer ends with one of
     them.  The returned prefix keeps the original letter case."""
     lowered = afile.lower()
     matched = '.'
     while matched:
         matched = ''
         for candidate in _extensions:
             if not lowered.endswith(candidate):
                 continue
             lowered = lowered[:-len(candidate)]
             matched = candidate
             break
     return afile[:len(lowered)]
 
 def normalize_format(format):
     """Normalize the format.
     A leading dot is added to a non-empty FORMAT that lacks one.  When
     CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION is true the format is
     also lower-cased (except '.Z', '.H', '.C' and '.CC') and '.jpg' is
     replaced by '.jpeg'."""
-    #format = format.lower()
     if format and format[0] != '.':
         format = '.' + format
-    #format = format.replace('.jpg', '.jpeg')
+    if CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION:
+        if format not in ('.Z', '.H', '.C', '.CC'):
+            format = format.lower()
+        format = format.replace('.jpg', '.jpeg')
     return format
 
 _docname_re = re.compile(r'[^-\w.]*')
 def normalize_docname(docname):
     """Return the normalized DOCNAME.
     The docname is currently returned unchanged: the filtering through
     _docname_re is disabled."""
     # disabled: return _docname_re.sub('', docname)
     return docname
 
 def normalize_version(version):
     """Normalize the version.
     Return str(VERSION) when int() accepts VERSION, 'all' when VERSION
     is the word "all" (any letter case, surrounding blanks ignored) and
     '' in every other case, including None and other values that are
     neither numbers nor strings."""
     try:
         int(version)
     except (TypeError, ValueError):
         # int() raises TypeError, not ValueError, for None and the like:
         # treat them as "no valid version" instead of crashing
         try:
             wants_all = version.lower().strip() == 'all'
         except AttributeError:
             wants_all = False
         if wants_all:
             return 'all'
         return ''
     return str(version)
 
 _path_re = re.compile(r'.*[\\/:]')
 def decompose_file(afile):
     """Split AFILE into a (dirname, base, extension) tuple.
     The basename is what follows the last '/', '\\' or ':'; BASE is
     that basename stripped by file_strip_ext() and EXTENSION is the
     remainder, with its leading dot, or '' when there is none."""
     basename = _path_re.sub('', afile)
     # drop the basename and the separator in front of it
     dirname = afile[:-(len(basename) + 1)]
     base = file_strip_ext(basename)
     suffix = basename[len(base) + 1:]
     if not suffix:
         return (dirname, base, suffix)
     return (dirname, base, '.' + suffix)
 
 def propose_unique_name(afile, use_version=False):
     """Return a path, in the directory of AFILE, whose name is not yet
     used in that directory.
     If the name of AFILE is taken, '_2', '_3', ... is inserted before
     the extension until a free name is found.  With USE_VERSION the
     trailing ';version' part of AFILE is kept at the end of the name."""
     version = ''
     if use_version:
         version = ';' + re.sub('.*;', '', afile)
         afile = afile[:-len(version)]
     (basedir, basename, extension) = decompose_file(afile)
     candidate = "%s%s%s" % (basename, extension, version)
     taken_names = os.listdir(basedir)
     counter = 1
     while candidate in taken_names:
         counter += 1
         candidate = "%s_%s%s%s" % (basename, counter, extension, version)
     return "%s/%s" % (basedir, candidate)
 
 class BibRecDocs:
     """this class represents all the files attached to one record"""
     def __init__(self, recid, deleted_too=False):
         """Remember RECID and load the list of its bibdocs; with
         DELETED_TOO the bibdocs whose status is DELETED are listed too."""
         self.bibdocs = []
         self.deleted_too = deleted_too
         self.id = recid
         self.build_bibdoc_list()
 
     def __repr__(self):
         if self.deleted_too:
             return 'BibRecDocs(%s, True)' % self.id
         else:
             return 'BibRecDocs(%s)' % self.id
 
     def __str__(self):
         """Return a textual summary: number of bibdocs attached, total
         sizes, then the string form of every bibdoc."""
         pieces = [
             '%i::::total bibdocs attached=%i\n' % (self.id, len(self.bibdocs)),
             '%i::::total size latest version=%s\n' % (self.id, nice_size(self.get_total_size_latest_version())),
             '%i::::total size all files=%s\n' % (self.id, nice_size(self.get_total_size())),
         ]
         for bibdoc in self.bibdocs:
             pieces.append(str(bibdoc))
         return ''.join(pieces)
 
     def empty_p(self):
         """Return True if the bibrec is empty, i.e. it has no bibdocs
         connected."""
         return len(self.bibdocs) == 0
 
     def deleted_p(self):
         """Return True if the bibrec has been deleted, i.e. when
         record_exists() answers -1 for it."""
         # imported at call time rather than at module level
         from invenio.search_engine import record_exists
         existence_status = record_exists(self.id)
         return existence_status == -1
 
     def get_xml_8564(self):
         """Return a snippet of MARCXML with the 8564 fields that match
         the current state: the existing 8564 fields that do not point
         to a bibdocfile URL, one field per latest file (URL, description
         and comment when set) and one field per bibdoc icon."""
         field_start = '\t<datafield tag="856" ind1="4" ind2=" ">\n'
         field_end = '\t</datafield>\n'
         chunks = []
         record = create_record(format_record(self.id, of='xm'))[0]
         for field in record_get_field_instances(record, '856', '4', ' '):
             # NOTE(review): `url` is whatever field_get_subfield_values()
             # returns for code 'u' (possibly a list of values rather
             # than one string) -- confirm bibdocfile_url_p() accepts it.
             url = field_get_subfield_values(field, 'u')
             if bibdocfile_url_p(url):
                 continue
             chunks.append(field_start)
             for subfield, value in field_get_subfield_instances(field):
                 chunks.append('\t\t<subfield code="%s">%s</subfield>\n' % (subfield, encode_for_xml(value)))
             chunks.append(field_end)
         for afile in self.list_latest_files():
             chunks.append(field_start)
             url = afile.get_url()
             description = afile.get_description()
             comment = afile.get_comment()
             # subfield code -> value, empty values are left out
             for code, value in (('u', url), ('y', description), ('z', comment)):
                 if value:
                     chunks.append('\t\t<subfield code="' + code + '">%s</subfield>\n' % encode_for_xml(value))
             chunks.append(field_end)
         for bibdoc in self.bibdocs:
             icon = bibdoc.get_icon()
             if not icon:
                 continue
             icon_files = icon.list_all_files()
             if not icon_files:
                 continue
             chunks.append(field_start)
             chunks.append('\t\t<subfield code="q">%s</subfield>\n' % encode_for_xml(icon_files[0].get_url()))
             chunks.append('\t\t<subfield code="x">icon</subfield>\n')
             chunks.append(field_end)
         return ''.join(chunks)
 
     def get_total_size_latest_version(self):
         """Return the total size used on disk of all the files belonging
         to this record and corresponding to the latest version."""
         size = 0
         for bibdoc in self.bibdocs:
             size += bibdoc.get_total_size_latest_version()
         return size
 
     def get_total_size(self):
         """Return the total size used on disk of all the files belonging
         to this record of any version."""
         size = 0
         for bibdoc in self.bibdocs:
             size += bibdoc.get_total_size()
         return size
 
     def build_bibdoc_list(self):
         """Reload self.bibdocs from the database, ordered by docname.
         Bibdocs whose status is DELETED are listed only when
         self.deleted_too is set.  This function must be called every
         time a bibdoc connected to this recid is added, removed or
         modified.
         """
         self.bibdocs = []
         if self.deleted_too:
             query = """SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN
                          bibdoc ON id=id_bibdoc WHERE id_bibrec=%s
                          ORDER BY docname ASC"""
         else:
             query = """SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN
                          bibdoc ON id=id_bibdoc WHERE id_bibrec=%s AND
                          status<>'DELETED' ORDER BY docname ASC"""
         for row in run_sql(query, (self.id,)):
             docid, doctype = row[0], row[1]
             self.bibdocs.append(BibDoc(docid=docid, recid=self.id, doctype=doctype))
 
     def list_bibdocs(self, doctype=''):
         """Returns the list all bibdocs object belonging to a recid.
         If doctype is set, it returns just the bibdocs of that doctype.
         """
         if not doctype:
             return self.bibdocs
         else:
             return [bibdoc for bibdoc in self.bibdocs if doctype == bibdoc.doctype]
 
     def get_bibdoc_names(self, doctype=''):
         """Returns the names of the files associated with the bibdoc of a
         paritcular doctype"""
         return [bibdoc.docname for bibdoc in self.list_bibdocs(doctype)]
 
     def check_file_exists(self, path):
         """Returns 1 if the recid has a file identical to the one stored in path."""
         size = os.path.getsize(path)
 
         # Let's consider all the latest files
         files = self.list_latest_files()
 
         # Let's consider all the latest files with same size
         potential = [afile for afile in files if afile.get_size() == size]
 
         if potential:
             checksum = calculate_md5(path)
 
             # Let's consider all the latest files with the same size and the
             # same checksum
             potential = [afile for afile in potential if afile.get_checksum() == checksum]
 
             if potential:
                 potential = [afile for afile in potential if filecmp.cmp(afile.get_full_path(), path)]
 
                 if potential:
                     return True
                 else:
                     # Gosh! How unlucky, same size, same checksum but not same
                     # content!
                     pass
         return False
 
     def propose_unique_docname(self, docname):
         """Return a docname not yet used by this record: the normalized
         DOCNAME itself or, if taken, DOCNAME followed by '_2', '_3'..."""
         base = normalize_docname(docname)
         candidate = base
         counter = 1
         while candidate in self.get_bibdoc_names():
             counter += 1
             candidate = "%s_%s" % (base, counter)
         return candidate
 
     def get_docid(self, docname):
         """Returns the docid corresponding to the given docname, if the docname
         is valid.
         """
         for bibdoc in self.bibdocs:
             if bibdoc.docname == docname:
                 return bibdoc.id
         raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \
             "docname '%s'" % (self.id, docname)
 
     def get_docname(self, docid):
         """Returns the docname corresponding to the given docid, if the docid
         is valid.
         """
         for bibdoc in self.bibdocs:
             if bibdoc.id == docid:
                 return bibdoc.docname
         raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \
             "docid '%s'" % (self.id, docid)
 
     def has_docname_p(self, docname):
         """Return True if a bibdoc with a particular docname belong to this
         record."""
         for bibdoc in self.bibdocs:
             if bibdoc.docname == docname:
                 return True
         return False
 
     def get_bibdoc(self, docname):
         """Returns the bibdoc with a particular docname associated with
         this recid"""
         for bibdoc in self.bibdocs:
             if bibdoc.docname == docname:
                 return bibdoc
         raise InvenioWebSubmitFileError, "Recid '%s' is not connected with " \
             " docname '%s'" % (self.id, docname)
 
     def delete_bibdoc(self, docname):
         """Deletes a docname associated with the recid."""
         for bibdoc in self.bibdocs:
             if bibdoc.docname == docname:
                 bibdoc.delete()
         self.build_bibdoc_list()
 
     def add_bibdoc(self, doctype="Main", docname='file', never_fail=False):
         """Creates a new bibdoc associated with the recid, with a file
         called docname and a particular doctype. It returns the bibdoc object
         which was just created.
         If never_fail is True then the system will always be able
         to create a bibdoc: a docname that is already taken is replaced
         by the one proposed by propose_unique_docname().
         Any failure, including a docname that is already in use, is
         registered with register_exception() before being reported to
         the caller.
         """
         try:
             docname = normalize_docname(docname)
             if never_fail:
                 docname = self.propose_unique_docname(docname)
             if docname in self.get_bibdoc_names():
                 raise InvenioWebSubmitFileError, "%s has already a bibdoc with docname %s" % (self.id, docname)
             else:
                 bibdoc = BibDoc(recid=self.id, doctype=doctype, docname=docname)
                 # refresh the list so that it includes the new bibdoc
                 self.build_bibdoc_list()
                 return bibdoc
-        except:
+        except Exception, e:
             register_exception()
-            raise
+            raise InvenioWebSubmitFileError(str(e))
 
     def add_new_file(self, fullpath, doctype="Main", docname='', never_fail=False):
         """Attach the file at fullpath to this record and return the bibdoc
         that ended up holding it.
         When docname is empty it is derived from the file name (second item
         returned by decompose_file); it is then normalized. Policy:
         - no bibdoc with that docname exists yet: one is created with the
           given doctype and the file becomes a new version of it;
         - the bibdoc exists: the file is added to it as a new format;
         - adding the format fails with InvenioWebSubmitFileError (typically
           because the format already exists): if never_fail is True a new
           bibdoc with a unique name proposed from docname is created and the
           file is added to it, otherwise the error is re-raised unchanged.
         """
         if not docname:
             docname = decompose_file(fullpath)[1]
         docname = normalize_docname(docname)
         try:
             bibdoc = self.get_bibdoc(docname)
         except InvenioWebSubmitFileError:
             # No bibdoc with this docname yet: create it. never_fail is False
             # because the name is known to be free at this point.
             bibdoc = self.add_bibdoc(doctype, docname, False)
             bibdoc.add_file_new_version(fullpath)
         else:
             try:
                 bibdoc.add_file_new_format(fullpath)
             except InvenioWebSubmitFileError:
                 if not never_fail:
                     # A bare raise keeps the traceback of the original
                     # failure, which "raise e" would have replaced.
                     raise
                 bibdoc = self.add_bibdoc(doctype, docname, True)
                 bibdoc.add_file_new_version(fullpath)
         return bibdoc
 
     def add_new_version(self, fullpath, docname=None, description=None, comment=None):
         """Store the file at fullpath as a new version of an already existing
         bibdoc of this record, and return that bibdoc. When docname is None
         it is taken from the file name (second item given by decompose_file).
         description and comment are handed over to the bibdoc as they are.
         """
         if docname is None:
             docname = decompose_file(fullpath)[1]
         target = self.get_bibdoc(docname=docname)
         target.add_file_new_version(fullpath, description, comment)
         return target
 
     def add_new_format(self, fullpath, docname=None, description=None, comment=None):
         """Adds the file at fullpath as a new format of an already existing
         bibdoc of this record, alongside the files already there, and
         returns the bibdoc object. When docname is None it is taken from the
         file name (second item given by decompose_file).
         """
         if docname is None:
             docname = decompose_file(fullpath)[1]
         bibdoc = self.get_bibdoc(docname=docname)
         # The second positional parameter of BibDoc.add_file_new_format is
         # the version, so description and comment must go by keyword or they
         # would be taken as version and description respectively.
         bibdoc.add_file_new_format(fullpath, description=description, comment=comment)
         return bibdoc
 
     def list_latest_files(self, doctype=''):
         """Collect, in one flat list, the latest docfiles of every bibdoc
         returned by self.list_bibdocs(doctype)."""
         latest = []
         for bibdoc in self.list_bibdocs(doctype):
             latest.extend(bibdoc.list_latest_files())
         return latest
 
     def display(self, docname="", version="", doctype="", ln=CFG_SITE_LANG, verbose=0):
         """Render the file panel of this record through the
         tmpl_bibrecdoc_filelist template.
         If docname names one of the bibdocs of the record only that bibdoc
         is shown; otherwise (no docname, or unknown docname) the bibdocs
         returned by list_bibdocs(doctype) are shown, grouped by type.
         version is passed on to the display() of each bibdoc. With
         verbose >= 9 the textual dump of the record (str(self)) is handed to
         the template as well. An empty string is returned when there is no
         bibdoc to show.
         """
         if docname:
             try:
                 shown = [self.get_bibdoc(docname)]
             except InvenioWebSubmitFileError:
                 shown = self.list_bibdocs(doctype)
         else:
             shown = self.list_bibdocs(doctype)
         if not shown:
             return ""
         groups = []
         for type_name in list_types_from_array(shown):
             panels = [bibdoc.display(version, ln = ln)
                       for bibdoc in shown
                       if type_name == bibdoc.get_type()]
             groups.append({'name' : type_name, 'content' : panels})
         verbose_files = ''
         if verbose >= 9:
             verbose_files = str(self)
         return websubmit_templates.tmpl_bibrecdoc_filelist(
             ln=ln,
             types=groups,
             verbose_files=verbose_files)
 
     def fix(self, docname):
         """Algorithm that transforms a broken/old bibdoc into a coherent one:
         i.e. the corresponding folder will have files named after the bibdoc
         name. Proper .recid, .type, .md5 files will be created/updated.
         In case of more than one file with the same format and revision, a
         new bibdoc will be created in order to hold those files.
         Returns the list of newly created bibdocs, if any.
         Raises InvenioWebSubmitFileError when a file in the bibdoc folder has
         an unexpected name or when a rename, write or removal fails.
         """
-
         bibdoc = self.get_bibdoc(docname)
         versions = {}
         res = []
         new_bibdocs = [] # List of files with the same version/format of
                         # existing file which need new bibdoc.
         counter = 0
         zero_version_bug = False
         if os.path.exists(bibdoc.basedir):
             # First pass: every visible 'name;version' file is renamed to a
             # temporary 'FIXING-<counter>-<name>' and recorded in versions
             # by version and format; duplicates go to new_bibdocs.
             for filename in os.listdir(bibdoc.basedir):
                 if filename[0] != '.' and ';' in filename:
                     name, version = filename.split(';')
                     try:
                         version = int(version)
                     except ValueError:
                         # Strange name
                         register_exception()
                         raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file revision. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir)
                     if version == 0:
                         # a revision 0 exists: all revisions are shifted by
                         # one when the files are renamed back below
                         zero_version_bug = True
                     format = name[len(file_strip_ext(name)):]
                     format = normalize_format(format)
                     if not versions.has_key(version):
                         versions[version] = {}
                     new_name = 'FIXING-%s-%s' % (str(counter), name)
                     try:
                         shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name))
                     except Exception, e:
                         register_exception()
                         raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e)
                     if versions[version].has_key(format):
                         new_bibdocs.append((new_name, version))
                     else:
                         versions[version][format] = new_name
                     counter += 1
                 elif filename[0] != '.':
                     # Strange name
                     register_exception()
                     raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file revision. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir)
         else:
             # we create the corresponding storage directory
             # NOTE(review): if os.makedirs or the writes below raise, the
             # os.umask(old_umask) call is skipped and umask stays at 022.
             old_umask = os.umask(022)
             os.makedirs(bibdoc.basedir)
             # and save the father record id if it exists
             try:
                 if self.id != "":
                     recid_fd = open("%s/.recid" % bibdoc.basedir, "w")
                     recid_fd.write(str(self.id))
                     recid_fd.close()
                 if bibdoc.doctype != "":
                     type_fd = open("%s/.type" % bibdoc.basedir, "w")
                     type_fd.write(str(bibdoc.doctype))
                     type_fd.close()
             except Exception, e:
                 register_exception()
                 raise InvenioWebSubmitFileError, e
             os.umask(old_umask)
 
 
         if not versions:
             # no valid file was found: the bibdoc is deleted
             bibdoc.delete()
         else:
             # Second pass: the temporary files are renamed to their final
             # '<docname><format>;<version>' name.
             for version, formats in versions.iteritems():
                 if zero_version_bug:
                     version += 1
                 for format, filename in formats.iteritems():
                     destination = '%s%s;%i' % (docname, format, version)
                     try:
                         shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination))
                     except Exception, e:
                         register_exception()
                         raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e)
 
             try:
                 recid_fd = open("%s/.recid" % bibdoc.basedir, "w")
                 recid_fd.write(str(self.id))
                 recid_fd.close()
                 type_fd = open("%s/.type" % bibdoc.basedir, "w")
                 type_fd.write(str(bibdoc.doctype))
                 type_fd.close()
             except Exception, e:
                 register_exception()
                 raise InvenioWebSubmitFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e)
 
             self.build_bibdoc_list()
 
             res = []
 
             # Files clashing on version/format are moved into new bibdocs
             # with a unique docname (never_fail=True).
             for (filename, version) in new_bibdocs:
                 if zero_version_bug:
                     version += 1
                 new_bibdoc = self.add_bibdoc(doctype=bibdoc.doctype, docname=docname, never_fail=True)
                 # the second positional argument is the version
                 new_bibdoc.add_file_new_format('%s/%s' % (bibdoc.basedir, filename), version)
                 res.append(new_bibdoc)
                 try:
                     os.remove('%s/%s' % (bibdoc.basedir, filename))
                 except Exception, e:
                     register_exception()
                     raise InvenioWebSubmitFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e)
 
             Md5Folder(bibdoc.basedir).update(only_new=False)
         bibdoc._build_file_list()
         self.build_bibdoc_list()
 
         # from here on the name bibdoc is rebound to each bibdoc of the record
         for bibdoc in self.bibdocs:
             # NOTE(review): this tests whether the query returned any row,
             # not whether more_info is empty -- confirm that this matches
             # the intent described in the comment below.
             if not run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (bibdoc.id,)):
                 ## Import from MARC only if the bibdoc has never had
                 ## its more_info initialized.
                 try:
                     bibdoc.import_descriptions_and_comments_from_marc()
                 except Exception, e:
                     register_exception()
                     raise InvenioWebSubmitFileError, "Error in importing description and comment from %s for record %s: %s" % (repr(bibdoc), self.id, e)
         return res
 
 class BibDoc:
     """this class represents one file attached to a record
         there is a one to one mapping between an instance of this class and
         an entry in the bibdoc db table"""
 
     def __init__ (self, docid="", recid="", docname="file", doctype="Main"):
         """Constructor of a bibdoc. At least the docid or the recid/docname
         pair is needed.
         With a docid the existing bibdoc is loaded from the bibdoc table
         (and its record/type from bibrec_bibdoc, possibly through the
         bibdoc_bibdoc link when it is attached to another bibdoc).
         Without a docid a new row is inserted in bibdoc and the storage
         directory is prepared.
         Raises InvenioWebSubmitFileError when the docid does not exist, is
         not linked to the given recid, when docname or doctype is empty, or
         when a bibdoc called docname already exists for recid.
         """
         # docid is known, the document already exists
         docname = normalize_docname(docname)
         self.docfiles = []
         self.md5s = None
         self.related_files = []
         if docid != "":
             if recid == "":
                 # no recid given: look it up, together with the doctype
                 recid = None
                 self.doctype = ""
                 res = run_sql("select id_bibrec,type from bibrec_bibdoc "
                     "where id_bibdoc=%s", (docid,))
                 if len(res) > 0:
                     recid = res[0][0]
                     self.doctype = res[0][1]
                 else:
                     # not linked to a record directly: try through the
                     # bibdoc this one is attached to (id_bibdoc1)
                     res = run_sql("select id_bibdoc1 from bibdoc_bibdoc "
                                   "where id_bibdoc2=%s", (docid,))
                     if len(res) > 0 :
                         main_bibdoc = res[0][0]
                         res = run_sql("select id_bibrec,type from bibrec_bibdoc "
                                       "where id_bibdoc=%s", (main_bibdoc,))
                         if len(res) > 0:
                             recid = res[0][0]
                             self.doctype = res[0][1]
             else:
                 res = run_sql("select type from bibrec_bibdoc "
                     "where id_bibrec=%s and id_bibdoc=%s", (recid, docid,))
                 if len(res) > 0:
                     self.doctype = res[0][0]
                 else:
                     #this bibdoc isn't associated with the corresponding bibrec.
                     raise InvenioWebSubmitFileError, "No docid associated with the recid %s" % recid
             # gather the other information
             res = run_sql("select id,status,docname,creation_date,"
                 "modification_date,more_info from bibdoc where id=%s", (docid,))
             if len(res) > 0:
                 self.cd = res[0][3]
                 self.md = res[0][4]
                 self.recid = recid
                 self.docname = res[0][2]
                 self.id = docid
                 self.status = res[0][1]
                 self.more_info = BibDocMoreInfo(docid, blob_to_string(res[0][5]))
                 self.basedir = _make_base_dir(self.id)
             else:
                 # this bibdoc doesn't exist
                 raise InvenioWebSubmitFileError, "The docid %s does not exist." % docid
         # else it is a new document
         else:
             if docname == "" or doctype == "":
                 raise InvenioWebSubmitFileError, "Argument missing for creating a new bibdoc"
             else:
                 self.recid = recid
                 self.doctype = doctype
                 self.docname = docname
                 self.status = ''
                 if recid:
                     # a record cannot have two bibdocs with the same docname
                     res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (recid, docname))
                     if res:
                         raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (docname, recid)
                 self.id = run_sql("INSERT INTO bibdoc (status,docname,creation_date,modification_date) "
                     "values(%s,%s,NOW(),NOW())", (self.status, docname))
                 if self.id is not None:
                     # we link the document to the record if a recid was
                     # specified
                     self.more_info = BibDocMoreInfo(self.id)
-                    if self.recid != "":
-                        run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type) VALUES (%s,%s,%s)",
-                            (recid, self.id, self.doctype,))
-                    res = run_sql("select creation_date, modification_date from bibdoc where id=%s", (self.id,))
+                    res = run_sql("SELECT creation_date, modification_date FROM bibdoc WHERE id=%s", (self.id,))
                     self.cd = res[0][0]
                     # NOTE(review): md is read from column 0 (creation_date)
                     # rather than column 1; both are set to NOW() by the
                     # INSERT above, so presumably equal -- confirm.
                     self.md = res[0][0]
                 else:
                     raise InvenioWebSubmitFileError, "New docid cannot be created"
-                self.basedir = _make_base_dir(self.id)
-                # we create the corresponding storage directory
-                if not os.path.exists(self.basedir):
-                    old_umask = os.umask(022)
-                    os.makedirs(self.basedir)
-                    # and save the father record id if it exists
-                    try:
-                        if self.recid != "":
-                            recid_fd = open("%s/.recid" % self.basedir, "w")
-                            recid_fd.write(str(self.recid))
-                            recid_fd.close()
-                        if self.doctype != "":
-                            type_fd = open("%s/.type" % self.basedir, "w")
-                            type_fd.write(str(self.doctype))
-                            type_fd.close()
-                    except Exception, e:
-                        register_exception()
-                        raise InvenioWebSubmitFileError, e
-                    os.umask(old_umask)
+                try:
+                    self.basedir = _make_base_dir(self.id)
+                    # we create the corresponding storage directory
+                    if not os.path.exists(self.basedir):
+                        old_umask = os.umask(022)
+                        os.makedirs(self.basedir)
+                        # and save the father record id if it exists
+                        try:
+                            if self.recid != "":
+                                recid_fd = open("%s/.recid" % self.basedir, "w")
+                                recid_fd.write(str(self.recid))
+                                recid_fd.close()
+                            if self.doctype != "":
+                                type_fd = open("%s/.type" % self.basedir, "w")
+                                type_fd.write(str(self.doctype))
+                                type_fd.close()
+                        except Exception, e:
+                            register_exception()
+                            raise InvenioWebSubmitFileError, e
+                        os.umask(old_umask)
+                    if self.recid != "":
+                        run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type) VALUES (%s,%s,%s)",
+                            (recid, self.id, self.doctype,))
+                except Exception, e:
+                    run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, ))
+                    run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, ))
+                    register_exception()
+                    raise InvenioWebSubmitFileError, e
         # NOTE(review): when the directory setup above raises, the umask set
         # to 022 is not restored before the error propagates -- confirm
         # whether that is acceptable.
         # build list of attached files
         self._build_file_list('init')
         # link with related_files
         self._build_related_file_list()
 
     def __repr__(self):
         return 'BibDoc(%s, %s, %s, %s)' % (repr(self.id), repr(self.recid), repr(self.docname), repr(self.doctype))
 
     def __str__(self):
         """Dump the bibdoc as text: one 'recid:docid:::key=value' line per
         property (the recid part is empty when there is no recid), followed
         by the str() of every docfile and of the icon, if any."""
         prefix = '%s:%i:::' % (self.recid or '', self.id)
         properties = [
             ('docname', self.docname),
             ('doctype', self.doctype),
             ('status', self.status),
             ('basedir', self.basedir),
             ('creation date', self.cd),
             ('modification date', self.md),
             ('total file attached', len(self.docfiles)),
             ('total size latest version', nice_size(self.get_total_size_latest_version())),
             ('total size all files', nice_size(self.get_total_size())),
         ]
         chunks = ['%s%s=%s\n' % (prefix, key, value) for key, value in properties]
         chunks.extend([str(docfile) for docfile in self.docfiles])
         icon = self.get_icon()
         if icon:
             chunks.append(str(icon))
         return ''.join(chunks)
 
     def get_status(self):
         """Return the value currently held by the status attribute."""
         current_status = self.status
         return current_status
 
     def touch(self):
         """Update the modification time of the bibdoc: the modification_date
         of its row in the bibdoc table is set to NOW()."""
         run_sql('UPDATE bibdoc SET modification_date=NOW() WHERE id=%s', (self.id, ))
-        if self.recid:
-            run_sql('UPDATE bibrec SET modification_date=NOW() WHERE id=%s', (self.recid, ))
+        #if self.recid:
+            #run_sql('UPDATE bibrec SET modification_date=NOW() WHERE id=%s', (self.recid, ))
 
     def set_status(self, new_status):
         """Change the status of the bibdoc, in the bibdoc table and on this
         object, then touch the bibdoc and rebuild its file lists.
         Passing KEEP_OLD_VALUE leaves everything as it is. 'DELETED' is
         reserved and is refused with an InvenioWebSubmitFileError."""
         if new_status != KEEP_OLD_VALUE:
             if new_status == 'DELETED':
                 reason = 'DELETED is a reserved word and can not be used for setting the status'
                 raise InvenioWebSubmitFileError(reason)
             query_params = (new_status, self.id)
             run_sql('UPDATE bibdoc SET status=%s WHERE id=%s', query_params)
             self.status = new_status
             self.touch()
             # the lists are rebuilt after every status change
             self._build_file_list()
             self._build_related_file_list()
 
     def add_file_new_version(self, filename, description=None, comment=None):
         """Add a new version of a file."""
         try:
             latestVersion = self.get_latest_version()
             if latestVersion == 0:
                 myversion = 1
             else:
                 myversion = latestVersion + 1
             if os.path.exists(filename):
                 dummy, dummy, format = decompose_file(filename)
                 destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, myversion)
                 try:
                     shutil.copyfile(filename, destination)
                     os.chmod(destination, 0644)
                 except Exception, e:
                     register_exception()
                     raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e)
                 self.more_info.set_description(description, format, myversion)
                 self.more_info.set_comment(comment, format, myversion)
             else:
                 raise InvenioWebSubmitFileError, "'%s' does not exists!" % filename
         finally:
             self.touch()
             Md5Folder(self.basedir).update()
             self._build_file_list()
 
     def purge(self):
         """Physically remove every file of this bibdoc that is older than
         the latest version, together with its comment and description.
         Nothing is done when the latest version is not greater than 1.
         A file that cannot be removed is reported through
         register_exception() and skipped."""
         latest = self.get_latest_version()
         if not latest > 1:
             return
         for docfile in self.docfiles:
             file_version = docfile.get_version()
             if file_version < latest:
                 file_format = docfile.get_format()
                 self.more_info.unset_comment(file_format, file_version)
                 self.more_info.unset_description(file_format, file_version)
                 try:
                     os.remove(docfile.get_full_path())
                 except Exception:
                     register_exception()
         Md5Folder(self.basedir).update()
         self.touch()
         self._build_file_list()
 
     def expunge(self):
         """Physically remove every file of this bibdoc, together with its
         comment and description. A failure on one file is reported through
         register_exception() and the remaining files are still processed.
         Afterwards the MD5 information is updated, the bibdoc is touched
         and the file list is rebuilt."""
         for docfile in self.docfiles:
             try:
                 file_format = docfile.get_format()
                 file_version = docfile.get_version()
                 self.more_info.unset_comment(file_format, file_version)
                 self.more_info.unset_description(file_format, file_version)
                 os.remove(docfile.get_full_path())
             except Exception:
                 register_exception()
         Md5Folder(self.basedir).update()
         self.touch()
         self._build_file_list()
 
     def revert(self, version):
         """Revert to a given version by copying its differnt formats to a new
         version."""
         try:
             version = int(version)
             new_version = self.get_latest_version() + 1
             for docfile in self.list_version_files(version):
                 destination = "%s/%s%s;%i" % (self.basedir, self.docname, docfile.get_format(), new_version)
                 if os.path.exists(destination):
                     raise InvenioWebSubmitFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, docfile.get_format())
                 try:
                     shutil.copyfile(docfile.get_full_path(), destination)
                     os.chmod(destination, 0644)
                     self.more_info.set_comment(self.more_info.get_comment(docfile.get_format(), version), docfile.get_format(), new_version)
                     self.more_info.set_description(self.more_info.get_description(docfile.get_format(), version), docfile.get_format(), new_version)
                 except Exception, e:
                     register_exception()
                     raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (docfile.get_full_path(), destination, e)
         finally:
             Md5Folder(self.basedir).update()
             self.touch()
             self._build_file_list()
 
-    def import_descriptions_and_comments_from_marc(self):
-        """Import description & comment from the corresponding marc."""
+    def import_descriptions_and_comments_from_marc(self, record=None):
+        """Import description & comment from the corresponding marc.
+        if record is passed it is directly used, otherwise it is
+        calculated after the xm stored in the database."""
         ## Let's get the record
         ## NOTE(review): format_record is given self.id, i.e. the id of the
         ## bibdoc; presumably the record id (self.recid) is expected --
         ## confirm against format_record.
-        xml = format_record(self.id, of='xm')
-        record = create_record(xml)[0]
+        if record is None:
+            xml = format_record(self.id, of='xm')
+            record = create_record(xml)[0]
         ## Only the 856 fields with first indicator '4' are considered.
         fields = record_get_field_instances(record, '856', '4', ' ')
 
         ## Values from the generic /record/<recid>/files/ URL apply to every
         ## format; values from a URL of this very docname apply to the
         ## format found in that URL and take precedence.
         global_comment = None
         global_description = None
         local_comment = {}
         local_description = {}
 
         for field in fields:
             url = field_get_subfield_values(field, 'u')
             if url:
                 ## Given a url
                 url = url[0]
                 if url == '%s/record/%s/files/' % (CFG_SITE_URL, self.recid):
                     ## If it is a traditional /record/1/files/ one
                     ## We have global description/comment for all the formats
                     description = field_get_subfield_values(field, 'y')
                     if description:
                         global_description = description[0]
                     comment = field_get_subfield_values(field, 'z')
                     if comment:
                         global_comment = comment[0]
                 elif bibdocfile_url_p(url):
                     ## Otherwise we have description/comment per format
                     dummy, docname, format = decompose_bibdocfile_url(url)
                     if docname == self.docname:
                         description = field_get_subfield_values(field, 'y')
                         if description:
                             local_description[format] = description[0]
                         comment = field_get_subfield_values(field, 'z')
                         if comment:
                             local_comment[format] = comment[0]
 
         ## Let's update the tables
         ## Only the files of the latest version are updated.
         version = self.get_latest_version()
         for docfile in self.list_latest_files():
             format = docfile.get_format()
             if format in local_comment:
                 self.set_comment(local_comment[format], format, version)
             else:
                 self.set_comment(global_comment, format, version)
             if format in local_description:
                 self.set_description(local_description[format], format, version)
             else:
                 self.set_description(global_description, format, version)
         self._build_file_list('init')
 
     def add_file_new_format(self, filename, version=None, description=None, comment=None):
         """add a new format of a file to an archive"""
         try:
             if version is None:
                 version = self.get_latest_version()
             if version == 0:
                 version = 1
             if os.path.exists(filename):
                 dummy, dummy, format = decompose_file(filename)
                 destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, version)
                 if os.path.exists(destination):
                     raise InvenioWebSubmitFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, format)
                 try:
                     shutil.copyfile(filename, destination)
                     os.chmod(destination, 0644)
                 except Exception, e:
                     register_exception()
                     raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e)
                 self.more_info.set_comment(comment, format, version)
                 self.more_info.set_description(description, format, version)
             else:
                 raise InvenioWebSubmitFileError, "'%s' does not exists!" % filename
         finally:
             Md5Folder(self.basedir).update()
             self.touch()
             self._build_file_list()
 
     def get_icon(self):
         """Returns the bibdoc corresponding to the icon of this bibdoc, i.e.
         the first item stored under the 'Icon' key of self.related_files,
         or None when there is no such key."""
         # "in" is used rather than has_key(): it also copes with
         # related_files still being the empty list set by the constructor.
         if 'Icon' in self.related_files:
             return self.related_files['Icon'][0]
         return None
 
     def add_icon(self, filename, basename=''):
         """Links an icon with the bibdoc object. Return the icon bibdoc.
         An existing icon is deleted first. The icon is stored in a new
         bibdoc of doctype 'Icon' whose docname is basename or, when
         basename is empty, the name taken from filename.
         Raises InvenioWebSubmitFileError if writing the .docid/.type files
         of the icon folder fails."""
         #first check if an icon already exists
         existing_icon = self.get_icon()
         if existing_icon is not None:
             existing_icon.delete()
         #then add the new one
         if not basename:
             basename = decompose_file(filename)[1]
         # the icon bibdoc is created without a recid
         newicon = BibDoc(doctype='Icon', docname=basename)
         newicon.add_file_new_version(filename)
-        run_sql("INSERT INTO bibdoc_bibdoc (id_bibdoc1, id_bibdoc2, type) VALUES (%s,%s,'Icon')",
-            (self.id, newicon.get_id(),))
         try:
             try:
                 # NOTE(review): if one of the writes below raises, the
                 # os.umask(old_umask) call is skipped -- confirm.
                 old_umask = os.umask(022)
                 # .docid holds the id of this (parent) bibdoc
                 recid_fd = open("%s/.docid" % newicon.get_base_dir(), "w")
                 recid_fd.write(str(self.id))
                 recid_fd.close()
                 # .type receives the doctype of this bibdoc, not 'Icon'
                 type_fd = open("%s/.type" % newicon.get_base_dir(), "w")
                 type_fd.write(str(self.doctype))
                 type_fd.close()
                 os.umask(old_umask)
+                run_sql("INSERT INTO bibdoc_bibdoc (id_bibdoc1, id_bibdoc2, type) VALUES (%s,%s,'Icon')", (self.id, newicon.get_id(),))
             except Exception, e:
                 register_exception()
                 raise InvenioWebSubmitFileError, "Encountered an exception while writing .docid and .doctype for folder '%s': '%s'" % (newicon.get_base_dir(), e)
         finally:
             Md5Folder(newicon.basedir).update()
             self.touch()
             self._build_related_file_list()
         return newicon
 
     def delete_icon(self):
         """Delete the icon of this bibdoc when it has one; in every case the
         bibdoc is then touched and its related files list is rebuilt."""
         icon = self.get_icon()
         if icon is not None:
             icon.delete()
         self.touch()
         self._build_related_file_list()
 
     def display(self, version="", ln = CFG_SITE_LANG):
         """Render the files of this bibdoc through the tmpl_bibdoc_filelist
         template. version selects what is listed: "all" for every file, an
         empty string for the latest files; anything else is converted with
         int() and only that version is listed. The image is the first file
         of the icon bibdoc when there is one, smallfiles.gif otherwise.
         A listed version is flagged as having previous ones when it is the
         latest version and is not 1.
         """
         if version == "all":
             docfiles = self.list_all_files()
         elif version != "":
             docfiles = self.list_version_files(int(version))
         else:
             docfiles = self.list_latest_files()
         icon = self.get_icon()
         if icon is None:
             imageurl = "%s/img/smallfiles.gif" % CFG_SITE_URL
         else:
             icon_file = icon.list_all_files()[0]
             imageurl = "%s/record/%s/files/%s" % \
                 (CFG_SITE_URL, self.recid, urllib.quote(icon_file.get_full_name()))
         versions = []
         for listed_version in list_versions_from_array(docfiles):
             has_previous = 0
             if listed_version == self.get_latest_version() and listed_version != 1:
                 has_previous = 1
             panels = [docfile.display(ln = ln)
                       for docfile in docfiles
                       if docfile.get_version() == listed_version]
             versions.append({
                 'version' : listed_version,
                 'previous' : has_previous,
                 'content' : panels,
             })
         return websubmit_templates.tmpl_bibdoc_filelist(
             ln = ln,
             versions = versions,
             imageurl = imageurl,
             docname = self.docname,
             recid = self.recid)
 
     def change_name(self, newname):
         """Rename the bibdoc name. New name must not be already used by the linked
         bibrecs.

         newname is normalized with normalize_docname() before use.  If a
         bibdoc with that name is already attached to self.recid an
         InvenioWebSubmitFileError is raised.  Otherwise every file in
         self.basedir whose name starts with the current docname is moved so
         that its first occurrence of the docname is replaced by newname;
         only when all the moves succeeded the bibdoc table row and
         self.docname are updated.  A failure while moving files is
         registered and re-raised as InvenioWebSubmitFileError.

         Whatever the outcome, the .md5 file of the folder is refreshed, the
         bibdoc is touched and both file lists are rebuilt (the file list
         with the 'rename' context).
         """
-        newname = normalize_docname(newname)
-        res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (self.recid, newname))
-        if res:
-            raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid)
-        run_sql("update bibdoc set docname=%s where id=%s", (newname, self.id,))
-        for f in os.listdir(self.basedir):
-            if f.startswith(self.docname):
-                shutil.move('%s/%s' % (self.basedir, f), '%s/%s' % (self.basedir, f.replace(self.docname, newname, 1)))
-        self.docname = newname
-        Md5Folder(self.basedir).update()
-        self.touch()
-        self._build_file_list('rename')
-        self._build_related_file_list()
+        try:
+            newname = normalize_docname(newname)
+            res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (self.recid, newname))
+            if res:
+                raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid)
+            try:
+                for f in os.listdir(self.basedir):
+                    if f.startswith(self.docname):
+                        shutil.move('%s/%s' % (self.basedir, f), '%s/%s' % (self.basedir, f.replace(self.docname, newname, 1)))
+            except Exception, e:
+                register_exception()
+                raise InvenioWebSubmitFileError("Error in renaming the bibdoc %s to %s for recid %s: %s" % (self.docname, newname, self.recid, e))
+            run_sql("update bibdoc set docname=%s where id=%s", (newname, self.id,))
+            self.docname = newname
+        finally:
+            Md5Folder(self.basedir).update()
+            self.touch()
+            self._build_file_list('rename')
+            self._build_related_file_list()
 
     def set_comment(self, comment, format, version=None):
         """Store comment for the given format.

         When version is None the latest version is used.  The bibdoc is
         then touched and its file list rebuilt with the 'init' context.
         """
         target_version = version
         if target_version is None:
             target_version = self.get_latest_version()
         self.more_info.set_comment(comment, format, target_version)
         self.touch()
         self._build_file_list('init')
 
     def set_description(self, description, format, version=None):
         """Store description for the given format.

         When version is None the latest version is used.  The bibdoc is
         then touched and its file list rebuilt with the 'init' context.
         """
         target_version = version
         if target_version is None:
             target_version = self.get_latest_version()
         self.more_info.set_description(description, format, target_version)
         self.touch()
         self._build_file_list('init')
 
     def get_comment(self, format, version=None):
         """Return the comment stored for format; version defaults to the
         latest version when None."""
         wanted_version = version
         if wanted_version is None:
             wanted_version = self.get_latest_version()
         return self.more_info.get_comment(format, wanted_version)
 
     def get_description(self, format, version=None):
         """Return the description stored for format; version defaults to
         the latest version when None."""
         wanted_version = version
         if wanted_version is None:
             wanted_version = self.get_latest_version()
         return self.more_info.get_description(format, wanted_version)
 
     def get_docname(self):
         """Return the docname currently held by this bibdoc."""
         return self.docname
 
     def get_base_dir(self):
         """Return self.basedir, the directory of this bibdoc
         (e.g. /soft/cdsweb/var/data/files/123)."""
         return self.basedir
 
     def get_type(self):
         """Return the doctype of this bibdoc."""
         return self.doctype
 
     def get_recid(self):
         """Return the recid stored on this bibdoc."""
         return self.recid
 
     def get_id(self):
         """Return the id of this bibdoc."""
         return self.id
 
     def get_file(self, format, version=""):
         """Return the docfile of this bibdoc having the given format.

         version == "" (the default) searches the latest version; any other
         value is converted with int() and that version is searched.
         format is passed through normalize_format(); when the normalized
         format is empty the first file of the version is returned.
         Raises InvenioWebSubmitFileError when nothing matches.
         """
         if version != "":
             version = int(version)
             candidates = self.list_version_files(version)
         else:
             candidates = self.list_latest_files()
 
         format = normalize_format(format)
 
         for candidate in candidates:
             if candidate.get_format() == format or not format:
                 return candidate
         raise InvenioWebSubmitFileError("No file called '%s' of format '%s', version '%s'" % (self.docname, format, version))
 
     def list_versions(self):
         """Return the distinct version numbers found in self.docfiles, in
         order of first appearance."""
         seen_versions = []
         for docfile in self.docfiles:
             current = docfile.get_version()
             if current not in seen_versions:
                 seen_versions.append(current)
         return seen_versions
 
     def delete(self):
         """delete the current bibdoc instance."""
         try:
             self.change_name('DELETED-%s-%s' % (datetime.today().strftime('%Y%m%d%H%M%S'), self.docname))
             run_sql("UPDATE bibdoc SET status='DELETED' WHERE id=%s", (self.id,))
         except Exception, e:
             register_exception()
             raise InvenioWebSubmitFileError, "It's impossible to delete bibdoc %s: %s" % (self.id, e)
 
     def deleted_p(self):
         """Tell whether the status held by this bibdoc is 'DELETED'."""
         is_deleted = self.status == 'DELETED'
         return is_deleted
 
     def empty_p(self):
         """Tell whether this bibdoc has no bibdocfile at all, i.e. whether
         self.docfiles is empty."""
         return not len(self.docfiles)
 
     def undelete(self, previous_status=''):
         """undelete a deleted file (only if it was actually deleted). The
         previous status, i.e. the restriction key can be provided.
         Otherwise the bibdoc will pe public."""
         try:
             run_sql("UPDATE bibdoc SET status=%s WHERE id=%s AND status='DELETED'", (self.id, previous_status))
         except Exception, e:
             register_exception()
             raise InvenioWebSubmitFileError, "It's impossible to undelete bibdoc %s: %s" % (self.id, e)
         if self.docname.startswith('DELETED-'):
             try:
                 # Let's remove DELETED-20080214144322- in front of the docname
                 original_name = '-'.join(self.docname.split('-')[2:])
                 self.change_name(original_name)
             except Exception, e:
                 register_exception()
                 raise InvenioWebSubmitFileError, "It's impossible to restore the previous docname %s. %s kept as docname because: %s" % (original_name, self.docname, e)
         else:
             raise InvenioWebSubmitFileError, "Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % self.docname
 
     def delete_file(self, format, version):
         """Remove from the filesystem the file of the given format and
         version.  This cannot be undone!

         Nothing happens when no such file is known to the bibdoc.  An
         OSError while removing the file is ignored; the bibdoc is then
         touched and its file list rebuilt.
         """
         try:
             doomed = self.get_file(format, version)
         except InvenioWebSubmitFileError:
             # No matching file: nothing to remove.
             return
         try:
             os.remove(doomed.get_full_path())
         except OSError:
             # Best effort: carry on refreshing the bibdoc state.
             pass
         self.touch()
         self._build_file_list()
 
     def get_history(self):
         """Return the history of this bibdoc as a list of strings, one per
         row of hstDOCUMENT for this bibdoc id, oldest first."""
         history = run_sql("""SELECT action, docname, docformat, docversion,
                 docsize, docchecksum, doctimestamp
                 FROM hstDOCUMENT
                 WHERE id_bibdoc=%s ORDER BY doctimestamp ASC""", (self.id, ))
         return ["%s %s '%s', format: '%s', version: %i, size: %s, checksum: '%s'" % (doctimestamp.strftime('%Y-%m-%d %H:%M:%S'), action, docname, docformat, docversion, nice_size(docsize), docchecksum)
                 for (action, docname, docformat, docversion, docsize, docchecksum, doctimestamp) in history]
 
     def _build_file_list(self, context=''):
         """Rebuild self.docfiles from the content of self.basedir. This
         function should be called every time the bibdoc is modified.

         As a side effect, unless context is "init", the difference between
         the previous and the new file list is logged into the hstDOCUMENT
         table, according to the context:
         "init": the function is called for the first time by a constructor,
         hence no logging is performed;
         "": (the default) every file that disappeared is logged as DELETED
         and every new file as ADDED;
         "rename": every apparently deleted file is logged as RENAMEDFROM and
         every new file as RENAMEDTO, using the current time instead of the
         file modification time.
         """
 
         def log_action(action, docid, docname, format, version, size, checksum, timestamp=''):
             """Log an action into the hstDOCUMENT table. When timestamp is
             empty the database NOW() is used instead. A DatabaseError is
             registered and otherwise ignored."""
             try:
                 if timestamp:
                     run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)', (action, docid, docname, format, version, size, checksum, timestamp))
                 else:
                     run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, NOW())', (action, docid, docname, format, version, size, checksum))
             except DatabaseError:
                 register_exception()
 
         def make_removed_added_bibdocfiles(previous_file_list):
             """Compare previous_file_list with self.docfiles and return the
             pair (added_files, deleted_files). Both are dictionaries mapping
             (name, format, version) to (size, checksum, modification time)."""
 
             # Let's rebuild the previous situation
             old_files = {}
             for bibdocfile in previous_file_list:
                 old_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md)
 
             # Let's rebuild the new situation
             new_files = {}
             for bibdocfile in self.docfiles:
                 new_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md)
 
             # Start from all the new files as "added" and remove from them
             # every file that was already present in the old list; old files
             # that are not among the new ones are the deleted ones.
             added_files = dict(new_files)
             deleted_files = {}
             for key, value in old_files.iteritems():
                 if added_files.has_key(key):
                     del added_files[key]
                 else:
                     deleted_files[key] = value
             return (added_files, deleted_files)
 
         # Remember the current list so that it can be compared with the new
         # one once the directory has been scanned again.
         if context != 'init':
             previous_file_list = list(self.docfiles)
         self.docfiles = []
         if os.path.exists(self.basedir):
             self.md5s = Md5Folder(self.basedir)
             files = os.listdir(self.basedir)
             files.sort()
             for afile in files:
                 # Names starting with a dot are skipped.
                 if not afile.startswith('.'):
                     try:
                         filepath = os.path.join(self.basedir, afile)
                         # The version is what follows the last ';' in the
                         # file name; a name that does not end with a number
                         # makes int() fail and the file is skipped below.
                         fileversion = int(re.sub(".*;", "", afile))
                         fullname = afile.replace(";%s" % fileversion, "")
                         checksum = self.md5s.get_checksum(afile)
                         (dirname, basename, format) = decompose_file(fullname)
                         comment = self.more_info.get_comment(format, fileversion)
                         description = self.more_info.get_description(format, fileversion)
                         # we can append file:
                         self.docfiles.append(BibDocFile(filepath, self.doctype,
                             fileversion, basename, format,
                             self.recid, self.id, self.status, checksum, description, comment))
                     except Exception, e:
                         # A problematic file is reported and left out of the
                         # list; the scan goes on with the next one.
                         register_exception()
         if context == 'init':
             return
         else:
             added_files, deleted_files = make_removed_added_bibdocfiles(previous_file_list)
             deletedstr = "DELETED"
             addedstr = "ADDED"
             if context == 'rename':
                 deletedstr = "RENAMEDFROM"
                 addedstr = "RENAMEDTO"
             for (docname, format, version), (size, checksum, md) in added_files.iteritems():
                 if context == 'rename':
                     md = '' # No modification time
                 log_action(addedstr, self.id, docname, format, version, size, checksum, md)
             for (docname, format, version), (size, checksum, md) in deleted_files.iteritems():
                 if context == 'rename':
                     md = '' # No modification time
                 log_action(deletedstr, self.id, docname, format, version, size, checksum, md)
 
     def _build_related_file_list(self):
         """Rebuild self.related_files from the bibdoc_bibdoc table. This
         function should be called every time the bibdoc is modified within
         e.g. its icon.

         self.related_files maps each link type to the list of the linked
         bibdocs whose status is not 'DELETED'.
         """
         self.related_files = {}
         links = run_sql("SELECT ln.id_bibdoc2,ln.type,bibdoc.status FROM "
             "bibdoc_bibdoc AS ln,bibdoc WHERE id=ln.id_bibdoc2 AND "
             "ln.id_bibdoc1=%s", (self.id,))
         for link in links:
             related_id, link_type, related_status = link[0], link[1], link[2]
             if related_status == 'DELETED':
                 continue
             if link_type not in self.related_files:
                 self.related_files[link_type] = []
             self.related_files[link_type].append(BibDoc(docid=related_id))
 
     def get_total_size_latest_version(self):
         """Sum the sizes of the files of this bibdoc that belong to the
         latest version."""
         return sum([bibdocfile.get_size() for bibdocfile in self.list_latest_files()])
 
     def get_total_size(self):
         """Sum the sizes of all the files of this bibdoc, whatever their
         version."""
         return sum([bibdocfile.get_size() for bibdocfile in self.list_all_files()])
 
     def list_all_files(self):
         """Return self.docfiles, i.e. every docfile of this bibdoc (the
         list itself, not a copy)."""
         return self.docfiles
 
     def list_latest_files(self):
         """Return the docfiles that belong to the latest version."""
         latest_version = self.get_latest_version()
         return self.list_version_files(latest_version)
 
     def list_version_files(self, version):
         """Return the docfiles whose version equals int(version)."""
         wanted = int(version)
         matching = []
         for docfile in self.docfiles:
             if docfile.get_version() == wanted:
                 matching.append(docfile)
         return matching
 
     def get_latest_version(self):
         """Return the highest version number found among self.docfiles,
         or 0 when the bibdoc has no file."""
         latest = 0
         for bibdocfile in self.docfiles:
             candidate = bibdocfile.get_version()
             if candidate > latest:
                 latest = candidate
         return latest
 
     def get_file_number(self):
         """Return how many docfiles this bibdoc holds, all versions
         included."""
         return len(self.docfiles)
 
     def register_download(self, ip_address, version, format, userid=0):
         """Record in rnkDOWNLOADS that a file of this bibdoc was downloaded.

         format is normalized, stripped of its leading dot (if any) and
         uppercased before being stored.  Returns what run_sql returns.
         """
         file_format = normalize_format(format)
         if file_format.startswith('.'):
             file_format = file_format[1:]
         file_format = file_format.upper()
         query = ("INSERT INTO rnkDOWNLOADS "
             "(id_bibrec,id_bibdoc,file_version,file_format,"
             "id_user,client_host,download_time) VALUES "
             "(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())")
         params = (self.recid, self.id, version, file_format,
             userid, ip_address,)
         return run_sql(query, params)
 
 class BibDocFile:
     """This class represents a physical file in the CDS Invenio filesystem.
     It should never be instantiated directly"""
 
     def __init__(self, fullpath, doctype, version, name, format, recid, docid, status, checksum, description=None, comment=None):
         self.fullpath = fullpath
         self.doctype = doctype
         self.docid = docid
         self.recid = recid
         self.version = version
         self.status = status
         self.checksum = checksum
         self.description = description
         self.comment = comment
         self.size = os.path.getsize(fullpath)
         self.md = datetime.fromtimestamp(os.path.getmtime(fullpath))
         try:
             self.cd = datetime.fromtimestamp(os.path.getctime(fullpath))
         except OSError:
             self.cd = self.md
         self.name = name
         self.format = normalize_format(format)
         self.dir = os.path.dirname(fullpath)
         self.url = '%s/record/%s/files/%s%s' % (CFG_SITE_URL, self.recid, urllib.quote(self.name), urllib.quote(self.format))
         if format == "":
             self.mime = "text/plain"
             self.encoding = ""
             self.fullname = name
         else:
             self.fullname = "%s%s" % (name, self.format)
             (self.mime, self.encoding) = _mimes.guess_type(self.fullname)
             if self.mime is None:
                 self.mime = "text/plain"
 
     def __repr__(self):
         return ('BibDocFile(%s, %s, %i, %s, %s, %i, %i, %s, %s, %s, %s)' % (repr(self.fullpath), repr(self.doctype), self.version, repr(self.name), repr(self.format), self.recid, self.docid, repr(self.status), repr(self.checksum), repr(self.description), repr(self.comment)))
 
     def __str__(self):
         out = '%s:%s:%s:%s:fullpath=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullpath)
         out += '%s:%s:%s:%s:fullname=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullname)
         out += '%s:%s:%s:%s:name=%s\n' % (self.recid, self.docid, self.version, self.format, self.name)
         out += '%s:%s:%s:%s:status=%s\n' % (self.recid, self.docid, self.version, self.format, self.status)
         out += '%s:%s:%s:%s:checksum=%s\n' % (self.recid, self.docid, self.version, self.format, self.checksum)
         out += '%s:%s:%s:%s:size=%s\n' % (self.recid, self.docid, self.version, self.format, nice_size(self.size))
         out += '%s:%s:%s:%s:creation time=%s\n' % (self.recid, self.docid, self.version, self.format, self.cd)
         out += '%s:%s:%s:%s:modification time=%s\n' % (self.recid, self.docid, self.version, self.format, self.md)
         out += '%s:%s:%s:%s:encoding=%s\n' % (self.recid, self.docid, self.version, self.format, self.encoding)
         out += '%s:%s:%s:%s:url=%s\n' % (self.recid, self.docid, self.version, self.format, self.url)
         out += '%s:%s:%s:%s:description=%s\n' % (self.recid, self.docid, self.version, self.format, self.description)
         out += '%s:%s:%s:%s:comment=%s\n' % (self.recid, self.docid, self.version, self.format, self.comment)
         return out
 
     def display(self, ln = CFG_SITE_LANG):
         """Returns a formatted representation of this docfile."""
         return websubmit_templates.tmpl_bibdocfile_filelist(
                  ln = ln,
                  recid = self.recid,
                  version = self.version,
                  name = self.name,
                  format = self.format,
                  size = self.size,
                )
 
     def is_restricted(self, req):
         """Returns restriction state. (see acc_authorize_action return values)"""
         if self.status not in ('', 'DELETED'):
             return acc_authorize_action(req, 'viewrestrdoc', status=self.status)
         elif self.status == 'DELETED':
             return (1, 'File has ben deleted')
         else:
             return (0, '')
 
     def get_url(self):
         return self.url
 
     def get_type(self):
         return self.doctype
 
     def get_path(self):
         return self.fullpath
 
     def get_bibdocid(self):
         return self.docid
 
     def get_name(self):
         return self.name
 
     def get_full_name(self):
         return self.fullname
 
     def get_full_path(self):
         return self.fullpath
 
     def get_format(self):
         return self.format
 
     def get_size(self):
         return self.size
 
     def get_version(self):
         return self.version
 
     def get_checksum(self):
         return self.checksum
 
     def get_description(self):
         return self.description
 
     def get_comment(self):
         return self.comment
 
     def get_content(self):
         """Returns the binary content of the file."""
         content_fd = open(self.fullpath, 'rb')
         content = content_fd.read()
         content_fd.close()
         return content
 
     def get_recid(self):
         """Returns the recid connected with the bibdoc of this file."""
         return self.recid
 
     def get_status(self):
         """Returns the status of the file, i.e. either '', 'DELETED' or a
         restriction keyword."""
         return self.status
 
     def check(self):
         """Return True if the checksum corresponds to the file."""
         return calculate_md5(self.fullpath) == self.checksum
 
     def stream(self, req):
         """Stream the file."""
         if self.status:
             (auth_code, auth_message) = acc_authorize_action(req, 'viewrestrdoc', status=self.status)
         else:
             auth_code = 0
         if auth_code == 0:
             if os.path.exists(self.fullpath):
                 if calculate_md5(self.fullpath) != self.checksum:
                     raise InvenioWebSubmitFileError, "File %s, version %i, for record %s is corrupted!" % (self.fullname, self.version, self.recid)
                 req.content_type = self.mime
                 req.encoding = self.encoding
                 req.filename = self.fullname
                 req.headers_out["Content-Disposition"] = \
                     "inline; filename=%s" % quoteattr(self.fullname)
                 req.set_content_length(self.size)
                 req.send_http_header()
                 try:
                     req.sendfile(self.fullpath)
                     return ""
                 except IOError, e:
                     register_exception(req=req)
                     raise InvenioWebSubmitFileError, "Encountered exception while reading '%s': '%s'" % (self.fullpath, e)
             else:
                 raise InvenioWebSubmitFileError, "%s does not exists!" % self.fullpath
         else:
             raise InvenioWebSubmitFileError, "You are not authorized to download %s: %s" % (self.fullname, auth_message)
 
 def stream_restricted_icon(req):
     """Return the content of the "Restricted Icon" file."""
     req.content_type = 'image/gif'
     req.encoding = None
     req.filename = 'restricted'
     req.headers_out["Content-Disposition"] = \
         "inline; filename=%s" % quoteattr('restricted')
     req.set_content_length(os.path.getsize('%s/img/restricted.gif' % CFG_WEBDIR))
     req.send_http_header()
     try:
         req.sendfile('%s/img/restricted.gif' % CFG_WEBDIR)
         return ""
     except Exception, e:
         register_exception(req=req)
         raise InvenioWebSubmitFileError, "Encountered exception while streaming restricted icon: '%s'" % (e, )
 
 
 def list_types_from_array(bibdocs):
     """Return the distinct types of the given bibdocs, in order of first
     appearance."""
     seen_types = []
     for bibdoc in bibdocs:
         doctype = bibdoc.get_type()
         if doctype not in seen_types:
             seen_types.append(doctype)
     return seen_types
 
 def list_versions_from_array(docfiles):
     """Return the distinct versions of the given docfiles, in order of
     first appearance."""
     seen_versions = []
     for docfile in docfiles:
         current = docfile.get_version()
         if current not in seen_versions:
             seen_versions.append(current)
     return seen_versions
 
 def order_files_with_version(docfile1, docfile2):
     """Comparison function putting the docfile with the highest version
     first."""
     first_version = docfile1.get_version()
     second_version = docfile2.get_version()
     # Arguments are swapped on purpose: higher versions sort before lower.
     return cmp(second_version, first_version)
 
 def _make_base_dir(docid):
     """Return the path of the directory meant to host the files of docid:
     CFG_WEBSUBMIT_FILEDIR/g<group>/<docid>, where <group> is docid divided
     by CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT."""
     group_number = int(int(docid) / CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT)
     group_dir = "g" + str(group_number)
     return os.path.join(CFG_WEBSUBMIT_FILEDIR, group_dir, str(docid))
 
 
 class Md5Folder:
     """Manage all the Md5 checksum about a folder"""
     def __init__(self, folder):
         """Initialize the class from the md5 checksum of a given path"""
         self.folder = folder
         try:
             self.load()
         except InvenioWebSubmitFileError:
             self.md5s = {}
             self.update()
 
     def update(self, only_new = True):
         """Update the .md5 file with the current files. If only_new
         is specified then only not already calculated file are calculated."""
         if not only_new:
             self.md5s = {}
         if os.path.exists(self.folder):
             for filename in os.listdir(self.folder):
                 if filename not in self.md5s and not filename.startswith('.'):
                     self.md5s[filename] = calculate_md5(os.path.join(self.folder, filename))
         self.store()
 
     def store(self):
         """Store the current md5 dictionary into .md5"""
         try:
             old_umask = os.umask(022)
             md5file = open(os.path.join(self.folder, ".md5"), "w")
             for key, value in self.md5s.items():
                 md5file.write('%s *%s\n' % (value, key))
             md5file.close()
             os.umask(old_umask)
         except Exception, e:
             register_exception()
             raise InvenioWebSubmitFileError, "Encountered an exception while storing .md5 for folder '%s': '%s'" % (self.folder, e)
 
     def load(self):
         """Load .md5 into the md5 dictionary"""
         self.md5s = {}
         try:
             md5file = open(os.path.join(self.folder, ".md5"), "r")
             for row in md5file:
                 md5hash = row[:32]
                 filename = row[34:].strip()
                 self.md5s[filename] = md5hash
             md5file.close()
         except IOError:
             self.update()
         except Exception, e:
             register_exception()
             raise InvenioWebSubmitFileError, "Encountered an exception while loading .md5 for folder '%s': '%s'" % (self.folder, e)
 
     def check(self, filename = ''):
         """Check the specified file or all the files for which it exists a hash
         for being coherent with the stored hash."""
         if filename and filename in self.md5s.keys():
             try:
                 return self.md5s[filename] == calculate_md5(os.path.join(self.folder, filename))
             except Exception, e:
                 register_exception()
                 raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e)
         else:
             for filename, md5hash in self.md5s.items():
                 try:
                     if calculate_md5(os.path.join(self.folder, filename)) != md5hash:
                         return False
                 except Exception, e:
                     register_exception()
                     raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e)
             return True
 
     def get_checksum(self, filename):
         """Return the checksum of a physical file."""
         md5hash = self.md5s.get(filename, None)
         if md5hash is None:
             self.update()
         # Now it should not fail!
         md5hash = self.md5s[filename]
         return md5hash
 
 def calculate_md5_external(filename):
     """Calculate the md5 of a physical file through md5sum Command Line Tool.

     The tool is run in binary mode (-b) on the shell-escaped filename and
     the first 32 characters of its output are returned.  When the output is
     shorter than that, the md5 is computed by calculate_md5() with
     force_internal=True instead.  calculate_md5() delegates to this
     function for files that are not smaller than
     CFG_BIBDOCFILE_MD5_THRESHOLD.

     Any exception is re-raised as InvenioWebSubmitFileError.
     """
     try:
-        md5_result = os.popen('md5sum -b "%s"' % filename)
+        md5_result = os.popen(CFG_PATH_MD5SUM + ' -b %s' % escape_shell_arg(filename))
         ret = md5_result.read()[:32]
         md5_result.close()
         if len(ret) != 32:
             # Error in running md5sum. Let's fallback to internal
             # algorithm.
             return calculate_md5(filename, force_internal=True)
         else:
             return ret
     except Exception, e:
         raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e)
 
 def calculate_md5(filename, force_internal=False):
     """Calculate the md5 of a physical file and return it as a hex digest.

     The md5 is computed internally, reading the file in chunks of
     CFG_BIBDOCFILE_MD5_BUFFER, when no md5sum tool is configured, when
     force_internal is true or when the file is smaller than
     CFG_BIBDOCFILE_MD5_THRESHOLD.  Otherwise the job is handed over to
     calculate_md5_external().

     A failure of the internal computation is registered and re-raised as
     InvenioWebSubmitFileError.
     """
-    if not CFG_BIBDOCFILE_MD5SUM_EXISTS or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD:
+    if not CFG_PATH_MD5SUM or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD:
         try:
             to_be_read = open(filename, "rb")
             computed_md5 = md5.new()
             while True:
                 buf = to_be_read.read(CFG_BIBDOCFILE_MD5_BUFFER)
                 if buf:
                     computed_md5.update(buf)
                 else:
                     break
             to_be_read.close()
             return computed_md5.hexdigest()
         except Exception, e:
             register_exception()
             raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e)
     else:
         return calculate_md5_external(filename)
 
 
 def bibdocfile_url_to_bibrecdocs(url):
     """Build the BibRecDocs object of the record addressed by url, an URL
     in the form CFG_SITE_[SECURE_]URL/record/xxx/files/..."""
     recid, dummy_docname, dummy_format = decompose_bibdocfile_url(url)
     return BibRecDocs(recid)
 
 def bibdocfile_url_to_bibdoc(url):
     """Build the BibDoc object of the recid/docname addressed by url, an
     URL in the form CFG_SITE_[SECURE_]URL/record/xxx/files/..."""
     dummy_recid, docname, dummy_format = decompose_bibdocfile_url(url)
     bibrecdocs = bibdocfile_url_to_bibrecdocs(url)
     return bibrecdocs.get_bibdoc(docname)
 
 def bibdocfile_url_to_bibdocfile(url):
     """Build the BibDocFile object of the recid/docname/format addressed by
     url, an URL in the form CFG_SITE_[SECURE_]URL/record/xxx/files/..."""
     dummy_recid, dummy_docname, format = decompose_bibdocfile_url(url)
     bibdoc = bibdocfile_url_to_bibdoc(url)
     return bibdoc.get_file(format)
 
 def bibdocfile_url_to_fullpath(url):
     """Return the full path of the file of the recid/docname/format
     addressed by url, an URL in the form
     CFG_SITE_[SECURE_]URL/record/xxx/files/..."""
     bibdocfile = bibdocfile_url_to_bibdocfile(url)
     return bibdocfile.get_full_path()
 
 def bibdocfile_url_p(url):
     """Tell whether url looks like the URL of a fulltext owned by the
     system: it must start with CFG_SITE_URL/record/ or
     CFG_SITE_SECURE_URL/record/ and contain '/files/' exactly once, with
     something on both sides of it."""
     if not url.startswith('%s/record/' % CFG_SITE_URL):
         if not url.startswith('%s/record/' % CFG_SITE_SECURE_URL):
             return False
     url_parts = url.split('/files/')
     if len(url_parts) != 2:
         return False
     return url_parts[0] != '' and url_parts[1] != ''
 
 def decompose_bibdocfile_url(url):
     """Split a bibdocfile url into the triple (recid, docname, format).

     Raises InvenioWebSubmitFileError when url starts neither with
     CFG_SITE_URL/record/ nor with CFG_SITE_SECURE_URL/record/.
     """
     plain_prefix = '%s/record/' % CFG_SITE_URL
     if url.startswith(plain_prefix):
         recid_file = url[len(plain_prefix):]
     else:
         secure_prefix = '%s/record/' % CFG_SITE_SECURE_URL
         if url.startswith(secure_prefix):
             recid_file = url[len(secure_prefix):]
         else:
             raise InvenioWebSubmitFileError("Url %s doesn't correspond to a valid record inside the system." % url)
     # "<recid>/files/<name>" becomes "<recid>/<name>", so that the recid
     # comes out of decompose_file as the directory part.
     recid_file = recid_file.replace('/files/', '/')
     recid, docname, format = decompose_file(urllib.unquote(recid_file))
     return (int(recid), docname, format)
 
+re_bibdocfile_old_url = re.compile(r'/record/(\d*)/files/')
+def decompose_bibdocfile_old_url(url):
+    """Given a bibdocfile old url (e.g. CFG_SITE_URL/record/123/files)
+    it returns the recid."""
+    g = re_bibdocfile_old_url.search(url)
+    if g:
+        return int(g.group(1))
+    raise InvenioWebSubmitFileError('%s is not a valid old bibdocfile url' % url)
+
 def nice_size(size):
     """Return size as a printable string followed by its unit (B, KB, MB or GB).
     The value is divided by 1024 for as long as it exceeds 1024 (GB being the
     largest unit) and then rendered by websearch_templates.tmpl_nice_number
     with at most two digits after the dot."""
     unit = 'B'
     for larger_unit in ('KB', 'MB', 'GB'):
         if size > 1024:
             size /= 1024.0
             unit = larger_unit
         else:
             break
     pretty_number = websearch_templates.tmpl_nice_number(size, max_ndigits_after_dot=2)
     return '%s %s' % (pretty_number, unit)
 
 def get_docname_from_url(url):
     """Guess a docname from url: the last component of the unquoted url path
     with its extension removed by file_strip_ext."""
     unquoted_url = urllib.unquote(url)
     url_path = urllib2.urlparse.urlsplit(unquoted_url)[2]
     dummy_dirname, filename = os.path.split(url_path)
     return file_strip_ext(filename)
 
 def get_format_from_url(url):
     """Guess a format from url: whatever remains of the last component of the
     unquoted url path once the part kept by file_strip_ext is taken away."""
     unquoted_url = urllib.unquote(url)
     url_path = urllib2.urlparse.urlsplit(unquoted_url)[2]
     dummy_dirname, filename = os.path.split(url_path)
     stem = file_strip_ext(filename)
     return filename[len(stem):]
 
 def clean_url(url):
     """Normalize a local url. When url has no scheme or the 'file' scheme, the
     os.path.realpath of its unquoted path is returned; any other url is
     returned unchanged."""
     scheme = urllib2.urlparse.urlsplit(url)[0]
     if scheme not in ('', 'file'):
         return url
     local_path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
     return os.path.realpath(local_path)
 
 def check_valid_url(url):
     """Check for validity of a url or a file."""
     try:
         protocol = urllib2.urlparse.urlsplit(url)[0]
         if protocol in ('', 'file'):
             path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
             if os.path.realpath(path) != path:
                 raise StandardError, "%s is not a normalized path (would be %s)." % (path, os.path.normpath(path))
             for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR]:
                 if path.startswith(allowed_path):
                     dummy_fd = open(path)
                     dummy_fd.close()
                     return
             raise StandardError, "%s is not in one of the allowed paths." % path
         else:
             urllib2.urlopen(url)
     except Exception, e:
         raise StandardError, "%s is not a correct url: %s" % (url, e)
 
 def download_url(url, format, sleep=2):
     """Fetch url into a fresh temporary file and return the path of that file.
     The temporary file is created in CFG_TMPDIR with the normalized format as
     suffix. An url with no scheme or with the 'file' scheme is copied from the
     local filesystem, provided its path equals its own os.path.realpath and
     starts with one of CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS or with
     CFG_TMPDIR. Any other url is downloaded with CFG_PATH_WGET.
     On any failure the temporary file is removed and the error propagates
     (StandardError for the failures detected here).
     The sleep parameter is accepted but not used by this function."""
     format = normalize_format(format)
     scheme = urllib2.urlparse.urlsplit(url)[0]
     tmpfd, tmppath = tempfile.mkstemp(suffix=format, dir=CFG_TMPDIR)
     try:
         try:
             if scheme not in ('', 'file'):
                 wget_command = CFG_PATH_WGET + ' %s -O %s -t 2 -T 40' % \
                     (escape_shell_arg(url), escape_shell_arg(tmppath))
                 exit_code, dummy_out, wget_err = run_shell_command(wget_command)
                 if exit_code:
                     raise StandardError("It's impossible to download %s: %s" % (url, wget_err))
                 return tmppath
             local_path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
             if os.path.realpath(local_path) != local_path:
                 raise StandardError("%s is not a normalized path (would be %s)." % (local_path, os.path.normpath(local_path)))
             allowed_roots = CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR]
             for allowed_root in allowed_roots:
                 if local_path.startswith(allowed_root):
                     shutil.copy(local_path, tmppath)
                     return tmppath
             raise StandardError("%s is not in one of the allowed paths." % local_path)
         except:
             # Do not leave a useless temporary file behind when failing.
             os.remove(tmppath)
             raise
     finally:
         # The descriptor returned by mkstemp is released on every exit path.
         os.close(tmpfd)
 
 class BibDocMoreInfo:
     """Class to wrap the serialized bibdoc more_info. At the moment
     it stores descriptions and comments for each BibDoc.

     The wrapped structure is a dictionary with the keys 'descriptions' and
     'comments'; each of them maps a version number to a dictionary mapping
     a format to the corresponding text. Every modification is written back
     to the bibdoc table through flush()."""
     def __init__(self, docid, more_info=None):
         """Load the more_info of the bibdoc identified by docid.

         docid must be a positive int or long (checked with an assert).
         When more_info is None the pickled value is read from the more_info
         column of the bibdoc table, otherwise more_info itself is unpickled.
         Any error while loading or unpickling silently yields an empty
         structure. Other errors are passed to register_exception() and
         re-raised."""
         try:
             assert(type(docid) in (long, int) and docid > 0)
             self.docid = docid
             try:
                 if more_info is None:
                     res = run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (docid, ))
                     if res and res[0][0]:
                         # NOTE(review): cPickle.loads runs on data read back
                         # from the database; pickle is unsafe if that column
                         # can ever hold untrusted content.
                         self.more_info = cPickle.loads(blob_to_string(res[0][0]))
                     else:
                         self.more_info = {}
                 else:
                     self.more_info = cPickle.loads(more_info)
             except:
                 # Best effort: an unreadable value is treated as no value.
                 self.more_info = {}
             # Guarantee that both top-level keys exist for the accessors.
             if 'descriptions' not in self.more_info:
                 self.more_info['descriptions'] = {}
             if 'comments' not in self.more_info:
                 self.more_info['comments'] = {}
         except:
             register_exception()
             raise
 
     def flush(self):
         """Pickle more_info and store it in the more_info column of the
         bibdoc row of this docid. The write is unconditional: no dirty flag
         is tracked."""
         run_sql('UPDATE bibdoc SET more_info=%s WHERE id=%s', (cPickle.dumps(self.more_info), self.docid))
 
     def get_comment(self, format, version):
         """Return the comment corresponding to the given docid/format/version,
         or None when no comment is stored for it. version must be an int
         (checked with an assert); errors are passed to register_exception()
         and re-raised."""
         try:
             assert(type(version) is int)
             return self.more_info['comments'].get(version, {}).get(format)
         except:
             register_exception()
             raise
 
     def get_description(self, format, version):
         """Return the description corresponding to the given
         docid/format/version, or None when no description is stored for it.
         version must be an int (checked with an assert); errors are passed
         to register_exception() and re-raised."""
         try:
             assert(type(version) is int)
             return self.more_info['descriptions'].get(version, {}).get(format)
         except:
             register_exception()
             raise
 
     def set_comment(self, comment, format, version):
         """Store a comment corresponding to the given docid/format/version
         and flush. version must be a positive int (checked with an assert).
         When comment is KEEP_OLD_VALUE, the comment already stored for this
         version is used, falling back to the one of version - 1. An empty
         resulting comment removes the stored comment instead. Errors are
         passed to register_exception() and re-raised."""
         try:
             assert(type(version) is int and version > 0)
             if comment == KEEP_OLD_VALUE:
                 comment = self.get_comment(format, version) or self.get_comment(format, version - 1)
             if not comment:
                 self.unset_comment(format, version)
                 self.flush()
                 return
             if not version in self.more_info['comments']:
                 self.more_info['comments'][version] = {}
             self.more_info['comments'][version][format] = comment
             self.flush()
         except:
             register_exception()
             raise
 
     def set_description(self, description, format, version):
         """Store a description corresponding to the given
         docid/format/version and flush. version must be a positive int
         (checked with an assert). When description is KEEP_OLD_VALUE, the
         description already stored for this version is used, falling back to
         the one of version - 1. An empty resulting description removes the
         stored description instead. Errors are passed to
         register_exception() and re-raised."""
         try:
             assert(type(version) is int and version > 0)
             if description == KEEP_OLD_VALUE:
                 description = self.get_description(format, version) or self.get_description(format, version - 1)
             if not description:
                 self.unset_description(format, version)
                 self.flush()
                 return
             if not version in self.more_info['descriptions']:
                 self.more_info['descriptions'][version] = {}
             self.more_info['descriptions'][version][format] = description
             self.flush()
         except:
             register_exception()
             raise
 
     def unset_comment(self, format, version):
         """Remove the comment stored for the given format/version and flush.
         Nothing happens (and nothing is flushed) when no such comment is
         stored. version must be a positive int (checked with an assert)."""
         try:
             assert(type(version) is int and version > 0)
             del self.more_info['comments'][version][format]
             self.flush()
         except KeyError:
             # Nothing was stored for this version/format: nothing to remove.
             pass
         except:
             register_exception()
             raise
 
     def unset_description(self, format, version):
         """Remove the description stored for the given format/version and
         flush. Nothing happens (and nothing is flushed) when no such
         description is stored. version must be a positive int (checked with
         an assert)."""
         try:
             assert(type(version) is int and version > 0)
             del self.more_info['descriptions'][version][format]
             self.flush()
         except KeyError:
             # Nothing was stored for this version/format: nothing to remove.
             pass
         except:
             register_exception()
             raise
 
     def serialize(self):
         """Return the serialized version of the more_info, i.e. the string
         produced by cPickle.dumps."""
         return cPickle.dumps(self.more_info)
 
 def readfile(filename):
     """Try to read a file. Return '' in case of any error.
     This function is useful for quick implementation of websubmit functions.
     filename is the path of the file to read; the whole content of the file
     is returned when it can be opened and read.
     """
     try:
         fd = open(filename)
         try:
             return fd.read()
         finally:
             # Release the file even when read() fails.
             fd.close()
     except:
         # Deliberately best effort: whatever goes wrong, answer ''.
         return ''