diff --git a/modules/miscutil/lib/Makefile.am b/modules/miscutil/lib/Makefile.am
index 3f65d792c..41bd3515b 100644
--- a/modules/miscutil/lib/Makefile.am
+++ b/modules/miscutil/lib/Makefile.am
@@ -1,88 +1,90 @@
 ## $Id$
 
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.  
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 pylibdir = $(libdir)/python/invenio
 pylib_DATA = __init__.py \
              errorlib.py \
              errorlib_tests.py \
              errorlib_webinterface.py \
              errorlib_regression_tests.py \
              config.py \
              data_cacher.py \
              dbquery.py \
              dbquery_tests.py \
 	     miscutil_config.py \
              messages.py \
              textutils.py \
              dateutils.py \
              dateutils_tests.py \
              htmlutils.py \
 	     htmlutils_tests.py \
 	     testutils.py \
 	     testutils_regression_tests.py \
-	     urlutils.py             
+	     urlutils.py \
+	     w3c_validator.py
 
 noinst_DATA = testimport.py \
               kwalitee.py
 
 phplibdir = $(libdir)/php/invenio/errors
 phplib_DATA = errorHandling.php
 
 EXTRA_DIST = __init__.py \
              errorlib.py \
              errorlib_tests.py \
              errorlib_webinterface.py \
              errorlib_regression_tests.py \
 	     miscutil_config.py \
              config.py.wml \
              dbquery.py.wml \
              dbquery_tests.py \
              messages.py.wml \
              errorHandling.php.wml \
              textutils.py \
              dateutils.py \
              dateutils_tests.py \
              htmlutils.py \
 	     htmlutils_tests.py \
 	     testutils.py \
 	     testutils_regression_tests.py \
              urlutils.py \
+             w3c_validator.py \
              testimport.py \
              kwalitee.py
 
 install-data-hook:
 	$(PYTHON) $(srcdir)/testimport.py ${prefix}
 
 CLEANFILES = config.py dbquery.py messages.py $(phplib_DATA) *~ *.tmp *.pyc
 
 config.py: config.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \
       $(top_srcdir)/config/cdswmllib.wml
 	$(WML) -o $@ $<
 
 dbquery.py: dbquery.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \
       $(top_srcdir)/config/cdswmllib.wml
 	$(WML) -o $@ $<
 
 messages.py: messages.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \
       $(top_srcdir)/config/cdswmllib.wml
 	$(WML) -o $@ $<
 
 errorHandling.php: errorHandling.php.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \
       $(top_srcdir)/config/cdswmllib.wml
 	$(WML) -o $@ $<
diff --git a/modules/miscutil/lib/testutils.py b/modules/miscutil/lib/testutils.py
index 439540f02..33067d7ab 100644
--- a/modules/miscutil/lib/testutils.py
+++ b/modules/miscutil/lib/testutils.py
@@ -1,213 +1,222 @@
 ## $Id$
 ##
 ## This file is part of CDS Invenio.
 ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
 ##
 ## CDS Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## CDS Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.  
+## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """
 Helper functions for building and running test suites.
 """
 
 __revision__ = "$Id$"
 
 # if verbose level is set to 9, many debugging messages will be
 # printed on stdout, so you may want to run:
 #   $ regressiontestsuite > /tmp/z.log
 # or even:
 #   $ regressiontestsuite > /tmp/z.log 2> /tmp/z.err
 cfg_testutils_verbose = 1
 
 import string
 import sys
 import time
 import unittest
 
 from urllib import urlencode
 
 from invenio.config import weburl, sweburl
+from invenio.w3c_validator import w3c_validate, w3c_errors_to_str, CFG_TESTS_REQUIRE_HTML_VALIDATION
 
 def warn_user_about_tests():
     """ Put a standard warning about running tests that might modify
     user data"""
-    
+
     # Provide a command line option to avoid having to type the
     # confirmation every time during development.
     if '--yes-i-know' in sys.argv:
         return
 
     sys.stderr.write("""\
 **********************************************************************
 **                                                                  **
 **  ***  I M P O R T A N T   W A R N I N G  ***                     **
 **                                                                  **
 ** The regression test suite needs to be run on a clean demo site   **
 ** that you can obtain by doing:                                    **
 **                                                                  **
 **    $ make drop-tables                                            **
 **    $ make create-tables                                          **
 **    $ make create-demo-site                                       **
 **    $ make load-demo-records                                      **
 **                                                                  **
 ** Note that DOING THE ABOVE WILL ERASE YOUR ENTIRE DATABASE.       **
 **                                                                  **
 ** In addition, due to the write nature of some of the tests,       **
 ** the demo DATABASE will be ALTERED WITH JUNK DATA, so that        **
 ** it is recommended to rebuild the demo site anew afterwards.      **
 **                                                                  **
 **********************************************************************
 
 Please confirm by typing "Yes, I know!": """)
 
     answer = raw_input('')
     if answer != 'Yes, I know!':
         sys.stderr.write("Aborted.\n")
         raise SystemExit(0)
 
     return
 
 def warn_user_about_tests_and_run(testsuite):
     """ Convenience function to embed in test suites """
     warn_user_about_tests()
     unittest.TextTestRunner(verbosity=2).run(testsuite)
-    
+
 
 def make_test_suite(*test_cases):
     """ Build up a test suite given separate test cases"""
-    
+
     return unittest.TestSuite([unittest.makeSuite(case, 'test')
                                for case in test_cases])
 
 def make_url(path, **kargs):
     """ Helper to generate an absolute invenio URL with query
     arguments"""
-    
+
     url = weburl + path
-    
+
     if kargs:
         url += '?' + urlencode(kargs, doseq=True)
 
     return url
 
 def make_surl(path, **kargs):
     """ Helper to generate an absolute invenio Secure URL with query
     arguments"""
-    
+
     url = sweburl + path
-    
+
     if kargs:
         url += '?' + urlencode(kargs, doseq=True)
 
     return url
 
 class InvenioTestUtilsBrowserException(Exception):
     """Helper exception for the regression test suite browser."""
     pass
 
 def test_web_page_content(url,
                           username="guest",
                           password="",
                           expected_text="</html>",
                           expected_link_target=None,
-                          expected_link_label=None):
+                          expected_link_label=None,
+                          require_validate_p=CFG_TESTS_REQUIRE_HTML_VALIDATION):
     """Test whether web page URL as seen by user USERNAME contains
        text EXPECTED_TEXT and, eventually, contains a link to
        EXPECTED_LINK_TARGET (if set) labelled EXPECTED_LINK_LABEL (if
        set).  The EXPECTED_TEXT is checked via substring matching, the
        EXPECTED_LINK_TARGET and EXPECTED_LINK_LABEL via exact string
        matching.
 
        Before doing the tests, login as USERNAME with password
        PASSWORD.  E.g. interesting values for USERNAME are "guest" or
        "admin".
 
        Return empty list in case of problems, otherwise list of error
        messages that may have been encountered during processing of
        page.
     """
-    
+
     error_messages = []
     try:
         import mechanize
     except ImportError:
         return ['WARNING: Cannot import mechanize, test skipped.']
     browser = mechanize.Browser()
     try:
         # firstly login:
         if username == "guest":
             pass
         else:
             browser.open(sweburl + "/youraccount/login")
             browser.select_form(nr=0)
             browser['p_un'] = username
             browser['p_pw'] = password
             browser.submit()
             username_account_page_body = browser.response().read()
             try:
                 string.index(username_account_page_body,
                              "You are logged in as %s." % username)
             except ValueError:
                 raise InvenioTestUtilsBrowserException, \
                       'ERROR: Cannot login as %s, test skipped.' % username
-        
+
         # secondly read page body:
         browser.open(url)
         url_body = browser.response().read()
 
         # now test for EXPECTED_TEXT:
         try:
             string.index(url_body, expected_text)
         except ValueError:
             raise InvenioTestUtilsBrowserException, \
                   'ERROR: Page %s (login %s) does not contain %s.' % \
                               (url, username, expected_text)
 
         # now test for EXPECTED_LINK_TARGET and EXPECTED_LINK_LABEL:
         if expected_link_target or expected_link_label:
             try:
                 browser.find_link(url=expected_link_target,
                                   text=expected_link_label)
             except mechanize.LinkNotFoundError:
                 raise InvenioTestUtilsBrowserException, \
                       'ERROR: Page %s (login %s) does not contain link to %s entitled %s.' % \
                                   (url, username, expected_link_target, expected_link_label)
-                  
+
+        if require_validate_p:
+            valid_p, errors, warnings = w3c_validate(url_body)
+            if not valid_p:
+                raise InvenioTestUtilsBrowserException, \
+                      'ERROR: Page %s (login %s) does not validate:\n %s' % \
+                                  (url, username, w3c_errors_to_str(errors, warnings))
+
     except mechanize.HTTPError, msg:
         error_messages.append('ERROR: Page %s (login %s) not accessible. %s' % \
                               (url, username, msg))
     except InvenioTestUtilsBrowserException, msg:
         error_messages.append('ERROR: Page %s (login %s) led to an error: %s.' % \
                               (url, username, msg))
 
     # logout after tests:
     browser.open(sweburl + "/youraccount/logout")
 
     if cfg_testutils_verbose >= 9:
         print "%s test_web_page_content(), tested page `%s', login `%s', expected text `%s', errors `%s'." % \
               (time.strftime("%Y-%m-%d %H:%M:%S -->", time.localtime()),
                url, username, expected_text,
                string.join(error_messages, ","))
 
     return error_messages
 
 def merge_error_messages(error_messages):
     """If the ERROR_MESSAGES list is non-empty, merge them and return nicely
        formatted string suitable for printing.  Otherwise return empty
-       string.   
+       string.
     """
     out = ""
     if error_messages:
         out = "\n*** " + string.join(error_messages, "\n*** ")
     return out
diff --git a/modules/miscutil/lib/w3c_validator.py b/modules/miscutil/lib/w3c_validator.py
new file mode 100644
index 000000000..7499c1cc4
--- /dev/null
+++ b/modules/miscutil/lib/w3c_validator.py
@@ -0,0 +1,186 @@
+## $Id$
+##
+## This file is part of CDS Invenio.
+## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
+##
+## CDS Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## CDS Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""
+Helper functions for validating a text against a W3C Markup Validator service.
+"""
+
+__revision__ = "$Id$"
+
+import httplib
+import mimetypes
+import re
+import time
+from xml.sax.saxutils import unescape
+from invenio.config import CFG_CERN_SITE
+
+
+if CFG_CERN_SITE:
+    # Host name of a mirror of the W3C validator.
+    CFG_W3C_VALIDATOR_HOST = 'pcuds12.cern.ch'
+
+    # The selector (request path) used for checking a page.
+    CFG_W3C_VALIDATOR_SELECTOR = '/w3c-markup-validator/check'
+
+    # Whether to sleep for 1s before each request, out of kindness to the server.
+    CFG_W3C_VALIDATOR_SLEEP_P = False
+else:
+    CFG_W3C_VALIDATOR_HOST = 'validator.w3.org'
+    CFG_W3C_VALIDATOR_SELECTOR = '/check'
+    CFG_W3C_VALIDATOR_SLEEP_P = True
+
+
+# Whether the regression test helpers validate every fetched page by default.
+CFG_TESTS_REQUIRE_HTML_VALIDATION = False
+
+
+def w3c_validate(text, host=CFG_W3C_VALIDATOR_HOST,
+        selector=CFG_W3C_VALIDATOR_SELECTOR,
+        sleep_p=CFG_W3C_VALIDATOR_SLEEP_P):
+    """ Validate TEXT against the W3C validator like HOST, posting it to
+    SELECTOR and, if SLEEP_P is true, sleeping for a second beforehand.
+    Return a triple (valid_p, errors, warnings); errors and warnings are
+    lists of (line number, column, message, row involved) tuples, both
+    empty when the page is valid or the validator gave no usable answer.
+    """
+    if sleep_p:
+        time.sleep(1)
+    h = _post_multipart(host, selector, \
+        [('output', 'soap12')], [('uploaded_file', 'foobar.html', text)])
+    try:
+        headers = h.getreply()[2]
+        # getreply() hands back None as headers on a malformed status line.
+        if headers is None or not headers.has_key('X-W3C-Validator-Status'):
+            return (False, [], [])
+        if headers['X-W3C-Validator-Status'] == 'Valid':
+            return (True, [], [])
+        return (False,) + _parse_validator_soap(h.file.read(), text.split('\n'))
+    finally:
+        h.close()
+
+def w3c_errors_to_str(errors, warnings):
+    """ Render the ERRORS and WARNINGS lists coming from w3c_validate as a
+    printable report.  Every item is a (line, column, message, source row)
+    tuple; the row is shown with a caret under the reported column.
+    """
+    chunks = []
+    for label, items in (('errors', errors), ('warnings', warnings)):
+        if not items:
+            # Nothing to report for this category: no header either.
+            continue
+        chunks.append('%s %s:\n' % (len(items), label))
+        for line, col, msg, text in items:
+            # &quot; is rendered as an apostrophe, other entities as usual.
+            chunks.append('%s (%s:%s):\n' % (unescape(msg, {'&quot;': "'"}), line, col))
+            chunks.append(text + '\n')
+            chunks.append(' '*(int(col)-1) + '^\n')
+            chunks.append('---\n')
+    return ''.join(chunks)
+
+def w3c_validate_p(text, host=CFG_W3C_VALIDATOR_HOST,
+        selector=CFG_W3C_VALIDATOR_SELECTOR,
+        sleep_p=CFG_W3C_VALIDATOR_SLEEP_P):
+    """ Validate TEXT against the W3C validator like HOST, posting it to
+    SELECTOR and, if SLEEP_P is true, sleeping for a second beforehand.
+    Return True only if the validator reports the document as valid.
+    """
+    if sleep_p:
+        time.sleep(1)
+    h = _post_multipart(host, selector, \
+        [('output', 'soap12')], [('uploaded_file', 'foobar.html', text)])
+    try:
+        headers = h.getreply()[2]  # None on a malformed status line
+        return headers is not None and headers.get('X-W3C-Validator-Status') == 'Valid'
+    finally:
+        h.close()
+
+
+_errors_re = re.compile(r'<m:errors>.*<m:errorcount>(?P<errorcount>[\d]+)\</m:errorcount>.*<m:errorlist>(?P<errors>.*)</m:errorlist>.*</m:errors>', re.M | re.S)
+_warnings_re = re.compile(r'<m:warnings>.*<m:warningcount>(?P<warningcount>[\d]+)</m:warningcount>.*<m:warninglist>(?P<warnings>.*)</m:warninglist>.*</m:warnings>', re.M | re.S)
+# Per-item patterns must be non-greedy, else findall() merges a whole list into one match.
+_error_re = re.compile(r'<m:error>.*?<m:line>(?P<line>[\d]+)</m:line>.*?<m:col>(?P<col>[\d]+)</m:col>.*?<m:message>(?P<message>.+?)</m:message>.*?</m:error>', re.M | re.S)
+
+_warning_re = re.compile(r'<m:warning>.*?<m:line>(?P<line>[\d]+)</m:line>.*?<m:col>(?P<col>[\d]+)</m:col>.*?<m:message>(?P<message>.+?)</m:message>.*?</m:warning>', re.M | re.S)
+
+
+def _parse_validator_soap(soap_output, rows):
+    """ Extract errors and warnings from the SOAP output of the W3C
+    validator.  ROWS is the validated text split into lines.  Return a
+    pair (errors, warnings) of lists of (line, col, message, source row)
+    tuples; line and col are kept as the strings found in the SOAP.
+    """
+    def _with_rows(items):
+        """ Append the offending source row ('' when out of range). """
+        out = []
+        for line, col, message in items:
+            idx = int(line) - 1
+            # idx < 0 would silently wrap around, too big would raise.
+            out.append((line, col, message, 0 <= idx < len(rows) and rows[idx] or ''))
+        return out
+    errors = _errors_re.search(soap_output)
+    warnings = _warnings_re.search(soap_output)
+    errors = errors and _with_rows(_error_re.findall(errors.group('errors'))) or []
+    warnings = warnings and _with_rows(_warning_re.findall(warnings.group('warnings'))) or []
+    return (errors, warnings)
+
+def _post_multipart(host, selector, fields, files):
+    """
+    Send FIELDS and FILES to SELECTOR on HOST as a multipart/form-data POST.
+    FIELDS is a sequence of (name, value) pairs for regular form fields;
+    FILES is a sequence of (name, filename, value) triples for file uploads.
+    Return the httplib.HTTP object used, from which the reply can be read.
+    """
+    mime_type, payload = _encode_multipart_formdata(fields, files)
+    connection = httplib.HTTP(host)
+    connection.putrequest('POST', selector)
+    for header, value in (('content-type', mime_type), ('content-length', str(len(payload)))):
+        connection.putheader(header, value)
+    connection.endheaders()
+    connection.send(payload)
+    return connection
+
+def _encode_multipart_formdata(fields, files):
+    """
+    Build a multipart/form-data request body.
+    FIELDS is a sequence of (name, value) pairs for regular form fields;
+    FILES is a sequence of (name, filename, value) triples for the parts
+    uploaded as files, whose content type is guessed from the filename.
+    Return the pair (content_type, body), where content_type announces
+    the boundary that separates the parts inside body.
+    """
+    boundary = '----------ThIs_Is_tHe_bouNdaRY_$'
+    delimiter = '--' + boundary
+    parts = []
+    for name, value in fields:
+        parts.extend([delimiter,
+                      'Content-Disposition: form-data; name="%s"' % name,
+                      '',
+                      value])
+    for name, filename, value in files:
+        parts.extend([delimiter,
+                      'Content-Disposition: form-data; name="%s"; filename="%s"' % (name, filename),
+                      'Content-Type: %s' % _get_content_type(filename),
+                      '',
+                      value])
+    parts.extend([delimiter + '--', ''])
+    return 'multipart/form-data; boundary=%s' % boundary, '\r\n'.join(parts)
+
+def _get_content_type(filename):
+    mime_type = mimetypes.guess_type(filename)[0]  # None when the type cannot be guessed
+    return mime_type or 'application/octet-stream'  # fall back to generic binary