diff --git a/modules/miscutil/lib/Makefile.am b/modules/miscutil/lib/Makefile.am index 3f65d792c..41bd3515b 100644 --- a/modules/miscutil/lib/Makefile.am +++ b/modules/miscutil/lib/Makefile.am @@ -1,88 +1,90 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
pylibdir = $(libdir)/python/invenio pylib_DATA = __init__.py \ errorlib.py \ errorlib_tests.py \ errorlib_webinterface.py \ errorlib_regression_tests.py \ config.py \ data_cacher.py \ dbquery.py \ dbquery_tests.py \ miscutil_config.py \ messages.py \ textutils.py \ dateutils.py \ dateutils_tests.py \ htmlutils.py \ htmlutils_tests.py \ testutils.py \ testutils_regression_tests.py \ - urlutils.py + urlutils.py \ + w3c_validator.py noinst_DATA = testimport.py \ kwalitee.py phplibdir = $(libdir)/php/invenio/errors phplib_DATA = errorHandling.php EXTRA_DIST = __init__.py \ errorlib.py \ errorlib_tests.py \ errorlib_webinterface.py \ errorlib_regression_tests.py \ miscutil_config.py \ config.py.wml \ dbquery.py.wml \ dbquery_tests.py \ messages.py.wml \ errorHandling.php.wml \ textutils.py \ dateutils.py \ dateutils_tests.py \ htmlutils.py \ htmlutils_tests.py \ testutils.py \ testutils_regression_tests.py \ urlutils.py \ + w3c_validator.py \ testimport.py \ kwalitee.py install-data-hook: $(PYTHON) $(srcdir)/testimport.py ${prefix} CLEANFILES = config.py dbquery.py messages.py $(phplib_DATA) *~ *.tmp *.pyc config.py: config.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \ $(top_srcdir)/config/cdswmllib.wml $(WML) -o $@ $< dbquery.py: dbquery.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \ $(top_srcdir)/config/cdswmllib.wml $(WML) -o $@ $< messages.py: messages.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \ $(top_srcdir)/config/cdswmllib.wml $(WML) -o $@ $< errorHandling.php: errorHandling.php.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml \ $(top_srcdir)/config/cdswmllib.wml $(WML) -o $@ $< diff --git a/modules/miscutil/lib/testutils.py b/modules/miscutil/lib/testutils.py index 439540f02..33067d7ab 100644 --- a/modules/miscutil/lib/testutils.py +++ b/modules/miscutil/lib/testutils.py @@ -1,213 +1,222 @@ ## $Id$ ## ## This file is part of CDS Invenio. 
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. +## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Helper functions for building and running test suites. """ __revision__ = "$Id$" # if verbose level is set to 9, many debugging messages will be # printed on stdout, so you may want to run: # $ regressiontestsuite > /tmp/z.log # or even: # $ regressiontestsuite > /tmp/z.log 2> /tmp/z.err cfg_testutils_verbose = 1 import string import sys import time import unittest from urllib import urlencode from invenio.config import weburl, sweburl +from invenio.w3c_validator import w3c_validate, w3c_errors_to_str, CFG_TESTS_REQUIRE_HTML_VALIDATION def warn_user_about_tests(): """ Put a standard warning about running tests that might modify user data""" - + # Provide a command line option to avoid having to type the # confirmation every time during development. 
if '--yes-i-know' in sys.argv: return sys.stderr.write("""\ ********************************************************************** ** ** ** *** I M P O R T A N T W A R N I N G *** ** ** ** ** The regression test suite needs to be run on a clean demo site ** ** that you can obtain by doing: ** ** ** ** $ make drop-tables ** ** $ make create-tables ** ** $ make create-demo-site ** ** $ make load-demo-records ** ** ** ** Note that DOING THE ABOVE WILL ERASE YOUR ENTIRE DATABASE. ** ** ** ** In addition, due to the write nature of some of the tests, ** ** the demo DATABASE will be ALTERED WITH JUNK DATA, so that ** ** it is recommended to rebuild the demo site anew afterwards. ** ** ** ********************************************************************** Please confirm by typing "Yes, I know!": """) answer = raw_input('') if answer != 'Yes, I know!': sys.stderr.write("Aborted.\n") raise SystemExit(0) return def warn_user_about_tests_and_run(testsuite): """ Convenience function to embed in test suites """ warn_user_about_tests() unittest.TextTestRunner(verbosity=2).run(testsuite) - + def make_test_suite(*test_cases): """ Build up a test suite given separate test cases""" - + return unittest.TestSuite([unittest.makeSuite(case, 'test') for case in test_cases]) def make_url(path, **kargs): """ Helper to generate an absolute invenio URL with query arguments""" - + url = weburl + path - + if kargs: url += '?' + urlencode(kargs, doseq=True) return url def make_surl(path, **kargs): """ Helper to generate an absolute invenio Secure URL with query arguments""" - + url = sweburl + path - + if kargs: url += '?' 
+ urlencode(kargs, doseq=True) return url class InvenioTestUtilsBrowserException(Exception): """Helper exception for the regression test suite browser.""" pass def test_web_page_content(url, username="guest", password="", expected_text="", expected_link_target=None, - expected_link_label=None): + expected_link_label=None, + require_validate_p=CFG_TESTS_REQUIRE_HTML_VALIDATION): """Test whether web page URL as seen by user USERNAME contains text EXPECTED_TEXT and, eventually, contains a link to EXPECTED_LINK_TARGET (if set) labelled EXPECTED_LINK_LABEL (if set). The EXPECTED_TEXT is checked via substring matching, the EXPECTED_LINK_TARGET and EXPECTED_LINK_LABEL via exact string matching. Before doing the tests, login as USERNAME with password PASSWORD. E.g. interesting values for USERNAME are "guest" or "admin". - Return empty list in case of problems, otherwise list of error + Return empty list if no problems were found, otherwise list of error messages that may have been encountered during processing of page. """ - + error_messages = [] try: import mechanize except ImportError: return ['WARNING: Cannot import mechanize, test skipped.'] browser = mechanize.Browser() try: # firstly login: if username == "guest": pass else: browser.open(sweburl + "/youraccount/login") browser.select_form(nr=0) browser['p_un'] = username browser['p_pw'] = password browser.submit() username_account_page_body = browser.response().read() try: string.index(username_account_page_body, "You are logged in as %s." % username) except ValueError: raise InvenioTestUtilsBrowserException, \ 'ERROR: Cannot login as %s, test skipped.' % username - + # secondly read page body: browser.open(url) url_body = browser.response().read() # now test for EXPECTED_TEXT: try: string.index(url_body, expected_text) except ValueError: raise InvenioTestUtilsBrowserException, \ 'ERROR: Page %s (login %s) does not contain %s.' 
% \ (url, username, expected_text) # now test for EXPECTED_LINK_TARGET and EXPECTED_LINK_LABEL: if expected_link_target or expected_link_label: try: browser.find_link(url=expected_link_target, text=expected_link_label) except mechanize.LinkNotFoundError: raise InvenioTestUtilsBrowserException, \ 'ERROR: Page %s (login %s) does not contain link to %s entitled %s.' % \ (url, username, expected_link_target, expected_link_label) - + # finally, if REQUIRE_VALIDATE_P is set, validate page body via W3C validator: + if require_validate_p: + valid_p, errors, warnings = w3c_validate(url_body) + if not valid_p: + raise InvenioTestUtilsBrowserException, \ + 'ERROR: Page %s (login %s) does not validate:\n %s' % \ + (url, username, w3c_errors_to_str(errors, warnings)) + except mechanize.HTTPError, msg: error_messages.append('ERROR: Page %s (login %s) not accessible. %s' % \ (url, username, msg)) except InvenioTestUtilsBrowserException, msg: error_messages.append('ERROR: Page %s (login %s) led to an error: %s.' % \ (url, username, msg)) # logout after tests: browser.open(sweburl + "/youraccount/logout") if cfg_testutils_verbose >= 9: print "%s test_web_page_content(), tested page `%s', login `%s', expected text `%s', errors `%s'." % \ (time.strftime("%Y-%m-%d %H:%M:%S -->", time.localtime()), url, username, expected_text, string.join(error_messages, ",")) return error_messages def merge_error_messages(error_messages): """If the ERROR_MESSAGES list is non-empty, merge them and return nicely formatted string suitable for printing. Otherwise return empty - string. + string. """ out = "" if error_messages: out = "\n*** " + string.join(error_messages, "\n*** ") return out diff --git a/modules/miscutil/lib/w3c_validator.py b/modules/miscutil/lib/w3c_validator.py new file mode 100644 index 000000000..7499c1cc4 --- /dev/null +++ b/modules/miscutil/lib/w3c_validator.py @@ -0,0 +1,186 @@ +## $Id$ +## +## This file is part of CDS Invenio. +## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. 
+##
+## CDS Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## CDS Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""
+Validate a text against a W3C markup validator: w3c_validate, w3c_validate_p and w3c_errors_to_str.
+"""
+
+__revision__ = "$Id$"
+
+import httplib
+import mimetypes
+import re
+import time
+from xml.sax.saxutils import unescape
+from invenio.config import CFG_CERN_SITE
+
+
+if CFG_CERN_SITE:
+    # A host mirroring W3C validator
+    CFG_W3C_VALIDATOR_HOST = 'pcuds12.cern.ch'
+
+    # The selector for checking the page
+    CFG_W3C_VALIDATOR_SELECTOR = '/w3c-markup-validator/check'
+
+    # Whether to sleep for 1s for kindness to the server
+    CFG_W3C_VALIDATOR_SLEEP_P = False
+else:
+    CFG_W3C_VALIDATOR_HOST = 'validator.w3.org'
+    CFG_W3C_VALIDATOR_SELECTOR = '/check'
+    CFG_W3C_VALIDATOR_SLEEP_P = True
+
+
+# Whether regression tests should, by default, also validate the pages they fetch.
+CFG_TESTS_REQUIRE_HTML_VALIDATION = False
+
+
+def w3c_validate(text, host=CFG_W3C_VALIDATOR_HOST,
+                 selector=CFG_W3C_VALIDATOR_SELECTOR,
+                 sleep_p=CFG_W3C_VALIDATOR_SLEEP_P):
+    """ Validate the text against a W3C validator like host, with a given selector
+    and, if sleep_p is set, sleeping for a second before sending the request.
+    Return a triple, whose first element is True if the document validates.
+    If it is False, the second and third elements contain respectively a list of
+    errors and of warnings of the form: (line number, column, error, row involved).
+    """
+    # Throttle requests out of kindness to the validator server.
+    if sleep_p:
+        time.sleep(1)
+    h = _post_multipart(host, selector, \
+        [('output', 'soap12')], [('uploaded_file', 'foobar.html', text)])
+    errcode, errmsg, headers = h.getreply()
+    if headers.has_key('X-W3C-Validator-Status'):
+        if headers['X-W3C-Validator-Status'] == 'Valid':
+            return (True, [], [])
+        else:
+            errors, warnings = _parse_validator_soap(h.file.read(), text.split('\n'))
+            return (False, errors, warnings)
+    else:
+        return (False, [], []) # no status header in the reply: reported as not valid, without details
+
+def w3c_errors_to_str(errors, warnings):
+    """ Pretty print errors and warnings coming from w3c_validate """
+    ret = ''
+    if errors:
+        ret += '%s errors:\n' % len(errors)
+        for line, col, msg, text in errors:
+            ret += '%s (%s:%s):\n' % (unescape(msg, {'&quot;': "'"}), line, col)
+            ret += text + '\n'
+            ret += ' '*(int(col)-1) + '^\n'
+            ret += '---\n'
+    if warnings:
+        ret += '%s warnings:\n' % len(warnings)
+        for line, col, msg, text in warnings:
+            ret += '%s (%s:%s):\n' % (unescape(msg, {'&quot;': "'"}), line, col)
+            ret += text + '\n'
+            ret += ' '*(int(col)-1) + '^\n'
+            ret += '---\n'
+    return ret
+
+def w3c_validate_p(text, host=CFG_W3C_VALIDATOR_HOST,
+                   selector=CFG_W3C_VALIDATOR_SELECTOR,
+                   sleep_p=CFG_W3C_VALIDATOR_SLEEP_P):
+    """ Validate the text against a W3C validator like host, with a given selector
+    and, if sleep_p is set, sleeping for a second before sending the request.
+    Return True if the document validates, False otherwise.
+    """
+    # Throttle requests out of kindness to the validator server.
+    if sleep_p:
+        time.sleep(1)
+    h = _post_multipart(host, selector, \
+        [('output', 'soap12')], [('uploaded_file', 'foobar.html', text)])
+    errcode, errmsg, headers = h.getreply()
+    if headers.has_key('X-W3C-Validator-Status'):
+        return headers['X-W3C-Validator-Status'] == 'Valid'
+    return False
+
+# Patterns for the validator's SOAP 1.2 reply (NOTE(review): element names to be confirmed against a live reply).
+
+_errors_re = re.compile(r'<m:errors>.*<m:errorcount>(?P<errorcount>[\d]+)</m:errorcount>.*<m:errorlist>(?P<errors>.*)</m:errorlist>.*</m:errors>', re.M | re.S)
+_warnings_re = re.compile(r'<m:warnings>.*<m:warningcount>(?P<warningcount>[\d]+)</m:warningcount>.*<m:warninglist>(?P<warnings>.*)</m:warninglist>.*</m:warnings>', re.M | re.S)
+
+_error_re = re.compile(r'<m:error>.*?<m:line>(?P<line>[\d]+)</m:line>.*?<m:col>(?P<col>[\d]+)</m:col>.*?<m:message>(?P<message>.+?)</m:message>.*?</m:error>', re.M | re.S)
+
+_warning_re = re.compile(r'<m:warning>.*?<m:line>(?P<line>[\d]+)</m:line>.*?<m:col>(?P<col>[\d]+)</m:col>.*?<m:message>(?P<message>.+?)</m:message>.*?</m:warning>', re.M | re.S)
+
+
+def _parse_validator_soap(soap_output, rows):
+    """ Given the soap output provided by W3C validator and the rows of the
+    validated text, return a tuple containing the list of errors in the form
+    (line, col, error_msg, row) and the list of warnings in the same form.
+    """
+
+    errors = _errors_re.search(soap_output)
+    warnings = _warnings_re.search(soap_output)
+    if errors:
+        errors = _error_re.findall(errors.group('errors'))
+        errors = [(line, col, msg, rows[int(line)-1]) for line, col, msg in errors]
+    else:
+        errors = []
+    if warnings:
+        warnings = _warning_re.findall(warnings.group('warnings'))
+        warnings = [(line, col, msg, rows[int(line)-1]) for line, col, msg in warnings]
+    else:
+        warnings = []
+    return (errors, warnings)
+
+def _post_multipart(host, selector, fields, files):
+    """
+    Post fields and files to an http host as multipart/form-data.
+    fields is a sequence of (name, value) elements for regular form fields.
+    files is a sequence of (name, filename, value) elements for data to be uploaded as files
+    Return the httplib.HTTP instance used, on which getreply() can be called.
+    """
+    content_type, body = _encode_multipart_formdata(fields, files)
+    h = httplib.HTTP(host)
+    h.putrequest('POST', selector)
+    h.putheader('content-type', content_type)
+    h.putheader('content-length', str(len(body)))
+    h.endheaders()
+    h.send(body)
+    return h
+
+def _encode_multipart_formdata(fields, files):
+    """
+    fields is a sequence of (name, value) elements for regular form fields.
+    files is a sequence of (name, filename, value) elements for data to be uploaded as files
+    Return (content_type, body) ready for httplib.HTTP instance
+    """
+    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
+    CRLF = '\r\n'
+    L = []
+    for (key, value) in fields:
+        L.append('--' + BOUNDARY)
+        L.append('Content-Disposition: form-data; name="%s"' % key)
+        L.append('')
+        L.append(value)
+    for (key, filename, value) in files:
+        L.append('--' + BOUNDARY)
+        L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
+        L.append('Content-Type: %s' % _get_content_type(filename))
+        L.append('')
+        L.append(value)
+    L.append('--' + BOUNDARY + '--')
+    L.append('')
+    body = CRLF.join(L)
+    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+    return content_type, body
+
+def _get_content_type(filename):
+    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+