# -*- coding: utf-8 -*-
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
urlutils.py -- helper functions for URL related problems such as
argument washing, redirection, etc.
"""

__revision__ = "$Id$"

import re
from urllib import urlencode, quote_plus, quote
from urlparse import urlparse
from cgi import parse_qs, escape

try:
    from mod_python import apache, util
except ImportError:
    # mod_python is only needed by the functions that talk to a live
    # request; the pure URL helpers below must stay importable without it.
    pass

from invenio.config import CFG_SITE_LANG


def wash_url_argument(var, new_type):
    """
    Wash argument into 'new_type', that can be 'list', 'str',
                                               'int', 'tuple' or 'dict'.
    If needed, the check 'type(var) is not None' should be done before
    calling this function.
    @param var: variable value
    @param new_type: variable type, 'list', 'str', 'int', 'tuple' or 'dict'
    @return as much as possible, value var as type new_type
            If var is a list, will change first element into new_type.
            If int check unsuccessful, returns 0
            If new_type is none of the known names, returns an empty list.
    """
    out = []
    if new_type == 'list':    # return lst
        if type(var) is list:
            out = var
        else:
            out = [var]
    elif new_type == 'str':   # return str
        if type(var) is list:
            # Washing is deliberately best-effort: any failure (empty list,
            # unformattable element, ...) degrades to the empty string.
            try:
                out = "%s" % var[0]
            except Exception:
                out = ""
        elif type(var) is str:
            out = var
        else:
            out = "%s" % var
    elif new_type == 'int':   # return int
        if type(var) is list:
            # Best-effort as above: anything not convertible becomes 0.
            try:
                out = int(var[0])
            except Exception:
                out = 0
        elif type(var) is int:
            out = var
        elif type(var) is str:
            try:
                out = int(var)
            except Exception:
                out = 0
        else:
            out = 0
    elif new_type == 'tuple': # return tuple
        if type(var) is tuple:
            out = var
        else:
            out = (var,)
    elif new_type == 'dict':  # return dictionary
        if type(var) is dict:
            out = var
        else:
            out = {0: var}
    return out


def redirect_to_url(req, url, redirection_type=None):
    """
    Redirect current page to url.
    @param req: request as received from apache
    @param url: url to redirect to
    @param redirection_type: what kind of redirection is required:
    e.g.: apache.HTTP_MULTIPLE_CHOICES             = 300
          apache.HTTP_MOVED_PERMANENTLY            = 301
          apache.HTTP_MOVED_TEMPORARILY            = 302
          apache.HTTP_SEE_OTHER                    = 303
          apache.HTTP_NOT_MODIFIED                 = 304
          apache.HTTP_USE_PROXY                    = 305
          apache.HTTP_TEMPORARY_REDIRECT           = 307
    The default is apache.HTTP_TEMPORARY_REDIRECT
    Please see: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3
    @raise IOError: if req.sent_bodyct is set, i.e. a redirect is no
        longer possible.
    @raise apache.SERVER_RETURN: always, once the redirection has been
        written, carrying apache.DONE.
    """
    # The default is resolved at call time rather than in the signature so
    # that this module can be imported when mod_python is not available.
    if redirection_type is None:
        redirection_type = apache.HTTP_TEMPORARY_REDIRECT
    req.err_headers_out["Location"] = url
    req.err_headers_out["Cache-Control"] = "no-cache"

    if req.sent_bodyct:
        raise IOError("Cannot redirect after headers have already been sent.")

    req.status = redirection_type
    req.write('<p>Please go to <a href="%s">here</a></p>\n' % url)

    raise apache.SERVER_RETURN(apache.DONE)


def get_client_ip_address(req):
    """
    Returns IP address as string from an apache request.
    @param req: request as received from apache
    """
    return str(req.get_remote_host(apache.REMOTE_NOLOOKUP))


def get_referer(req, replace_ampersands=False):
    """
    Return the referring page of a request.
    Referer (wikipedia): Referer is a common misspelling of the word
    "referrer"; so common, in fact, that it made it into the official
    specification of HTTP. When visiting a webpage, the referer or
    referring page is the URL of the previous webpage from which a link was
    followed.
    @param req: request
    @param replace_ampersands: if 1, replace & by &amp; in url
                               (correct HTML cannot contain & characters alone).
    @return the 'Referer' request header, or '' when the header is absent
    """
    try:
        referer = req.headers_in['Referer']
        if replace_ampersands == 1:
            return referer.replace('&', '&amp;')
        return referer
    except KeyError:
        return ''


def drop_default_urlargd(urlargd, default_urlargd):
    """
    Return a copy of 'urlargd' without the arguments that carry their
    default value.
    @param urlargd: dictionary of query arguments
    @param default_urlargd: dictionary mapping argument names to tuples of
        the form (type, default value)
    @return new dictionary; 'urlargd' itself is left untouched
    """
    lndefault = {}
    lndefault.update(default_urlargd)

    ## Commented out. An Invenio URL now should always specify the desired
    ## language, in order not to raise the automatic language discovery
    ## (client browser language can be used now in place of CFG_SITE_LANG)
    # lndefault['ln'] = (str, CFG_SITE_LANG)

    canonical = {}
    canonical.update(urlargd)

    for k, v in urlargd.items():
        try:
            d = lndefault[k]

            if d[1] == v:
                del canonical[k]

        except KeyError:
            # No default known for this argument: keep it.
            pass

    return canonical


def make_canonical_urlargd(urlargd, default_urlargd):
    """ Build up the query part of an URL from the arguments passed in
    the 'urlargd' dictionary.  'default_urlargd' is a secondary dictionary
    which contains tuples of the form (type, default value) for the query
    arguments (this is the same dictionary as the one you can pass to
    webinterface_handler.wash_urlargd).

    When a query element has its default value, it is discarded, so
    that the simplest (canonical) url query is returned.

    The result contains the initial '?' if there are actual query
    items remaining.  Arguments are separated by '&amp;' so that the
    result can be embedded in HTML.
    """

    canonical = drop_default_urlargd(urlargd, default_urlargd)

    if canonical:
        return '?' + urlencode(canonical, doseq=True).replace('&', '&amp;')

    return ''


def create_html_link(urlbase, urlargd, link_label, linkattrd=None,
                     escape_urlargd=True, escape_linkattrd=True):
    """Creates a W3C compliant link.
    @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
    @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of':'hb'})
    @param link_label: text displayed in a browser (has to be already escaped)
    @param linkattrd: dictionary of attributes (e.g. a={'class': 'img'});
                      None or an empty dictionary means no attributes
    @param escape_urlargd: boolean indicating if the function should escape
                           arguments (e.g. < becomes &lt; or " becomes &quot;)
    @param escape_linkattrd: boolean indicating if the function should escape
                           attributes (e.g. < becomes &lt; or " becomes &quot;)
    @return the '<a href="...">...</a>' element as a string
    """
    attributes_separator = ' '
    output = '<a href="' + create_url(urlbase, urlargd, escape_urlargd) + '"'
    if linkattrd:
        output += ' '
        if escape_linkattrd:
            attributes = [escape(str(key), quote=True) + '="' + \
                          escape(str(linkattrd[key]), quote=True) + '"'
                          for key in linkattrd.keys()]
        else:
            attributes = [str(key) + '="' + str(linkattrd[key]) + '"'
                          for key in linkattrd.keys()]
        output += attributes_separator.join(attributes)
    output += '>' + link_label + '</a>'
    return output


def create_url(urlbase, urlargd, escape_urlargd=True):
    """Creates a W3C compliant URL. Output will look like this:
    'urlbase?param1=value1&amp;param2=value2'
    @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
    @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of':'hb'})
    @param escape_urlargd: boolean indicating if the function should escape
                           arguments (e.g. < becomes &lt; or " becomes &quot;)
    @return 'urlbase' alone when 'urlargd' is empty, otherwise 'urlbase'
            followed by '?' and the '&amp;'-separated arguments
    """
    separator = '&amp;'
    output = urlbase
    if urlargd:
        output += '?'
        if escape_urlargd:
            # URL-quote first, then HTML-escape the quoted result.
            arguments = [escape(quote(str(key)), quote=True) + '=' + \
                         escape(quote(str(urlargd[key])), quote=True)
                         for key in urlargd.keys()]
        else:
            arguments = [str(key) + '=' + str(urlargd[key])
                         for key in urlargd.keys()]
        output += separator.join(arguments)
    return output


def same_urls_p(a, b):
    """ Compare two URLs, ignoring reorganizing of query arguments """

    ua = list(urlparse(a))
    ub = list(urlparse(b))

    # Index 4 of the urlparse() result is the query string; comparing the
    # parsed dictionaries makes the argument order irrelevant.
    ua[4] = parse_qs(ua[4])
    ub[4] = parse_qs(ub[4])

    return ua == ub


def urlargs_replace_text_in_arg(urlargs, regexp_argname, text_old, text_new):
    """Analyze `urlargs' (URL CGI GET query arguments in string form)
       and for each occurrence of argument matching `regexp_argname'
       replace every substring `text_old' by `text_new'.  Return the
       resulting new URL.

       Used to be used for search engine's create_nearest_terms_box,
       now it is not used there anymore.  It is left here in case it
       will become possibly useful later.
    """
    # parse URL arguments into a dictionary:
    urlargsdict = parse_qs(urlargs)
    ## construct new URL arguments:
    urlargsdictnew = {}
    for key in urlargsdict.keys():
        if re.match(regexp_argname, key): # replace `arg' by new values
            urlargsdictnew[key] = [parg.replace(text_old, text_new)
                                   for parg in urlargsdict[key]]
        else: # keep old values
            urlargsdictnew[key] = urlargsdict[key]
    # build new URL for this word; joining avoids having to strip a
    # leading separator of a hard-coded length afterwards:
    pieces = [key + "=" + quote_plus(val, '')
              for key in urlargsdictnew.keys()
              for val in urlargsdictnew[key]]
    return "&amp;".join(pieces)