urlutils.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, Jun 28, 05:25

urlutils.py
View Options

	# -- coding: utf-8 --
	#
	# This file is part of Invenio.
	# Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
	#
	# Invenio is free software; you can redistribute it and/or
	# modify it under the terms of the GNU General Public License as
	# published by the Free Software Foundation; either version 2 of the
	# License, or (at your option) any later version.
	#
	# Invenio is distributed in the hope that it will be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with Invenio; if not, write to the Free Software Foundation, Inc.,
	# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	"""
	urlutils.py -- helper functions for URL related problems such as
	argument washing, redirection, etc.
	"""

	__revision__ = "$Id$"

	import time
	import base64
	import hmac
	import re
	import sys
	import os
	import inspect
	import urllib
	import urllib2
	from urllib import urlencode, quote_plus, quote, FancyURLopener
	from urlparse import urlparse, urlunparse
	from cgi import parse_qs, parse_qsl, escape

	try:
	import BeautifulSoup
	BEAUTIFUL_SOUP_IMPORTED = True
	except ImportError:
	BEAUTIFUL_SOUP_IMPORTED = False

	try:
	from hashlib import sha256, sha1, md5
	HASHLIB_IMPORTED = True
	except ImportError:
	from md5 import md5
	HASHLIB_IMPORTED = False

	from invenio import webinterface_handler_config as apache
	from invenio.config import \
	CFG_SITE_URL, \
	CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE, \
	CFG_WEBDIR, CFG_SITE_NAME, CFG_VERSION, CFG_SITE_LANGS

	def wash_url_argument(var, new_type):
	"""
	Wash argument into 'new_type', that can be 'list', 'str',
	'int', 'tuple' or 'dict'.
	If needed, the check 'type(var) is not None' should be done before
	calling this function.
	@param var: variable value
	@param new_type: variable type, 'list', 'str', 'int', 'tuple' or 'dict'
	@return: as much as possible, value var as type new_type
	If var is a list, will change first element into new_type.
	If int check unsuccessful, returns 0
	"""
	out = []
	if new_type == 'list': # return lst
	if isinstance(var, list):
	out = var
	else:
	out = [var]
	elif new_type == 'str': # return str
	if isinstance(var, list):
	try:
	out = "%s" % var[0]
	except:
	out = ""
	elif isinstance(var, str):
	out = var
	else:
	out = "%s" % var
	elif new_type == 'int': # return int
	if isinstance(var, list):
	try:
	out = int(var[0])
	except:
	out = 0
	elif isinstance(var, (int, long)):
	out = var
	elif isinstance(var, str):
	try:
	out = int(var)
	except:
	out = 0
	else:
	out = 0
	elif new_type == 'tuple': # return tuple
	if isinstance(var, tuple):
	out = var
	else:
	out = (var, )
	elif new_type == 'dict': # return dictionary
	if isinstance(var, dict):
	out = var
	else:
	out = {0: var}
	return out


	def redirect_to_url(req, url, redirection_type=None, norobot=False):
	"""
	Redirect current page to url.
	@param req: request as received from apache
	@param url: url to redirect to
	@param redirection_type: what kind of redirection is required:
	e.g.: apache.HTTP_MULTIPLE_CHOICES = 300
	apache.HTTP_MOVED_PERMANENTLY = 301
	apache.HTTP_MOVED_TEMPORARILY = 302
	apache.HTTP_SEE_OTHER = 303
	apache.HTTP_NOT_MODIFIED = 304
	apache.HTTP_USE_PROXY = 305
	apache.HTTP_TEMPORARY_REDIRECT = 307
	The default is apache.HTTP_MOVED_TEMPORARILY
	@param norobot: wether to instruct crawlers and robots such as GoogleBot
	not to index past this point.
	@see: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3
	"""
	url = url.strip()
	if redirection_type is None:
	redirection_type = apache.HTTP_MOVED_TEMPORARILY

	del req.headers_out["Cache-Control"]
	req.headers_out["Cache-Control"] = "no-cache, private, no-store, " \
	"must-revalidate, post-check=0, pre-check=0, max-age=0"
	req.headers_out["Pragma"] = "no-cache"

	if norobot:
	req.headers_out["X-Robots-Tag"] = "noarchive, nosnippet, noindex, nocache"

	user_agent = req.headers_in.get('User-Agent', '')
	if 'Microsoft Office Existence Discovery' in user_agent or 'ms-office' in user_agent:
	## HACK: this is to workaround Microsoft Office trying to be smart
	## when users click on URLs in Office documents that require
	## authentication. Office will check the validity of the URL
	## but will pass the browser the redirected URL rather than
	## the original one. This is incompatible with e.g. Shibboleth
	## based SSO since the referer would be lost.
	## See: http://support.microsoft.com/kb/899927
	req.status = 200
	req.content_type = 'text/html'
	if req.method != 'HEAD':
	req.write("""
	<html>
	<head>
	<title>Intermediate page for URLs clicked on MS Office Documents</title>
	<meta http-equiv="REFRESH" content="5;url=%(url)s"></meta>
	</head>
	<body>
	<p>You are going to be redirected to the desired content within 5 seconds. If the redirection does not happen automatically please click on <a href="%(url)s">%(url_ok)s</a>.</p>
	</body>
	</html>""" % {
	'url': escape(req.unparsed_uri, True),
	'url_ok': escape(req.unparsed_uri)
	})
	raise apache.SERVER_RETURN(apache.DONE)

	req.headers_out["Location"] = url

	if req.response_sent_p:
	raise IOError("Cannot redirect after headers have already been sent.")

	req.status = redirection_type
	req.write('<p>Please go to <a href="%s">here</a></p>\n' % url)

	raise apache.SERVER_RETURN, apache.DONE


	def get_referer(req, replace_ampersands=False):
	""" Return the referring page of a request.
	Referer (wikipedia): Referer is a common misspelling of the word
	"referrer"; so common, in fact, that it made it into the official
	specification of HTTP. When visiting a webpage, the referer or
	referring page is the URL of the previous webpage from which a link was
	followed.
	@param req: request
	@param replace_ampersands: if 1, replace & by & in url
	(correct HTML cannot contain & characters alone)
	"""
	try:
	referer = req.headers_in['Referer']
	if replace_ampersands == 1:
	return referer.replace('&', '&')
	return referer
	except KeyError:
	return ''


	def drop_default_urlargd(urlargd, default_urlargd):
	lndefault = {}
	lndefault.update(default_urlargd)

	## Commented out. An Invenio URL now should always specify the desired
	## language, in order not to raise the automatic language discovery
	## (client browser language can be used now in place of CFG_SITE_LANG)
	# lndefault['ln'] = (str, CFG_SITE_LANG)

	canonical = {}
	canonical.update(urlargd)

	for k, v in urlargd.items():
	try:
	d = lndefault[k]

	if d[1] == v:
	del canonical[k]

	except KeyError:
	pass

	return canonical


	def make_canonical_urlargd(urlargd, default_urlargd):
	""" Build up the query part of an URL from the arguments passed in
	the 'urlargd' dictionary. 'default_urlargd' is a secondary dictionary which
	contains tuples of the form (type, default value) for the query
	arguments (this is the same dictionary as the one you can pass to
	webinterface_handler.wash_urlargd).

	When a query element has its default value, it is discarded, so
	that the simplest (canonical) url query is returned.

	The result contains the initial '?' if there are actual query
	items remaining.
	"""

	canonical = drop_default_urlargd(urlargd, default_urlargd)

	if canonical:
	return '?' + urlencode(canonical, doseq=True).replace('&', '&')

	return ''


	def create_html_link(urlbase, urlargd, link_label, linkattrd=None,
	escape_urlargd=True, escape_linkattrd=True,
	urlhash=None):
	"""Creates a W3C compliant link.
	@param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
	@param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of'='hb'})
	@param link_label: text displayed in a browser (has to be already escaped)
	@param linkattrd: dictionary of attributes (e.g. a={'class': 'img'})
	@param escape_urlargd: boolean indicating if the function should escape
	arguments (e.g. < becomes < or " becomes ")
	@param escape_linkattrd: boolean indicating if the function should escape
	attributes (e.g. < becomes < or " becomes ")
	@param urlhash: hash string to add at the end of the link
	"""
	attributes_separator = ' '
	output = '<a href="' + \
	create_url(urlbase, urlargd, escape_urlargd, urlhash) + '"'
	if linkattrd:
	output += ' '
	if escape_linkattrd:
	attributes = [escape(str(key), quote=True) + '="' + \
	escape(str(linkattrd[key]), quote=True) + '"'
	for key in linkattrd.keys()]
	else:
	attributes = [str(key) + '="' + str(linkattrd[key]) + '"'
	for key in linkattrd.keys()]
	output += attributes_separator.join(attributes)
	output += '>' + link_label + '</a>'
	return output


	def create_html_mailto(email, subject=None, body=None, cc=None, bcc=None,
	link_label="%(email)s", linkattrd=None,
	escape_urlargd=True, escape_linkattrd=True,
	email_obfuscation_mode=CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE):

	"""Creates a W3C compliant 'mailto' link.

	Encode/encrypt given email to reduce undesired automated email
	harvesting when embedded in a web page.

	NOTE: there is no ultimate solution to protect against email
	harvesting. All have drawbacks and can more or less be
	circumvented. There are other techniques to protect email
	adresses. We implement the less annoying one for users.

	@param email: the recipient of the email
	@param subject: a default subject for the email (must not contain
	line feeds)
	@param body: a default body for the email
	@param cc: the co-recipient(s) of the email
	@param bcc: the hidden co-recpient(s) of the email
	@param link_label: the label of this mailto link. String
	replacement is performed on key %(email)s with
	the email address if needed.
	@param linkattrd: dictionary of attributes (e.g. a={'class': 'img'})
	@param escape_urlargd: boolean indicating if the function should escape
	arguments (e.g. < becomes < or " becomes ")
	@param escape_linkattrd: boolean indicating if the function should escape
	attributes (e.g. < becomes < or " becomes ")
	@param email_obfuscation_mode: the protection mode. See below:

	You can choose among several modes to protect emails. It is
	advised to keep the default
	CFG_MISCUTIL_EMAIL_HARVESTING_PROTECTION value, so that it is
	possible for an admin to change the policy globally.

	Available modes ([t] means "transparent" for the user):

	-1: hide all emails, excepted CFG_SITE_ADMIN_EMAIL and
	CFG_SITE_SUPPORT_EMAIL.

	[t] 0 : no protection, email returned as is.
	foo@example.com => foo@example.com

	1 : basic email munging: replaces @ by [at] and . by [dot]
	foo@example.com => foo [at] example [dot] com

	[t] 2 : transparent name mangling: characters are replaced by
	equivalent HTML entities.
	foo@example.com => foo@example.com

	[t] 3 : javascript insertion. Requires Javascript enabled on client side.

	4 : replaces @ and . characters by gif equivalents.
	foo@example.com => foo<img src="at.gif" alt=" [at] ">example<img src="dot.gif" alt=" [dot] ">com
	"""
	# TODO: implement other protection modes to encode/encript email:
	#
	## [t] 5 : form submission. User is redirected to a form that he can
	## fills in to send the email (??Use webmessage??).
	## Depending on WebAccess, ask to answer a question.
	##
	## [t] 6 : if user can see (controlled by WebAccess), display. Else
	## ask to login to see email. If user cannot see, display
	## form submission.

	if linkattrd is None:
	linkattrd = {}

	parameters = {}
	if subject:
	parameters["subject"] = subject
	if body:
	parameters["body"] = body.replace('\r\n', '\n').replace('\n', '\r\n')
	if cc:
	parameters["cc"] = cc
	if bcc:
	parameters["bcc"] = bcc

	# Preprocessing values for some modes
	if email_obfuscation_mode == 1:
	# Basic Munging
	email = email.replace("@", " [at] ").replace(".", " [dot] ")
	elif email_obfuscation_mode == 2:
	# Transparent name mangling
	email = string_to_numeric_char_reference(email)

	if '%(email)s' in link_label:
	link_label = link_label % {'email': email}

	mailto_link = create_html_link('mailto:' + email, parameters,
	link_label, linkattrd,
	escape_urlargd, escape_linkattrd)

	if email_obfuscation_mode == 0:
	# Return "as is"
	return mailto_link
	elif email_obfuscation_mode == 1:
	# Basic Munging
	return mailto_link
	elif email_obfuscation_mode == 2:
	# Transparent name mangling
	return mailto_link
	elif email_obfuscation_mode == 3:
	# Javascript-based
	return '''<script language="JavaScript" ''' \
	'''type="text/javascript">''' \
	'''document.write('%s'.split("").reverse().join(""))''' \
	'''</script>''' % \
	mailto_link[::-1].replace("'", "\\'")
	elif email_obfuscation_mode == 4:
	# GIFs-based
	email = email.replace('.',
	'<img src="%s/img/dot.gif" alt=" [dot] " '
	'style="vertical-align:bottom" />' % CFG_SITE_URL)
	email = email.replace('@',
	'<img src="%s/img/at.gif" alt=" [at] " '
	'style="vertical-align:baseline" />' % CFG_SITE_URL)
	return email

	# All other cases, including mode -1:
	return ""


	def string_to_numeric_char_reference(string):
	"""
	Encode a string to HTML-compatible numeric character reference.
	Eg: encode_html_entities("abc") == 'abc'
	"""
	out = ""
	for char in string:
	out += "&#" + str(ord(char)) + ";"
	return out

	def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None, quote_path=False):
	"""
	Given an Invenio URL returns a tuple with two elements. The first is the
	canonical URL, that is the original URL with CFG_SITE_URL prefix, and
	where the ln= argument stripped. The second element element is mapping,
	language code -> alternate URL

	@param quote_path: if True, the path section of the given C{url}
	is quoted according to RFC 2396
	"""
	dummy_scheme, dummy_netloc, path, dummy_params, query, fragment = urlparse(url)
	canonical_scheme, canonical_netloc = urlparse(CFG_SITE_URL)[0:2]
	parsed_query = washed_argd or parse_qsl(query)
	no_ln_parsed_query = [(key, value) for (key, value) in parsed_query if key != 'ln']
	if drop_ln:
	canonical_parsed_query = no_ln_parsed_query
	else:
	canonical_parsed_query = parsed_query
	if quote_path:
	path = urllib.quote(path)
	canonical_query = urlencode(canonical_parsed_query)
	canonical_url = urlunparse((canonical_scheme, canonical_netloc, path, dummy_params, canonical_query, fragment))
	alternate_urls = {}
	for ln in CFG_SITE_LANGS:
	alternate_query = urlencode(no_ln_parsed_query + [('ln', ln)])
	alternate_url = urlunparse((canonical_scheme, canonical_netloc, path, dummy_params, alternate_query, fragment))
	alternate_urls[ln] = alternate_url
	return canonical_url, alternate_urls

	def create_url(urlbase, urlargd, escape_urlargd=True, urlhash=None):
	"""Creates a W3C compliant URL. Output will look like this:
	'urlbase?param1=value1&param2=value2'
	@param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
	@param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of'='hb'}
	@param escape_urlargd: boolean indicating if the function should escape
	arguments (e.g. < becomes < or " becomes ")
	@param urlhash: hash string to add at the end of the link
	"""
	separator = '&'
	output = urlbase
	if urlargd:
	output += '?'
	if escape_urlargd:
	arguments = [escape(quote(str(key)), quote=True) + '=' + \
	escape(quote(str(urlargd[key])), quote=True)
	for key in urlargd.keys()]
	else:
	arguments = [str(key) + '=' + str(urlargd[key])
	for key in urlargd.keys()]
	output += separator.join(arguments)
	if urlhash:
	output += "#" + escape(quote(str(urlhash)))
	return output


	def same_urls_p(a, b):
	""" Compare two URLs, ignoring reorganizing of query arguments """

	ua = list(urlparse(a))
	ub = list(urlparse(b))

	ua[4] = parse_qs(ua[4])
	ub[4] = parse_qs(ub[4])

	return ua == ub


	def urlargs_replace_text_in_arg(urlargs, regexp_argname, text_old, text_new):
	"""Analyze `urlargs' (URL CGI GET query arguments in string form)
	and for each occurrence of argument matching `regexp_argname'
	replace every substring `text_old' by `text_new'. Return the
	resulting new URL.

	Used to be used for search engine's create_nearest_terms_box,
	now it is not used there anymore. It is left here in case it
	will become possibly useful later.
	"""
	out = ""
	# parse URL arguments into a dictionary:
	urlargsdict = parse_qs(urlargs)
	## construct new URL arguments:
	urlargsdictnew = {}
	for key in urlargsdict.keys():
	if re.match(regexp_argname, key): # replace `arg' by new values
	urlargsdictnew[key] = []
	for parg in urlargsdict[key]:
	urlargsdictnew[key].append(parg.replace(text_old, text_new))
	else: # keep old values
	urlargsdictnew[key] = urlargsdict[key]
	# build new URL for this word:
	for key in urlargsdictnew.keys():
	for val in urlargsdictnew[key]:
	out += "&" + key + "=" + quote_plus(val, '')
	if out.startswith("&"):
	out = out[5:]
	return out

	def get_title_of_page(url):
	"""
	@param url: page to get the title from
	@return: the page title in utf-8 or None in case
	that any kind of exception occured e.g. connection error,
	URL not known
	"""
	if BEAUTIFUL_SOUP_IMPORTED:
	try:
	opener = make_invenio_opener('UrlUtils')
	soup = BeautifulSoup.BeautifulSoup(opener.open(url))
	return soup.title.string.encode("utf-8")
	except:
	return None
	else:
	return "Title not available"

	def make_user_agent_string(component=None):
	"""
	Return a nice and uniform user-agent string to be used when Invenio
	act as a client in HTTP requests.
	"""
	ret = "Invenio-%s (+%s; \"%s\")" % (CFG_VERSION, CFG_SITE_URL, CFG_SITE_NAME)
	if component:
	ret += " %s" % component
	return ret

	class InvenioFancyURLopener(FancyURLopener):
	## Provide default user agent string
	version = make_user_agent_string()
	def prompt_user_passwd(self, host, realm):
	"""Don't prompt"""
	return None, None

	# Let's override default useragent string
	# See: http://docs.python.org/release/2.4.4/lib/module-urllib.html
	urllib._urlopener = InvenioFancyURLopener()

	def make_invenio_opener(component=None):
	"""
	Return an urllib2 opener with the useragent already set in the appropriate
	way.
	"""
	opener = urllib2.build_opener()
	opener.addheaders = [('User-agent', make_user_agent_string(component))]
	return opener

	def create_AWS_request_url(base_url, argd, _amazon_secret_access_key,
	_timestamp=None):
	"""
	Create a signed AWS (Amazon Web Service) request URL corresponding
	to the given parameters.

	Example:
	>> create_AWS_request_url("http://ecs.amazon.com/onca/xml",
	{'AWSAccessKeyID': '0000000000',
	'Service': 'AWSECommerceService',
	'Operation': 'ItemLookup',
	'ItemID': '0679722769',
	'ResponseGroup': 'ItemAttributes,Offers,Images,Review'},
	"1234567890")

	@param base_url: Service URL of the Amazon store to query
	@param argd: dictionary of arguments defining the query
	@param _amazon_secret_access_key: your Amazon secret key
	@param _timestamp: for testing purpose only (default: current timestamp)

	@type base_url: string
	@type argd: dict
	@type _amazon_secret_access_key: string
	@type _timestamp: string

	@return signed URL of the request (string)
	"""

	## First define a few util functions

	def get_AWS_signature(argd, _amazon_secret_access_key,
	method="GET", request_host="webservices.amazon.com",
	request_uri="/onca/xml",
	_timestamp=None):
	"""
	Returns the signature of an Amazon request, based on the
	arguments of the request.

	@param argd: dictionary of arguments defining the query
	@param _amazon_secret_access_key: your Amazon secret key
	@param method: method of the request POST or GET
	@param request_host: host contacted for the query. To embed in the signature.
	@param request_uri: uri contacted at 'request_host'. To embed in the signature.
	@param _timestamp: for testing purpose only (default: current timestamp)

	@type argd: dict
	@type _amazon_secret_access_key: string
	@type method: string
	@type host_header: string
	@type http_request_uri: string
	@type _timestamp: string

	@return signature of the request (string)
	"""

	# Add timestamp
	if not _timestamp:
	argd["Timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ",
	time.gmtime())
	else:
	argd["Timestamp"] = _timestamp

	# Order parameter keys by byte value
	parameter_keys = argd.keys()
	parameter_keys.sort()

	# Encode arguments, according to RFC 3986. Make sure we
	# generate a list which is ordered by byte value of the keys
	arguments = [quote(str(key), safe="~/") + "=" + \
	quote(str(argd[key]), safe="~/") \
	for key in parameter_keys]

	# Join
	parameters_string = "&".join(arguments)

	# Prefix
	parameters_string = method.upper() + "\n" + \
	request_host.lower() + "\n" + \
	(request_uri or "/") + "\n" + \
	parameters_string

	# Sign and return
	return calculate_RFC2104_HMAC(parameters_string,
	_amazon_secret_access_key)

	def calculate_RFC2104_HMAC(data, _amazon_secret_access_key):
	"""
	Computes a RFC 2104 compliant HMAC Signature and then Base64
	encodes it.

	Module hashlib must be installed if Python < 2.5
	<http://pypi.python.org/pypi/hashlib/20081119>

	@param data: data to sign
	@param _amazon_secret_access_key: your Amazon secret key

	@type data: string
	@type _amazon_secret_access_key: string. Empty if hashlib module not installed
	"""
	if not HASHLIB_IMPORTED:
	try:
	raise Exception("Module hashlib not installed. Please install it.")
	except:
	from invenio.errorlib import register_exception
	register_exception(stream='warning', alert_admin=True, subject='Cannot create AWS signature')
	return ""
	else:
	if sys.version_info < (2, 5):
	# compatibility mode for Python < 2.5 and hashlib
	my_digest_algo = _MySHA256(sha256())
	else:
	my_digest_algo = sha256

	return base64.encodestring(hmac.new(_amazon_secret_access_key,
	data, my_digest_algo).digest()).strip()
	## End util functions

	parsed_url = urlparse(base_url)
	signature = get_AWS_signature(argd, _amazon_secret_access_key,
	request_host=parsed_url[1],
	request_uri=parsed_url[2],
	_timestamp=_timestamp)
	if signature:
	argd["Signature"] = signature
	return base_url + "?" + urlencode(argd)


	def create_Indico_request_url(base_url, indico_what, indico_loc, indico_id, indico_type, indico_params, indico_key, indico_sig, _timestamp=None):
	"""
	Create a signed Indico request URL to access Indico HTTP Export APIs.

	See U{http://indico.cern.ch/ihelp/html/ExportAPI/index.html} for more
	information.

	Example:
	>> create_Indico_request_url("https://indico.cern.ch",
	"categ",
	"",
	[1, 7],
	"xml",
	{'onlypublic': 'yes',
	'order': 'title',
	'from': 'today',
	'to': 'tomorrow'},
	'00000000-0000-0000-0000-000000000000',
	'00000000-0000-0000-0000-000000000000')

	@param base_url: Service base URL of the Indico instance to query
	@param indico_what: element to export
	@type indico_what: one of the strings: C{categ}, C{event}, C{room}, C{reservation}
	@param indico_loc: location of the element(s) specified by ID (only used for some elements)
	@param indico_id: ID of the element to be exported
	@type indico_id: a string or a list/tuple of strings
	@param indico_type: output format
	@type indico_type: one of the strings: C{json}, C{jsonp}, C{xml}, C{html}, C{ics}, C{atom}
	@param indico_params: parameters of the query. See U{http://indico.cern.ch/ihelp/html/ExportAPI/common.html}
	@param indico_key: API key provided for the given Indico instance
	@param indico_sig: API secret key (signature) provided for the given Indico instance
	@param _timestamp: for testing purpose only (default: current timestamp)

	@return signed URL of the request (string)
	"""

	url = '/export/' + indico_what + '/'
	if indico_loc:
	url += indico_loc + '/'
	if type(indico_id) in (list, tuple):
	# dash separated list of values
	indico_id = '-'.join([str(x) for x in indico_id])
	url += indico_id + '.' + str(indico_type)

	if hasattr(indico_params, 'items'):
	items = indico_params.items()
	else:
	items = list(indico_params)
	if indico_key:
	items.append(('apikey', indico_key))
	if indico_sig and HASHLIB_IMPORTED:
	if _timestamp:
	items.append(('timestamp', str(_timestamp)))
	else:
	items.append(('timestamp', str(int(time.time()))))
	items = sorted(items, key=lambda x: x[0].lower())
	url_to_sign = '%s?%s' % (url, urlencode(items))
	if sys.version_info < (2, 5):
	# compatibility mode for Python < 2.5 and hashlib
	my_digest_algo = _MySHA1(sha1())
	else:
	my_digest_algo = sha1
	signature = hmac.new(indico_sig, url_to_sign, my_digest_algo).hexdigest()
	items.append(('signature', signature))
	elif not HASHLIB_IMPORTED:
	try:
	raise Exception("Module hashlib not installed. Please install it.")
	except:
	from invenio.errorlib import register_exception
	register_exception(stream='warning', alert_admin=True, subject='Cannot create AWS signature')
	if not items:
	return url

	url = '%s%s?%s' % (base_url.strip('/'), url, urlencode(items))
	return url

	class _MyHashlibAlgo(object):
	'''
	Define a subclass of any hashlib algorithm class, with an additional "new()"
	function, to work with the Python < 2.5 version of the hmac module.

	(This class is more complex than it should, but it is not
	possible to subclass haslib algorithm)
	'''

	def __init__(self, obj):
	"""Set the wrapped object."""
	super(_MyHashlibAlgo, self).__setattr__('_obj', obj)

	methods = []
	for name_value in inspect.getmembers(obj, inspect.ismethod):
	methods.append(name_value[0])
	super(_MyHashlibAlgo, self).__setattr__('__methods__', methods)

	def isnotmethod(object_):
	"Opposite of ismethod(..)"
	return not inspect.ismethod(object_)
	members = []
	for name_value in inspect.getmembers(obj, isnotmethod):
	members.append(name_value[0])
	super(_MyHashlibAlgo, self).__setattr__('__members__', members)

	def __getattr__(self, name):
	"""Redirect unhandled get attribute to self._obj."""
	if not hasattr(self._obj, name):
	raise AttributeError, ("'%s' has no attribute %s" %
	(self.__class__.__name__, name))
	else:
	return getattr(self._obj, name)

	def __setattr__(self, name, value):
	"""Redirect set attribute to self._obj if necessary."""
	self_has_attr = True
	try:
	super(_MyHashlibAlgo, self).__getattribute__(name)
	except AttributeError:
	self_has_attr = False

	if (name == "_obj" or not hasattr(self, "_obj") or
	not hasattr(self._obj, name) or self_has_attr):
	return super(_MyHashlibAlgo, self).__setattr__(name, value)
	else:
	return setattr(self._obj, name, value)

	class _MySHA256(_MyHashlibAlgo):
	"A _MyHashlibAlgo subsclass for sha256"
	new = lambda d = '': sha256()


	class _MySHA1(_MyHashlibAlgo):
	"A _MyHashlibAlgo subsclass for sha1"
	new = lambda d = '': sha1()

	def auto_version_url(file_path):
	""" Appends modification time of the file to the request URL in order for the
	browser to refresh the cache when file changes

	@param file_path: path to the file, e.g js/foo.js
	@return: file_path with modification time appended to URL
	"""
	file_md5 = ""
	try:
	file_md5 = md5(open(CFG_WEBDIR + os.sep + file_path).read()).hexdigest()
	except IOError:
	pass
	return file_path + "?%s" % file_md5

	def get_relative_url(url):
	"""
	Returns the relative URL from a URL. For example:

	'http://web.net' -> ''
	'http://web.net/' -> ''
	'http://web.net/1222' -> '/1222'
	'http://web.net/wsadas/asd' -> '/wsadas/asd'

	It will never return a trailing "/".

	@param url: A url to transform
	@type url: str

	@return: relative URL
	"""
	# remove any protocol info before
	stripped_site_url = url.replace("://", "")
	baseurl = "/" + "/".join(stripped_site_url.split("/")[1:])

	# remove any trailing slash ("/")
	if baseurl[-1] == "/":
	return baseurl[:-1]
	else:
	return baseurl

urlutils.pyNo OneTemporaryActions

File Metadata

urlutils.pyView Options

Event Timeline

urlutils.py
No OneTemporary
Actions

urlutils.py
View Options