Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F68626886
urlutils.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Jun 28, 05:54
Size
10 KB
Mime Type
text/x-python
Expires
Sun, Jun 30, 05:54 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
18607093
Attached To
R3600 invenio-infoscience
urlutils.py
View Options
# -*- coding: utf-8 -*-
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
urlutils.py -- helper functions for URL related problems such as
argument washing, redirection, etc.
"""
__revision__
=
"$Id$"
import
re
from
urllib
import
urlencode
,
quote_plus
,
quote
from
urlparse
import
urlparse
from
cgi
import
parse_qs
,
escape
try
:
from
mod_python
import
apache
,
util
except
ImportError
:
pass
from
invenio.config
import
CFG_SITE_LANG
def wash_url_argument(var, new_type):
    """
    Wash argument into 'new_type', that can be 'list', 'str',
    'int', 'tuple' or 'dict'.

    If needed, the check 'var is not None' should be done before
    calling this function.

    @param var: variable value
    @param new_type: variable type, 'list', 'str', 'int', 'tuple' or 'dict'
    @return: as much as possible, value var as type new_type:
        - 'list': var itself when it is a list, else [var]
        - 'str': var itself when it is a string; the first element
          formatted as a string when var is a list ('' when that is not
          possible, e.g. empty list); else var formatted as a string
        - 'int': the integer value of var (or of its first element when
          var is a list); 0 whenever the conversion is unsuccessful or
          var is neither a list, an int nor a string
        - 'tuple': var itself when it is a tuple, else (var,)
        - 'dict': var itself when it is a dict, else {0: var}
        - any other new_type: an empty list
    """
    if new_type == 'list':
        if isinstance(var, list):
            return var
        return [var]

    if new_type == 'str':
        if isinstance(var, list):
            try:
                # Wrap in a 1-tuple so that a tuple element is formatted
                # as a whole instead of being unpacked by the % operator.
                return "%s" % (var[0],)
            except Exception:
                # Best effort: empty list or unprintable first element.
                return ""
        if isinstance(var, str):
            return var
        # (var,) rather than var: "%s" % (1, 2) would raise TypeError.
        return "%s" % (var,)

    if new_type == 'int':
        if isinstance(var, list):
            try:
                return int(var[0])
            except Exception:
                # Best effort: empty list or non-numeric first element.
                return 0
        if isinstance(var, (int, str)):
            # isinstance() also accepts subclasses of str/int, which the
            # former exact-type test silently washed to 0.
            try:
                return int(var)
            except Exception:
                return 0
        return 0

    if new_type == 'tuple':
        if isinstance(var, tuple):
            return var
        return (var,)

    if new_type == 'dict':
        if isinstance(var, dict):
            return var
        return {0: var}

    # Unknown target type: historical behaviour is to return an empty list.
    return []
def redirect_to_url(req, url, redirection_type=None):
    """
    Redirect current page to url.

    This function never returns normally: it ends by raising
    apache.SERVER_RETURN so that request processing stops.

    @param req: request as received from apache
    @param url: url to redirect to
    @param redirection_type: what kind of redirection is required:
        e.g.: apache.HTTP_MULTIPLE_CHOICES = 300
              apache.HTTP_MOVED_PERMANENTLY = 301
              apache.HTTP_MOVED_TEMPORARILY = 302
              apache.HTTP_SEE_OTHER = 303
              apache.HTTP_NOT_MODIFIED = 304
              apache.HTTP_USE_PROXY = 305
              apache.HTTP_TEMPORARY_REDIRECT = 307
        The default is apache.HTTP_TEMPORARY_REDIRECT
        Please see: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3
    @raise IOError: when req.sent_bodyct is set, i.e. the redirection
        can no longer be sent
    @raise apache.SERVER_RETURN: always, with apache.DONE, once the
        redirection has been written
    """
    if redirection_type is None:
        redirection_type = apache.HTTP_TEMPORARY_REDIRECT

    req.err_headers_out["Location"] = url
    req.err_headers_out["Cache-Control"] = "no-cache"

    if req.sent_bodyct:
        raise IOError("Cannot redirect after headers have already been sent.")

    req.status = redirection_type
    # The Location header carries the raw URL; the HTML fallback link must
    # have it escaped so that '&', '<' or '"' cannot break out of the
    # href attribute.
    req.write('<p>Please go to <a href="%s">here</a></p>\n'
              % escape(url, quote=True))

    raise apache.SERVER_RETURN(apache.DONE)
def get_client_ip_address(req):
    """
    Return the client address of an apache request, as a string.

    @param req: request as received from apache
    @return: str() of what req.get_remote_host() reports when called
        with apache.REMOTE_NOLOOKUP
    """
    # NOTE(review): REMOTE_NOLOOKUP presumably skips the DNS lookup so that
    # an IP address (not a host name) comes back -- confirm in mod_python docs.
    remote_host = req.get_remote_host(apache.REMOTE_NOLOOKUP)
    return str(remote_host)
def get_referer(req, replace_ampersands=False):
    """ Return the referring page of a request.

    Referer (wikipedia): Referer is a common misspelling of the word
    "referrer"; so common, in fact, that it made it into the official
    specification of HTTP. When visiting a webpage, the referer or
    referring page is the URL of the previous webpage from which a link
    was followed.

    @param req: request; its 'headers_in' mapping is looked up for the
        'Referer' key
    @param replace_ampersands: if true, replace & by &amp; in url
        (correct HTML cannot contain & characters alone).
    @return: the referer URL, or '' when the request carries no
        'Referer' header
    """
    try:
        referer = req.headers_in['Referer']
    except KeyError:
        return ''
    if replace_ampersands:
        # Escape for embedding in HTML; replacing '&' by itself, as the
        # code used to do, was a no-op.
        return referer.replace('&', '&amp;')
    return referer
def drop_default_urlargd(urlargd, default_urlargd):
    """
    Return a copy of 'urlargd' without the arguments that carry their
    default value.

    @param urlargd: dictionary of URL arguments (name -> value)
    @param default_urlargd: dictionary mapping argument names to
        sequences whose element at index 1 is the default value
    @return: a new dictionary; neither input is modified
    """
    defaults = {}
    defaults.update(default_urlargd)

    ## Note: 'ln' is deliberately not given CFG_SITE_LANG as an implicit
    ## default here. An Invenio URL now should always specify the desired
    ## language, in order not to raise the automatic language discovery
    ## (client browser language can be used now in place of CFG_SITE_LANG).

    remaining = {}
    for name, value in urlargd.items():
        try:
            is_default = defaults[name][1] == value
        except KeyError:
            # Argument without a declared default: always kept.
            is_default = False
        if not is_default:
            remaining[name] = value
    return remaining
def make_canonical_urlargd(urlargd, default_urlargd):
    """ Build up the query part of an URL from the arguments passed in
    the 'urlargd' dictionary. 'default_urlargd' is a secondary dictionary
    which contains tuples of the form (type, default value) for the query
    arguments (this is the same dictionary as the one you can pass to
    webinterface_handler.wash_urlargd).

    When a query element has its default value, it is discarded, so
    that the simplest (canonical) url query is returned.

    The result contains the initial '?' if there are actual query
    items remaining. Arguments are separated by '&amp;' so that the
    result can be embedded as is in HTML.

    @param urlargd: dictionary of query arguments (name -> value)
    @param default_urlargd: dictionary of (type, default value) tuples
    @return: the query string, or '' when no argument remains
    """
    canonical = drop_default_urlargd(urlargd, default_urlargd)

    if not canonical:
        return ''

    # urlencode() joins arguments with a bare '&' (and percent-encodes any
    # '&' inside keys or values), so every '&' left is a separator; turn
    # it into the HTML entity. Replacing '&' by itself was a no-op.
    return '?' + urlencode(canonical, doseq=True).replace('&', '&amp;')
def create_html_link(urlbase, urlargd, link_label, linkattrd=None,
                     escape_urlargd=True, escape_linkattrd=True):
    """Creates a W3C compliant link.

    @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
    @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of':'hb'})
    @param link_label: text displayed in a browser (has to be already escaped)
    @param linkattrd: dictionary of attributes (e.g. a={'class': 'img'});
        None (the default) or an empty dictionary means no attribute
    @param escape_urlargd: boolean indicating if the function should escape
        arguments (e.g. < becomes &lt; or " becomes &quot;)
    @param escape_linkattrd: boolean indicating if the function should escape
        attributes (e.g. < becomes &lt; or " becomes &quot;)
    @return: the '<a href="..." ...>link_label</a>' HTML string
    """
    output = '<a href="' + create_url(urlbase, urlargd, escape_urlargd) + '"'

    # linkattrd defaults to None rather than {} so that no mutable object
    # is shared between calls.
    if linkattrd:
        if escape_linkattrd:
            attributes = [escape(str(key), quote=True) + '="' +
                          escape(str(value), quote=True) + '"'
                          for key, value in linkattrd.items()]
        else:
            attributes = [str(key) + '="' + str(value) + '"'
                          for key, value in linkattrd.items()]
        output += ' ' + ' '.join(attributes)

    output += '>' + link_label + '</a>'
    return output
def create_url(urlbase, urlargd, escape_urlargd=True):
    """Creates a W3C compliant URL. Output will look like this:
    'urlbase?param1=value1&amp;param2=value2'

    The arguments are separated by the '&amp;' entity (not a bare '&')
    because the result is meant to be embedded in HTML, e.g. in an
    href attribute.

    @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
    @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of':'hb'})
    @param escape_urlargd: boolean indicating if the function should escape
        arguments (e.g. < becomes &lt; or " becomes &quot;); names and
        values are URL-quoted first, then HTML-escaped
    @return: urlbase alone when urlargd is empty, else urlbase followed
        by '?' and the joined 'name=value' pairs
    """
    separator = '&amp;'
    output = urlbase

    if urlargd:
        output += '?'
        if escape_urlargd:
            arguments = [escape(quote(str(key)), quote=True) + '=' +
                         escape(quote(str(value)), quote=True)
                         for key, value in urlargd.items()]
        else:
            arguments = [str(key) + '=' + str(value)
                         for key, value in urlargd.items()]
        output += separator.join(arguments)

    return output
def same_urls_p(a, b):
    """
    Tell whether two URLs are the same, ignoring the order in which
    their query arguments appear.

    @param a: first URL
    @param b: second URL
    @return: True when every component returned by urlparse() matches,
        the query component being compared after parsing by parse_qs()
    """
    def _comparable(url):
        # Index 4 of the urlparse() result is the query string; parse it
        # into a dictionary so that argument order no longer matters.
        components = list(urlparse(url))
        components[4] = parse_qs(components[4])
        return components

    return _comparable(a) == _comparable(b)
def urlargs_replace_text_in_arg(urlargs, regexp_argname, text_old, text_new):
    """Analyze `urlargs' (URL CGI GET query arguments in string form)
    and for each occurrence of argument matching `regexp_argname'
    replace every substring `text_old' by `text_new'. Return the
    resulting new URL arguments.

    Used to be used for search engine's create_nearest_terms_box,
    now it is not used there anymore. It is left here in case it
    will become possibly useful later.

    @param urlargs: query arguments in string form (e.g. 'p=foo&f=title')
    @param regexp_argname: regular expression, matched with re.match()
        (i.e. anchored at the start only) against each argument name
    @param text_old: substring to look for in the matching arguments
    @param text_new: replacement for text_old
    @return: the rebuilt arguments as 'name=value' pairs, values quoted
        with quote_plus() and pairs joined by '&amp;'; '' when urlargs
        holds no argument
    """
    pairs = []
    # parse_qs() maps each argument name to the list of its values.
    for key, values in parse_qs(urlargs).items():
        if re.match(regexp_argname, key):
            values = [value.replace(text_old, text_new) for value in values]
        for value in values:
            pairs.append(key + "=" + quote_plus(value, ''))
    # Joining avoids the former "prefix every pair, then strip the first
    # 5 characters" approach, which ate the start of the first argument
    # whenever the prefix was not exactly '&amp;'.
    return "&amp;".join(pairs)
Event Timeline
Log In to Comment