Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F96287613
htmlutils_tests.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Dec 24, 14:22
Size
5 KB
Mime Type
text/x-python
Expires
Thu, Dec 26, 14:22 (1 d, 11 h)
Engine
blob
Format
Raw Data
Handle
23150777
Attached To
R3600 invenio-infoscience
htmlutils_tests.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Unit tests for htmlutils library."""
# Module revision identifier. "$Id$" looks like a version-control keyword
# placeholder -- TODO confirm which tool (if any) expands it.
__revision__ = "$Id$"
import
unittest
from
invenio
import
dbquery
from
invenio.htmlutils
import
HTMLWasher
,
nmtoken_from_string
from
invenio.testutils
import
make_test_suite
,
run_test_suite
class XSSEscapingTest(unittest.TestCase):
    """Test functions related to the prevention of XSS attacks."""

    def setUp(self):
        """Create the HTMLWasher instance used by each test method.

        The fixture is built here rather than in an overridden
        ``__init__`` so that a failure while constructing the washer is
        reported against the individual test instead of breaking the
        loading of the whole suite.
        """
        self.washer = HTMLWasher()

    def test_forbidden_formatting_tags(self):
        """htmlutils - washing of tags altering formatting of a page (e.g. </html>)"""
        test_str = """</html></body></div></pre>"""

        # By default the stray closing tags must be dropped entirely.
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '')

        # NOTE(review): with render_unallowed_tags=True the expected value
        # below is identical to the raw input. It may originally have been
        # entity-escaped and decoded when this file was exported --
        # confirm against HTMLWasher before relying on it.
        self.assertEqual(self.washer.wash(html_buffer=test_str,
                                          render_unallowed_tags=True),
                         '</html></body></div></pre>')

    def test_forbidden_script_tags(self):
        """htmlutils - washing of tags defining scripts (e.g. <script>)"""
        test_str = """<script>malicious_function();</script>"""

        # The <script> element is removed, its text content is kept.
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'malicious_function();')

        # NOTE(review): expected value equals the raw input; possibly an
        # entity-escaped literal that was decoded on export -- confirm.
        self.assertEqual(self.washer.wash(html_buffer=test_str,
                                          render_unallowed_tags=True),
                         '<script>malicious_function();</script>')

    def test_forbidden_attributes(self):
        """htmlutils - washing of forbidden attributes in allowed tags (e.g. onLoad)"""
        # onload
        test_str = """<p onload="javascript:malicious_functtion();">"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<p>')

        # tricky: css calling a javascript
        test_str = """<p style="background: url('http://malicious_site.com/malicious_script.js');">"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<p>')

    def test_fake_url(self):
        """htmlutils - washing of fake URLs which execute scripts"""
        test_str = """<a href="javascript:malicious_function();">link</a>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<a href="">link</a>')

        # Pirates could encode ascii values, or use uppercase letters...
        test_str = """<a href="javasCRipt:malicious_function();">link</a>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<a href="">link</a>')

        # MSIE treats 'java\ns\ncript:' the same way as 'javascript:'
        # Here we test with:
        # j
        # avas
        # crIPt :
        test_str = """<a href="j\n avas\n crIPt :malicious_function();">link</a>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<a href="">link</a>')
class CharactersEscapingTest(unittest.TestCase):
    """Test functions related to escaping reserved or forbidden characters """

    def test_convert_string_to_nmtoken(self):
        """htmlutils - converting string to Nmtoken"""
        # TODO: possibly extend this test to include 'extenders' and
        # 'combining characters' as defined in
        # http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken

        # Build an input holding every character code from 0 to 255 and
        # run it through the converter.
        ascii_str = "".join([chr(i) for i in range(0, 256)])
        nmtoken = nmtoken_from_string(ascii_str)

        # Every character of the result must be alphanumeric or one of
        # the four punctuation characters accepted by this test.
        for char in nmtoken:
            # assertTrue replaces the deprecated assert_ alias (removed in
            # Python 3.12); the message pinpoints the offending character.
            self.assertTrue(char in ('.', '-', '_', ':') or char.isalnum(),
                            "unexpected character %r in Nmtoken" % char)
class HTMLWashingTest(unittest.TestCase):
    """Test functions related to general washing of HTML source"""

    def setUp(self):
        """Create the HTMLWasher instance used by each test method.

        Using ``setUp`` instead of overriding ``__init__`` keeps the
        standard TestCase constructor and reports a washer construction
        failure against the individual test.
        """
        self.washer = HTMLWasher()

    def test_wash_html(self):
        """htmlutils - washing HTML tags"""

        # Simple test case
        test_str = 'Spam and <b><blink>eggs</blink></b>'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'Spam and <b>eggs</b>')

        # Show 'escaped' tags
        # NOTE(review): the expected value below contains no escaped
        # markup and equals the raw input; the literal may have been
        # entity-decoded when this file was exported -- confirm.
        test_str = 'Spam and <b><blink>eggs</blink></b>'
        self.assertEqual(self.washer.wash(html_buffer=test_str,
                                          render_unallowed_tags=True),
                         'Spam and <b><blink>eggs</blink></b>')

        # Keep entity and character references
        # NOTE(review): neither literal below contains an entity or
        # character reference; they look entity-decoded -- confirm
        # against the repository copy of this file.
        test_str = '<b> a < b > c </b> ÷'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<b> a < b > c </b> ÷')
# Gather the three test case classes of this module into one suite object.
TEST_SUITE = make_test_suite(
    XSSEscapingTest,
    CharactersEscapingTest,
    HTMLWashingTest,
)

# Run the suite only when this file is executed directly as a script.
if __name__ == "__main__":
    run_test_suite(TEST_SUITE)
Event Timeline
Log In to Comment