diff --git a/src/markup/__tests__/PhutilMarkupTestCase.php b/src/markup/__tests__/PhutilMarkupTestCase.php index 7f638dd..8bdfceb 100644 --- a/src/markup/__tests__/PhutilMarkupTestCase.php +++ b/src/markup/__tests__/PhutilMarkupTestCase.php @@ -1,211 +1,261 @@ assertEqual( (string)phutil_tag('br'), (string)phutil_tag('br', array())); $this->assertEqual( (string)phutil_tag('br', array()), (string)phutil_tag('br', array(), null)); } public function testTagEmpty() { $this->assertEqual( '
', (string)phutil_tag('br', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), '')); } public function testTagBasics() { $this->assertEqual( '
', (string)phutil_tag('br')); $this->assertEqual( '
y
', (string)phutil_tag('div', array(), 'y')); } public function testTagAttributes() { $this->assertEqual( '
y
', (string)phutil_tag('div', array('u' => 'v'), 'y')); $this->assertEqual( '
', (string)phutil_tag('br', array('u' => 'v'))); } public function testTagEscapes() { $this->assertEqual( '
', (string)phutil_tag('br', array('u' => '<'))); $this->assertEqual( '

', (string)phutil_tag('div', array(), phutil_tag('br'))); } public function testTagNullAttribute() { $this->assertEqual( '
', (string)phutil_tag('br', array('y' => null))); } + public function testDefaultRelNoreferrer() { + $map = array( + // These should not have rel="nofollow" inserted implicitly. + '/' => false, + '/path/to/local.html' => false, + '#example' => false, + '' => false, + + // These should get the implicit insertion. + 'http://www.example.org/' => true, + '///evil.com/' => true, + ' http://www.example.org/' => true, + 'ftp://filez.com' => true, + 'mailto:santa@northpole.com' => true, + ); + + foreach ($map as $input => $expect) { + $tag = phutil_tag( + 'a', + array( + 'href' => $input, + ), + 'link'); + $tag = (string)$tag; + $this->assertEqual($expect, (bool)preg_match('/noreferrer/', $tag)); + } + + // With an explicit `rel` present, we should not override it. + $tag = phutil_tag( + 'a', + array( + 'href' => 'http://www.example.org/', + 'rel' => 'nofollow', + ), + 'link'); + + $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); + + // For tags other than `a`, we should not insert `rel`. + $tag = phutil_tag( + 'link', + array( + 'href' => 'http://www.example.org/', + ), + 'link'); + + $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); + } + + public function testTagJavascriptProtocolRejection() { $hrefs = array( 'javascript:alert(1)' => true, 'JAVASCRIPT:alert(2)' => true, ' javascript:alert(3)' => true, '/' => false, '/path/to/stuff/' => false, '' => false, 'http://example.com/' => false, '#' => false, 'javascript://anything' => true, // Chrome 33 and IE11, at a minimum, treat this as Javascript. "javascript\n:alert(4)" => true, // Opera currently accepts a variety of unicode spaces. This test case // has a smattering of them. "\xE2\x80\x89javascript:" => true, "javascript\xE2\x80\x89:" => true, "\xE2\x80\x84javascript:" => true, "javascript\xE2\x80\x84:" => true, // Because we're aggressive, all of unicode should trigger detection // by default. "\xE2\x98\x83javascript:" => true, "javascript\xE2\x98\x83:" => true, "\xE2\x98\x83javascript\xE2\x98\x83:" => true, // We're aggressive about this, so we'll intentionally raise false // positives in these cases. 'javascript~:alert(5)' => true, '!!!javascript!!!!:alert(6)' => true, // However, we should raise true negatives in these slightly more // reasonable cases. 'javascript/:docs.html' => false, 'javascripts:x.png' => false, 'COOLjavascript:page' => false, '/javascript:alert(1)' => false, ); foreach (array(true, false) as $use_uri) { foreach ($hrefs as $href => $expect) { if ($use_uri) { $href = new PhutilURI($href); } $caught = null; try { phutil_tag('a', array('href' => $href), 'click for candy'); } catch (Exception $ex) { $caught = $ex; } $this->assertEqual( $expect, $caught instanceof Exception, "Rejected href: {$href}"); } } } public function testURIEscape() { $this->assertEqual( '%2B/%20%3F%23%26%3A%21xyz%25', phutil_escape_uri('+/ ?#&:!xyz%')); } public function testURIPathComponentEscape() { $this->assertEqual( 'a%252Fb', phutil_escape_uri_path_component('a/b')); $str = ''; for ($ii = 0; $ii <= 255; $ii++) { $str .= chr($ii); } $this->assertEqual( $str, phutil_unescape_uri_path_component( rawurldecode( // Simulates webserver. phutil_escape_uri_path_component($str)))); } public function testHsprintf() { $this->assertEqual( '
<3
', (string)hsprintf('
%s
', '<3')); } public function testAppendHTML() { $html = phutil_tag('hr'); $html->appendHTML(phutil_tag('br'), ''); $this->assertEqual('

<evil>', $html->getHTMLContent()); } public function testArrayEscaping() { $this->assertEqual( '
<div>
', phutil_escape_html( array( hsprintf('
'), array( array( '<', array( 'd', array( array( hsprintf('i'), ), 'v', ), ), array( array( '>', ), ), ), ), hsprintf('
'), ))); $this->assertEqual( '


', phutil_tag( 'div', array( ), array( array( array( phutil_tag('br'), array( phutil_tag('hr'), ), phutil_tag('wbr'), ), ), ))->getHTMLContent()); } } diff --git a/src/markup/render.php b/src/markup/render.php index eb1c3e0..d6c8d11 100644 --- a/src/markup/render.php +++ b/src/markup/render.php @@ -1,248 +1,291 @@ ` tags, if the `rel` attribute is not specified, it + * is interpreted as `rel="noreferrer"`. + * - When rendering `` tags, the `href` attribute may not begin with + * `javascript:`. + * + * These special cases can not be disabled. + * + * IMPORTANT: The `$tag` attribute and the keys of the `$attributes` array are + * trusted blindly, and not escaped. You should not pass user data in these + * parameters. + * + * @param string The name of the tag, like `a` or `div`. + * @param map A map of tag attributes. + * @param wild Content to put in the tag. + * @return PhutilSafeHTML Tag object. */ function phutil_tag($tag, array $attributes = array(), $content = null) { + + // If the `href` attribute is present: + // - make sure it is not a "javascript:" URI. We never permit these. + // - if the tag is an `` and the link is to some foreign resource, + // add `rel="nofollow"` by default. if (!empty($attributes['href'])) { // This might be a URI object, so cast it to a string. $href = (string)$attributes['href']; - // Block 'javascript:' hrefs at the tag level: no well-designed application - // should ever use them, and they are a potent attack vector. + if (isset($href[0])) { + $is_anchor_href = ($href[0] == '#'); - // This function is deep in the core and performance sensitive, so we're - // doing a cheap version of this test first to avoid calling preg_match() - // on URIs which begin with '/'. This covers essentially all URIs in - // Phabricator. + // Is this a link to a resource on the same domain? The second part of + // this excludes "///evil.com/" protocol-relative hrefs. + $is_domain_href = ($href[0] == '/') && + (!isset($href[1]) || $href[1] != '/'); - if (isset($href[0]) && ($href[0] != '/')) { + // If the `rel` attribute is not specified, fill in `rel="noreferrer"`. + // Effectively, this serves to make the default behavior for offsite + // links "do not send a referrer", which is broadly desirable. Specifying + // some non-null `rel` will skip this. + if (!isset($attributes['rel'])) { + if (!$is_anchor_href && !$is_domain_href) { + if ($tag == 'a') { + $attributes['rel'] = 'noreferrer'; + } + } + } - // Chrome 33 and IE 11 both interpret "javascript\n:" as a Javascript URI, - // and all browsers interpret " javascript:" as a Javascript URI, so be - // aggressive about looking for "javascript:" in the initial section of - // the string. + // Block 'javascript:' hrefs at the tag level: no well-designed + // application should ever use them, and they are a potent attack vector. - $normalized_href = preg_replace('([^a-z0-9/:]+)i', '', $href); - if (preg_match('/^javascript:/i', $normalized_href)) { - throw new Exception( - pht( - "Attempting to render a tag with an 'href' attribute that begins ". - "with 'javascript:'. This is either a serious security concern ". - "or a serious architecture concern. Seek urgent remedy.")); + // This function is deep in the core and performance sensitive, so we're + // doing a cheap version of this test first to avoid calling preg_match() + // on URIs which begin with '/' or `#`. These cover essentially all URIs + // in Phabricator. + if (!$is_anchor_href && !$is_domain_href) { + // Chrome 33 and IE 11 both interpret "javascript\n:" as a Javascript + // URI, and all browsers interpret " javascript:" as a Javascript URI, + // so be aggressive about looking for "javascript:" in the initial + // section of the string. + + $normalized_href = preg_replace('([^a-z0-9/:]+)i', '', $href); + if (preg_match('/^javascript:/i', $normalized_href)) { + throw new Exception( + pht( + "Attempting to render a tag with an 'href' attribute that ". + "begins with 'javascript:'. This is either a serious security ". + "concern or a serious architecture concern. Seek urgent ". + "remedy.")); + } } } } // For tags which can't self-close, treat null as the empty string -- for // example, always render `
`, never `
`. static $self_closing_tags = array( 'area' => true, 'base' => true, 'br' => true, 'col' => true, 'command' => true, 'embed' => true, 'frame' => true, 'hr' => true, 'img' => true, 'input' => true, 'keygen' => true, 'link' => true, 'meta' => true, 'param' => true, 'source' => true, 'track' => true, 'wbr' => true, ); $attr_string = ''; foreach ($attributes as $k => $v) { if ($v === null) { continue; } $v = phutil_escape_html($v); $attr_string .= ' '.$k.'="'.$v.'"'; } if ($content === null) { if (isset($self_closing_tags[$tag])) { return new PhutilSafeHTML('<'.$tag.$attr_string.' />'); } else { $content = ''; } } else { $content = phutil_escape_html($content); } return new PhutilSafeHTML('<'.$tag.$attr_string.'>'.$content.''); } /** * @group markup */ function phutil_tag_div($class, $content = null) { return phutil_tag('div', array('class' => $class), $content); } /** * @group markup */ function phutil_escape_html($string) { if ($string instanceof PhutilSafeHTML) { return $string; } else if ($string instanceof PhutilSafeHTMLProducerInterface) { $result = $string->producePhutilSafeHTML(); if ($result instanceof PhutilSafeHTML) { return phutil_escape_html($result); } else if (is_array($result)) { return phutil_escape_html($result); } else if ($result instanceof PhutilSafeHTMLProducerInterface) { return phutil_escape_html($result); } else { try { assert_stringlike($result); return phutil_escape_html((string)$result); } catch (Exception $ex) { $class = get_class($string); throw new Exception( "Object (of class '{$class}') implements ". "PhutilSafeHTMLProducerInterface but did not return anything ". "renderable from producePhutilSafeHTML()."); } } } else if (is_array($string)) { $result = ''; foreach ($string as $item) { $result .= phutil_escape_html($item); } return $result; } return htmlspecialchars($string, ENT_QUOTES, 'UTF-8'); } /** * @group markup */ function phutil_escape_html_newlines($string) { return PhutilSafeHTML::applyFunction('nl2br', $string); } /** * Mark string as safe for use in HTML. * * @group markup */ function phutil_safe_html($string) { if ($string == '') { return $string; } else if ($string instanceof PhutilSafeHTML) { return $string; } else { return new PhutilSafeHTML($string); } } /** * HTML safe version of implode(). * * @group markup */ function phutil_implode_html($glue, array $pieces) { $glue = phutil_escape_html($glue); foreach ($pieces as $k => $piece) { $pieces[$k] = phutil_escape_html($piece); } return phutil_safe_html(implode($glue, $pieces)); } /** * Format a HTML code. This function behaves like sprintf(), except that all * the normal conversions (like %s) will be properly escaped. * * @group markup */ function hsprintf($html/* , ... */) { $args = func_get_args(); array_shift($args); return new PhutilSafeHTML( vsprintf($html, array_map('phutil_escape_html', $args))); } /** * Escape text for inclusion in a URI or a query parameter. Note that this * method does NOT escape '/', because "%2F" is invalid in paths and Apache * will automatically 404 the page if it's present. This will produce correct * (the URIs will work) and desirable (the URIs will be readable) behavior in * these cases: * * '/path/?param='.phutil_escape_uri($string); # OK: Query Parameter * '/path/to/'.phutil_escape_uri($string); # OK: URI Suffix * * It will potentially produce the WRONG behavior in this special case: * * COUNTEREXAMPLE * '/path/to/'.phutil_escape_uri($string).'/thing/'; # BAD: URI Infix * * In this case, any '/' characters in the string will not be escaped, so you * will not be able to distinguish between the string and the suffix (unless * you have more information, like you know the format of the suffix). For infix * URI components, use @{function:phutil_escape_uri_path_component} instead. * * @param string Some string. * @return string URI encoded string, except for '/'. * * @group markup */ function phutil_escape_uri($string) { return str_replace('%2F', '/', rawurlencode($string)); } /** * Escape text for inclusion as an infix URI substring. See discussion at * @{function:phutil_escape_uri}. This function covers an unusual special case; * @{function:phutil_escape_uri} is usually the correct function to use. * * This function will escape a string into a format which is safe to put into * a URI path and which does not contain '/' so it can be correctly parsed when * embedded as a URI infix component. * * However, you MUST decode the string with * @{function:phutil_unescape_uri_path_component} before it can be used in the * application. * * @param string Some string. * @return string URI encoded string that is safe for infix composition. * * @group markup */ function phutil_escape_uri_path_component($string) { return rawurlencode(rawurlencode($string)); } /** * Unescape text that was escaped by * @{function:phutil_escape_uri_path_component}. See * @{function:phutil_escape_uri} for discussion. * * Note that this function is NOT the inverse of * @{function:phutil_escape_uri_path_component}! It undoes additional escaping * which is added to survive the implied unescaping performed by the webserver * when interpreting the request. * * @param string Some string emitted * from @{function:phutil_escape_uri_path_component} and * then accessed via a web server. * @return string Original string. * @group markup */ function phutil_unescape_uri_path_component($string) { return rawurldecode($string); }