diff --git a/src/markup/__tests__/PhutilMarkupTestCase.php b/src/markup/__tests__/PhutilMarkupTestCase.php index 8bdfceb..f7b5e38 100644 --- a/src/markup/__tests__/PhutilMarkupTestCase.php +++ b/src/markup/__tests__/PhutilMarkupTestCase.php @@ -1,261 +1,268 @@ assertEqual( (string)phutil_tag('br'), (string)phutil_tag('br', array())); $this->assertEqual( (string)phutil_tag('br', array()), (string)phutil_tag('br', array(), null)); } public function testTagEmpty() { $this->assertEqual( '
', (string)phutil_tag('br', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), '')); } public function testTagBasics() { $this->assertEqual( '
', (string)phutil_tag('br')); $this->assertEqual( '
y
', (string)phutil_tag('div', array(), 'y')); } public function testTagAttributes() { $this->assertEqual( '
y
', (string)phutil_tag('div', array('u' => 'v'), 'y')); $this->assertEqual( '
', (string)phutil_tag('br', array('u' => 'v'))); } public function testTagEscapes() { $this->assertEqual( '
', (string)phutil_tag('br', array('u' => '<'))); $this->assertEqual( '

', (string)phutil_tag('div', array(), phutil_tag('br'))); } public function testTagNullAttribute() { $this->assertEqual( '
', (string)phutil_tag('br', array('y' => null))); } public function testDefaultRelNoreferrer() { $map = array( // These should not have rel="nofollow" inserted implicitly. '/' => false, '/path/to/local.html' => false, '#example' => false, '' => false, // These should get the implicit insertion. 'http://www.example.org/' => true, '///evil.com/' => true, ' http://www.example.org/' => true, 'ftp://filez.com' => true, 'mailto:santa@northpole.com' => true, ); foreach ($map as $input => $expect) { $tag = phutil_tag( 'a', array( 'href' => $input, ), 'link'); $tag = (string)$tag; $this->assertEqual($expect, (bool)preg_match('/noreferrer/', $tag)); } // With an explicit `rel` present, we should not override it. $tag = phutil_tag( 'a', array( 'href' => 'http://www.example.org/', 'rel' => 'nofollow', ), 'link'); $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); // For tags other than `a`, we should not insert `rel`. $tag = phutil_tag( 'link', array( 'href' => 'http://www.example.org/', ), 'link'); $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); } public function testTagJavascriptProtocolRejection() { $hrefs = array( 'javascript:alert(1)' => true, 'JAVASCRIPT:alert(2)' => true, - ' javascript:alert(3)' => true, + + // NOTE: When interpreted as a URI, this is dropped because of leading + // whitespace. + ' javascript:alert(3)' => array(true, false), '/' => false, '/path/to/stuff/' => false, '' => false, 'http://example.com/' => false, '#' => false, 'javascript://anything' => true, // Chrome 33 and IE11, at a minimum, treat this as Javascript. "javascript\n:alert(4)" => true, // Opera currently accepts a variety of unicode spaces. This test case // has a smattering of them. "\xE2\x80\x89javascript:" => true, "javascript\xE2\x80\x89:" => true, "\xE2\x80\x84javascript:" => true, "javascript\xE2\x80\x84:" => true, // Because we're aggressive, all of unicode should trigger detection // by default. "\xE2\x98\x83javascript:" => true, "javascript\xE2\x98\x83:" => true, "\xE2\x98\x83javascript\xE2\x98\x83:" => true, // We're aggressive about this, so we'll intentionally raise false // positives in these cases. 'javascript~:alert(5)' => true, '!!!javascript!!!!:alert(6)' => true, // However, we should raise true negatives in these slightly more // reasonable cases. 'javascript/:docs.html' => false, 'javascripts:x.png' => false, 'COOLjavascript:page' => false, '/javascript:alert(1)' => false, ); foreach (array(true, false) as $use_uri) { foreach ($hrefs as $href => $expect) { + if (is_array($expect)) { + $expect = ($use_uri ? $expect[1] : $expect[0]); + } + if ($use_uri) { $href = new PhutilURI($href); } $caught = null; try { phutil_tag('a', array('href' => $href), 'click for candy'); } catch (Exception $ex) { $caught = $ex; } $this->assertEqual( $expect, $caught instanceof Exception, "Rejected href: {$href}"); } } } public function testURIEscape() { $this->assertEqual( '%2B/%20%3F%23%26%3A%21xyz%25', phutil_escape_uri('+/ ?#&:!xyz%')); } public function testURIPathComponentEscape() { $this->assertEqual( 'a%252Fb', phutil_escape_uri_path_component('a/b')); $str = ''; for ($ii = 0; $ii <= 255; $ii++) { $str .= chr($ii); } $this->assertEqual( $str, phutil_unescape_uri_path_component( rawurldecode( // Simulates webserver. phutil_escape_uri_path_component($str)))); } public function testHsprintf() { $this->assertEqual( '
<3
', (string)hsprintf('
%s
', '<3')); } public function testAppendHTML() { $html = phutil_tag('hr'); $html->appendHTML(phutil_tag('br'), ''); $this->assertEqual('

<evil>', $html->getHTMLContent()); } public function testArrayEscaping() { $this->assertEqual( '
<div>
', phutil_escape_html( array( hsprintf('
'), array( array( '<', array( 'd', array( array( hsprintf('i'), ), 'v', ), ), array( array( '>', ), ), ), ), hsprintf('
'), ))); $this->assertEqual( '


', phutil_tag( 'div', array( ), array( array( array( phutil_tag('br'), array( phutil_tag('hr'), ), phutil_tag('wbr'), ), ), ))->getHTMLContent()); } } diff --git a/src/markup/engine/__tests__/remarkup/link-alternate.txt b/src/markup/engine/__tests__/remarkup/link-alternate.txt index 87072a6..2abcb4a 100644 --- a/src/markup/engine/__tests__/remarkup/link-alternate.txt +++ b/src/markup/engine/__tests__/remarkup/link-alternate.txt @@ -1,12 +1,12 @@ [Example](http://www.example.com/) x[0][1](**ptr); ~~~~~~~~~~ -

Example

+

Example

x[0][1](**ptr);

~~~~~~~~~~ Example x[0][1](**ptr); diff --git a/src/markup/engine/__tests__/remarkup/link-brackets.txt b/src/markup/engine/__tests__/remarkup/link-brackets.txt index 0e6d0f5..b2f48f1 100644 --- a/src/markup/engine/__tests__/remarkup/link-brackets.txt +++ b/src/markup/engine/__tests__/remarkup/link-brackets.txt @@ -1,5 +1,5 @@ ~~~~~~~~~~ -

http://www.zany.com/omg/weird_url,,,

+

http://www.zany.com/omg/weird_url,,,

~~~~~~~~~~ http://www.zany.com/omg/weird_url,,, diff --git a/src/markup/engine/__tests__/remarkup/link-edge-cases.txt b/src/markup/engine/__tests__/remarkup/link-edge-cases.txt index 70fa904..2ee5ec3 100644 --- a/src/markup/engine/__tests__/remarkup/link-edge-cases.txt +++ b/src/markup/engine/__tests__/remarkup/link-edge-cases.txt @@ -1,35 +1,35 @@ http://www.example.com/ (http://www.example.com/) http://www.example.com/wiki/example_(disambiguation) (example http://www.example.com/) Quick! http://www.example.com/! ~~~~~~~~~~ -

http://www.example.com/

+

http://www.example.com/

-

(http://www.example.com/)

+

(http://www.example.com/)

-

http://www.example.com/

+

http://www.example.com/

-

http://www.example.com/wiki/example_(disambiguation)

+

http://www.example.com/wiki/example_(disambiguation)

-

(example http://www.example.com/)

+

(example http://www.example.com/)

-

Quick! http://www.example.com/!

+

Quick! http://www.example.com/!

~~~~~~~~~~ http://www.example.com/ (http://www.example.com/) http://www.example.com/ http://www.example.com/wiki/example_(disambiguation) (example http://www.example.com/) Quick! http://www.example.com/! diff --git a/src/markup/engine/__tests__/remarkup/link-mixed.txt b/src/markup/engine/__tests__/remarkup/link-mixed.txt index 08faf99..7281f89 100644 --- a/src/markup/engine/__tests__/remarkup/link-mixed.txt +++ b/src/markup/engine/__tests__/remarkup/link-mixed.txt @@ -1,18 +1,18 @@ [[http://www.example.com/ | Example]](http://www.alternate.org/) (http://www.alternate.org/)[[http://www.example.com/ | Example]] ~~~~~~~~~~ -

Example(http://www.alternate.org/)

+

Example(http://www.alternate.org/)

-

(http://www.alternate.org/)Example

+

(http://www.alternate.org/)Example

-

<http://www.example.com/ Example>

+

<http://www.example.com/ Example>

~~~~~~~~~~ Example (http://www.alternate.org/) (http://www.alternate.org/)Example > diff --git a/src/markup/engine/__tests__/remarkup/link-square.txt b/src/markup/engine/__tests__/remarkup/link-square.txt index 1cae76e..dc143cd 100644 --- a/src/markup/engine/__tests__/remarkup/link-square.txt +++ b/src/markup/engine/__tests__/remarkup/link-square.txt @@ -1,29 +1,29 @@ [[http://www.example.com/]] [[http://www.example.com/ | example.com]] [[/]] [[#anchor]] [[#anchor | Anchors ]] ~~~~~~~~~~ -

http://www.example.com/

+

http://www.example.com/

-

example.com

+

example.com

/

#anchor

Anchors

~~~~~~~~~~ http://www.example.com/ example.com http://www.example.com/ http://www.example.com/#anchor Anchors diff --git a/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt b/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt index ce28b5b..33cb6fa 100644 --- a/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt +++ b/src/markup/engine/__tests__/remarkup/link-with-punctuation.txt @@ -1,9 +1,9 @@ http://www.example.com/, http://www.example.com/.. http://www.example.com/!!! ~~~~~~~~~~ -

http://www.example.com/, -http://www.example.com/.. -http://www.example.com/!!!

+

http://www.example.com/, +http://www.example.com/.. +http://www.example.com/!!!

~~~~~~~~~~ http://www.example.com/, http://www.example.com/.. http://www.example.com/!!! diff --git a/src/markup/engine/__tests__/remarkup/link-with-tilde.txt b/src/markup/engine/__tests__/remarkup/link-with-tilde.txt index f45e056..5b22225 100644 --- a/src/markup/engine/__tests__/remarkup/link-with-tilde.txt +++ b/src/markup/engine/__tests__/remarkup/link-with-tilde.txt @@ -1,5 +1,5 @@ http://www.example.com/~ ~~~~~~~~~~ -

http://www.example.com/~

+

http://www.example.com/~

~~~~~~~~~~ http://www.example.com/~ diff --git a/src/markup/engine/__tests__/remarkup/link.txt b/src/markup/engine/__tests__/remarkup/link.txt index a46d657..112e7c0 100644 --- a/src/markup/engine/__tests__/remarkup/link.txt +++ b/src/markup/engine/__tests__/remarkup/link.txt @@ -1,5 +1,5 @@ http://www.example.com/ ~~~~~~~~~~ -

http://www.example.com/

+

http://www.example.com/

~~~~~~~~~~ http://www.example.com/ diff --git a/src/markup/engine/__tests__/remarkup/list-checkboxes.txt b/src/markup/engine/__tests__/remarkup/list-checkboxes.txt new file mode 100644 index 0000000..02b29fa --- /dev/null +++ b/src/markup/engine/__tests__/remarkup/list-checkboxes.txt @@ -0,0 +1,35 @@ +- [] a +- [ ] b +- [X] c +- d + +[ ] A +[X] B + [ ] C + [ ] D + +~~~~~~~~~~ +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d
  • +
+ +
    +
  • A
  • +
  • B
      +
    • C
    • +
    • D
    • +
  • +
+~~~~~~~~~~ +[ ] a +[ ] b +[X] c +- d + +[ ] A +[X] B + [ ] C + [ ] D diff --git a/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt b/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt index 3b29bfe..f19ad47 100644 --- a/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt +++ b/src/markup/engine/__tests__/remarkup/percent-block-multiline.txt @@ -1,21 +1,21 @@ **foo** %%%- first - second - third%%% [[http://hello | world]] ~~~~~~~~~~

foo

- first
- second
- third -

world

+

world

~~~~~~~~~~ **foo** - first - second - third world diff --git a/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt b/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt index b174ef3..35df4d5 100644 --- a/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt +++ b/src/markup/engine/__tests__/remarkup/simple-table-with-link.txt @@ -1,7 +1,7 @@ | [[ http://example.com | name ]] | [x] | ~~~~~~~~~~ - +
name[x]
name[x]
~~~~~~~~~~ | name | [x] | diff --git a/src/markup/engine/__tests__/remarkup/toc.txt b/src/markup/engine/__tests__/remarkup/toc.txt index 5cc7466..a4d7de6 100644 --- a/src/markup/engine/__tests__/remarkup/toc.txt +++ b/src/markup/engine/__tests__/remarkup/toc.txt @@ -1,29 +1,29 @@ = [[ http://www.example.com/ | link_name ]] = == **bold** == = http://www.example.com = ~~~~~~~~~~ -

link_name

+

link_name

bold

-

http://www.example.com

+

http://www.example.com

~~~~~~~~~~ [[ http://www.example.com/ | link_name ]] ========================================= **bold** -------- http://www.example.com ====================== diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php index b3481d6..3d051f1 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php @@ -1,433 +1,479 @@ $line) { $matches = null; if (preg_match($regex, $line)) { $regex = self::CONT_BLOCK_PATTERN; if (preg_match('/^(\s+)/', $line, $matches)) { $space = strlen($matches[1]); } else { $space = 0; } $min_space = min($min_space, $space); } } $regex = self::START_BLOCK_PATTERN; if ($min_space) { foreach ($lines as $key => $line) { if (preg_match($regex, $line)) { $regex = self::CONT_BLOCK_PATTERN; $lines[$key] = substr($line, $min_space); } } } // The input text may have linewraps in it, like this: // // - derp derp derp derp // derp derp derp derp // - blarp blarp blarp blarp // // Group text lines together into list items, stored in $items. So the // result in the above case will be: // // array( // array( // "- derp derp derp derp", // " derp derp derp derp", // ), // array( // "- blarp blarp blarp blarp", // ), // ); $item = array(); $regex = self::START_BLOCK_PATTERN; foreach ($lines as $line) { if (preg_match($regex, $line)) { $regex = self::CONT_BLOCK_PATTERN; if ($item) { $items[] = $item; $item = array(); } } $item[] = $line; } if ($item) { $items[] = $item; } // Process each item to normalize the text, remove line wrapping, and // determine its depth (indentation level) and style (ordered vs unordered). // // Given the above example, the processed array will look like: // // array( // array( // 'text' => 'derp derp derp derp derp derp derp derp', // 'depth' => 0, // 'style' => '-', // ), // array( // 'text' => 'blarp blarp blarp blarp', // 'depth' => 0, // 'style' => '-', // ), // ); + $has_marks = false; foreach ($items as $key => $item) { $item = preg_replace('/\s*\n\s*/', ' ', implode("\n", $item)); $item = rtrim($item); if (!strlen($item)) { unset($items[$key]); continue; } $matches = null; if (preg_match('/^\s*([-*#]{2,})/', $item, $matches)) { // Alternate-style indents; use number of list item symbols. $depth = strlen($matches[1]) - 1; } else if (preg_match('/^(\s+)/', $item, $matches)) { // Markdown-style indents; use indent depth. $depth = strlen($matches[1]); } else { $depth = 0; } if (preg_match('/^\s*(?:#|[0-9])/', $item)) { $style = '#'; } else { $style = '-'; } - // If we don't match the block pattern (for example, because the user - // has typed only " " or " -"), treat the line as containing no text. - // This prevents newly added items from rendering with a bullet and - // the text "-", e.g. - $text = preg_replace(self::CONT_BLOCK_PATTERN, '', $item); - if ($text == $item) { - $text = ''; + // Strip leading indicators off the item. + $text = preg_replace(self::STRIP_BLOCK_PATTERN, '', $item); + + // Look for "[]", "[ ]", "[*]", "[x]", etc., which we render as a + // checkbox. + $mark = null; + $matches = null; + if (preg_match('/^\s*\[(.?)\]\s*/', $text, $matches)) { + if (strlen(trim($matches[1]))) { + $mark = true; + } else { + $mark = false; + } + $has_marks = true; + $text = substr($text, strlen($matches[0])); } $items[$key] = array( 'text' => $text, 'depth' => $depth, 'style' => $style, + 'mark' => $mark, ); } $items = array_values($items); // Users can create a sub-list by indenting any deeper amount than the // previous list, so these are both valid: // // - a // - b // // - a // - b // // In the former case, we'll have depths (0, 2). In the latter case, depths // (0, 4). We don't actually care about how many spaces there are, only // how many list indentation levels (that is, we want to map both of // those cases to (0, 1), indicating "outermost list" and "first sublist"). // // This is made more complicated because lists at two different indentation // levels might be at the same list level: // // - a // - b // - c // - d // // Here, 'b' and 'd' are at the same list level (2) but different indent // levels (2, 4). // // Users can also create "staircases" like this: // // - a // - b // # c // // While this is silly, we'd like to render it as faithfully as possible. // // In order to do this, we convert the list of nodes into a tree, // normalizing indentation levels and inserting dummy nodes as necessary to // make the tree well-formed. See additional notes at buildTree(). // // In the case above, the result is a tree like this: // // - // - // - a // - b // # c $l = 0; $r = count($items); $tree = $this->buildTree($items, $l, $r, $cur_level = 0); // We may need to open a list on a node, but they do not have // list style information yet. We need to propagate list style inforamtion // backward through the tree. In the above example, the tree now looks // like this: // // - // - // - a // - b // # c $this->adjustTreeStyleInformation($tree); // Finally, we have enough information to render the tree. - $out = $this->renderTree($tree); + $out = $this->renderTree($tree, 0, $has_marks); if ($this->getEngine()->isTextMode()) { $out = implode('', $out); $out = rtrim($out, "\n"); $out = preg_replace('/ +$/m', '', $out); return $out; } return phutil_implode_html('', $out); } /** * See additional notes in markupText(). */ private function buildTree(array $items, $l, $r, $cur_level) { if ($l == $r) { return array(); } if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) { // This algorithm is recursive and we don't need you blowing the stack // with your oh-so-clever 50,000-item-deep list. Cap indentation levels // at a reasonable number and just shove everything deeper up to this // level. $nodes = array(); for ($ii = $l; $ii < $r; $ii++) { $nodes[] = array( 'level' => $cur_level, 'items' => array(), ) + $items[$ii]; } return $nodes; } $min = $l; for ($ii = $r - 1; $ii >= $l; $ii--) { if ($items[$ii]['depth'] < $items[$min]['depth']) { $min = $ii; } } $min_depth = $items[$min]['depth']; $nodes = array(); if ($min != $l) { $nodes[] = array( 'text' => null, 'level' => $cur_level, 'style' => null, + 'mark' => null, 'items' => $this->buildTree($items, $l, $min, $cur_level + 1), ); } $last = $min; for ($ii = $last + 1; $ii < $r; $ii++) { if ($items[$ii]['depth'] == $min_depth) { $nodes[] = array( 'level' => $cur_level, 'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1), ) + $items[$last]; $last = $ii; } } $nodes[] = array( 'level' => $cur_level, 'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1), ) + $items[$last]; return $nodes; } /** * See additional notes in markupText(). */ private function adjustTreeStyleInformation(array &$tree) { // The effect here is just to walk backward through the nodes at this level // and apply the first style in the list to any empty nodes we inserted // before it. As we go, also recurse down the tree. $style = '-'; for ($ii = count($tree) - 1; $ii >= 0; $ii--) { if ($tree[$ii]['style'] !== null) { // This is the earliest node we've seen with style, so set the // style to its style. $style = $tree[$ii]['style']; } else { // This node has no style, so apply the current style. $tree[$ii]['style'] = $style; } if ($tree[$ii]['items']) { $this->adjustTreeStyleInformation($tree[$ii]['items']); } } } /** * See additional notes in markupText(). */ - private function renderTree(array $tree, $level = 0) { + private function renderTree(array $tree, $level, $has_marks) { $style = idx(head($tree), 'style'); $out = array(); if (!$this->getEngine()->isTextMode()) { switch ($style) { case '#': - $out[] = hsprintf("
    \n"); + $tag = 'ol'; break; case '-': - $out[] = hsprintf("
      \n"); + $tag = 'ul'; break; } + + if ($has_marks) { + $out[] = hsprintf('<%s class="remarkup-list-with-checkmarks">', $tag); + } else { + $out[] = hsprintf('<%s>', $tag); + } + + $out[] = "\n"; } $number = 1; foreach ($tree as $item) { if ($this->getEngine()->isTextMode()) { $out[] = str_repeat(' ', 2 * $level); - switch ($style) { - case '#': - $out[] = $number.'. '; - $number++; - break; - case '-': - $out[] = '- '; - break; + if ($item['mark'] !== null) { + if ($item['mark']) { + $out[] = '[X] '; + } else { + $out[] = '[ ] '; + } + } else { + switch ($style) { + case '#': + $out[] = $number.'. '; + $number++; + break; + case '-': + $out[] = '- '; + break; + } } $out[] = $this->applyRules($item['text'])."\n"; } else if ($item['text'] === null) { $out[] = hsprintf('
    • '); } else { - $out[] = hsprintf('
    • '); + if ($item['mark'] !== null) { + if ($item['mark'] == true) { + $out[] = hsprintf('
    • '); + } else { + $out[] = hsprintf('
    • '); + } + $out[] = phutil_tag( + 'input', + array( + 'type' => 'checkbox', + 'checked' => ($item['mark'] ? 'checked' : null), + 'disabled' => 'disabled', + )); + $out[] = ' '; + } else { + $out[] = hsprintf('
    • '); + } + $out[] = $this->applyRules($item['text']); } if ($item['items']) { - foreach ($this->renderTree($item['items'], $level + 1) as $i) { + $subitems = $this->renderTree($item['items'], $level + 1, $has_marks); + foreach ($subitems as $i) { $out[] = $i; } } if (!$this->getEngine()->isTextMode()) { $out[] = hsprintf("
    • \n"); } } if (!$this->getEngine()->isTextMode()) { switch ($style) { case '#': $out[] = hsprintf('
'); break; case '-': $out[] = hsprintf(''); break; } } return $out; } }