diff --git a/src/parser/xhpast/api/node/XHPASTNode.php b/src/parser/xhpast/api/node/XHPASTNode.php index 0c200e4..77c33e6 100644 --- a/src/parser/xhpast/api/node/XHPASTNode.php +++ b/src/parser/xhpast/api/node/XHPASTNode.php @@ -1,305 +1,358 @@ id = $id; $this->typeID = $data[0]; $this->l = idx($data, 1, -1); $this->r = idx($data, 2, -1); $this->tree = $tree; } public function setParentNode($parent_node) { $this->parentNode = $parent_node; return $this; } public function getParentNode() { return $this->parentNode; } public function setChildren(array $children) { $this->children = $children; return $this; } public function getID() { return $this->id; } public function getTypeID() { return $this->typeID; } public function getTypeName() { static $map; if (empty($map)) { $map = xhp_parser_node_constants(); } $type_id = $this->getTypeID(); if (empty($map[$type_id])) { throw new Exception("No type name for node type ID '{$type_id}'."); } return $map[$type_id]; } public function getChildren() { return $this->children; } public function getChildOfType($index, $type) { $child = $this->getChildByIndex($index); if ($child->getTypeName() != $type) { throw new Exception( "Child in position '{$index}' is not of type '{$type}': ". 
$this->getDescription()); } return $child; } public function getChildByIndex($index) { $child = idx(array_values($this->children), $index); if (!$child) { throw new Exception( "No child with index '{$index}'."); } return $child; } public function selectDescendantsOfType($type_name) { $type = $this->getTypeIDFromTypeName($type_name); return XHPASTNodeList::newFromTreeAndNodes( $this->tree, $this->executeSelectDescendantsOfType($this, $type)); } protected function executeSelectDescendantsOfType($node, $type) { $results = array(); foreach ($node->getChildren() as $id => $child) { if ($child->getTypeID() == $type) { $results[$id] = $child; } $results += $this->executeSelectDescendantsOfType($child, $type); } return $results; } public function getTokens() { if ($this->l == -1 || $this->r == -1) { return array(); } $tokens = $this->tree->getRawTokenStream(); $result = array(); foreach (range($this->l, $this->r) as $token_id) { $result[$token_id] = $tokens[$token_id]; } return $result; } public function getConcreteString() { $values = array(); foreach ($this->getTokens() as $token) { $values[] = $token->getValue(); } return implode('', $values); } - public function getStringLiteralValue() { - // TODO: This function should accommodate concatenation of literals and - // return 'null' if the literal contains variables. 
- - if ($this->getTypeName() != 'n_STRING_SCALAR') { - return null; - } - $value = $this->getConcreteString(); - $value = substr($value, 1, -1); - $value = stripcslashes($value); - return $value; - } - public function getSemanticString() { $tokens = $this->getTokens(); foreach ($tokens as $id => $token) { if ($token->isComment()) { unset($tokens[$id]); } } return implode('', mpull($tokens, 'getValue')); } public function getDescription() { $concrete = $this->getConcreteString(); if (strlen($concrete) > 75) { $concrete = substr($concrete, 0, 36).'...'.substr($concrete, -36); } $concrete = addcslashes($concrete, "\\\n\""); return 'a node of type '.$this->getTypeName().': "'.$concrete.'"'; } protected function getTypeIDFromTypeName($type_name) { static $node_types; if (empty($node_types)) { $node_types = xhp_parser_node_constants(); $node_types = array_flip($node_types); } if (empty($node_types[$type_name])) { throw new Exception("Unknown XHPAST Node type name '{$type_name}'!"); } return $node_types[$type_name]; } public function getOffset() { $first_token = idx($this->tree->getRawTokenStream(), $this->l); if (!$first_token) { return null; } return $first_token->getOffset(); } public function isStaticScalar() { return ($this->getTypeName() == 'n_STRING_SCALAR' || $this->getTypeName() == 'n_NUMERIC_SCALAR'); } public function getSurroundingNonsemanticTokens() { $before = array(); $after = array(); $tokens = $this->tree->getRawTokenStream(); if ($this->l != -1) { $before = $tokens[$this->l]->getNonsemanticTokensBefore(); } if ($this->r != -1) { $after = $tokens[$this->r]->getNonsemanticTokensAfter(); } return array($before, $after); } public function getDocblockToken() { if ($this->l == -1) { return null; } $tokens = $this->tree->getRawTokenStream(); for ($ii = $this->l - 1; $ii >= 0; $ii--) { if ($tokens[$ii]->getTypeName() == 'T_DOC_COMMENT') { return $tokens[$ii]; } if (!$tokens[$ii]->isAnyWhitespace()) { return null; } } return null; } public function evalStatic() { 
switch ($this->getTypeName()) {
       case 'n_STATEMENT':
         return $this->getChildByIndex(0)->evalStatic();
         break;
       case 'n_STRING_SCALAR':
-        $value = $this->getSemanticString();
-        $value = substr($value, 1, -1);
-        // NOTE: This intentionally treats '$' in strings as a literal dollar
-        // symbol.
-        $value = stripcslashes($value);
-        return (string)$value;
+        return (string)$this->getStringLiteralValue();
       case 'n_NUMERIC_SCALAR':
         $value = $this->getSemanticString();
         if (preg_match('/^0x/i', $value)) {
           // Hex
           return (int)base_convert(substr($value, 2), 16, 10);
         } else if (preg_match('/^0\d+$/i', $value)) {
           // Octal
           return (int)base_convert(substr($value, 1), 8, 10);
         } else if (preg_match('/^\d+$/', $value)) {
           return (int)$value;
         } else {
           return (double)$value;
         }
         break;
       case 'n_SYMBOL_NAME':
         $value = $this->getSemanticString();
         if ($value == 'INF') {
           return INF;
         }
         switch (strtolower($value)) {
           case 'true':
             return true;
           case 'false':
             return false;
           case 'null':
             return null;
           default:
             throw new Exception('Unrecognized symbol name.');
         }
         break;
       case 'n_UNARY_PREFIX_EXPRESSION':
         $operator = $this->getChildOfType(0, 'n_OPERATOR');
         $operand = $this->getChildByIndex(1);
         switch ($operator->getSemanticString()) {
           case '-':
             return -$operand->evalStatic();
             break;
           case '+':
             return $operand->evalStatic();
             break;
           default:
             throw new Exception("Unexpected operator in static expression.");
         }
         break;
       case 'n_ARRAY_LITERAL':
         $result = array();
         $values = $this->getChildOfType(0, 'n_ARRAY_VALUE_LIST');
         foreach ($values->getChildren() as $child) {
           $key = $child->getChildByIndex(0);
           $val = $child->getChildByIndex(1);
           if ($key->getTypeName() == 'n_EMPTY') {
             $result[] = $val->evalStatic();
           } else {
             $result[$key->evalStatic()] = $val->evalStatic();
           }
         }
         return $result;
       default:
         throw new Exception("Unexpected node.");
     }
   }
 
+  /**
+   * Decode the PHP string literal represented by this node.
+   *
+   * The surrounding quote characters are removed and escape sequences are
+   * resolved. A '$' is always kept as a literal dollar sign; variables are
+   * not interpolated.
+   *
+   * @return string|null Decoded string value, or null if this node is not
+   *                     an n_STRING_SCALAR.
+   */
+  public function getStringLiteralValue() {
+    if ($this->getTypeName() != 'n_STRING_SCALAR') {
+      return null;
+    }
+
+    $value = $this->getSemanticString();
+    $type = $value[0];
+    $value = substr($value, 1, -1);
+
+    if ($type == "'") {
+      // Single quoted strings treat everything as a literal except "\\" and
+      // "\'".
+      return str_replace(
+        array('\\\\', '\\\''),
+        array('\\', "'"),
+        $value);
+    }
+
+    // Double quoted strings treat "\X" as a literal if X isn't specifically
+    // a character which needs to be escaped -- e.g., "\q" and "\'" are
+    // literally "\q" and "\'". stripcslashes() is too aggressive, so find
+    // all these under-escaped backslashes and escape them.
+    //
+    // NOTE: "\u{...}" is not decoded here; it reaches the default branch and
+    // is preserved literally.
+
+    $esc = false;
+    $len = strlen($value);
+    $out = '';
+
+    for ($ii = 0; $ii < $len; $ii++) {
+      $c = $value[$ii];
+      if ($esc) {
+        $esc = false;
+        switch ($c) {
+          case 'x':
+            // Look ahead one character; use '' (not null) at end of string
+            // so preg_match() always receives a string.
+            $u = isset($value[$ii + 1]) ? $value[$ii + 1] : '';
+            if (!preg_match('/^[0-9a-f]$/i', $u)) {
+              // PHP treats \x followed by anything which is not a hex digit
+              // as a literal \x.
+              $out .= '\\\\'.$c;
+              break;
+            }
+            // Fall through: a real hex escape, which stripcslashes() decodes.
+          case '\\':
+            // An escaped backslash must be passed through as exactly "\\".
+            // Sending it to the default branch would emit three backslashes
+            // and let the last one bind to the following character.
+          case 'n':
+          case 'r':
+          case 'f':
+          case 'v':
+          case '"':
+          case '$':
+          case 't':
+          case '0':
+          case '1':
+          case '2':
+          case '3':
+          case '4':
+          case '5':
+          case '6':
+          case '7':
+            $out .= '\\'.$c;
+            break;
+          case 'e':
+            // PHP 5.4+ reads "\e" as ESC (0x1B). stripcslashes() has no such
+            // escape, so emit the byte directly.
+            $out .= chr(27);
+            break;
+          default:
+            // Not an escape PHP recognizes: double the backslash so that
+            // stripcslashes() restores a literal "\X".
+            $out .= '\\\\'.$c;
+            break;
+        }
+      } else if ($c == '\\') {
+        $esc = true;
+      } else {
+        $out .= $c;
+      }
+    }
+
+    return stripcslashes($out);
+  }
+
+
 }
diff --git a/src/parser/xhpast/api/tree/__tests__/XHPASTTreeTestCase.php b/src/parser/xhpast/api/tree/__tests__/XHPASTTreeTestCase.php
index 0fb512b..c2d5038 100644
--- a/src/parser/xhpast/api/tree/__tests__/XHPASTTreeTestCase.php
+++ b/src/parser/xhpast/api/tree/__tests__/XHPASTTreeTestCase.php
@@ -1,83 +1,145 @@ assertEval(1, '1');
     $this->assertEval("a", '"a"');
     $this->assertEval(-1.1, '-1.1');
     $this->assertEval(
       array('foo', 'bar', -1, +2, -3.4, +4.3, 1e10, 1e-5, -2.3e7),
       "array('foo', 'bar', -1, +2, -3.4, +4.3, 1e10, 1e-5, -2.3e7)");
     $this->assertEval(
       array(),
       "array()");
     $this->assertEval(
       array(42 => 7, 'a' => 5, 1, 2, 3, 4, 1 => 'goo'),
       "array(42 => 7, 'a' => 5, 1, 2, 3, 4, 1 => 'goo')");
     $this->assertEval(
       array('a' => 'a', 'b' =>
array(1, 2, array(3))), "array('a' => 'a', 'b' => array(1, 2, array(3)))"); $this->assertEval( array(true, false, null), "array(true, false, null)"); // Duplicate keys $this->assertEval( array(0 => '1', 0 => '2'), "array(0 => '1', 0 => '2')"); $this->assertEval('simple string', "'simple string'"); $this->assertEval('42', "'42'"); $this->assertEval(3.1415926, "3.1415926"); $this->assertEval(42, '42'); $this->assertEval( array(2147483648, 2147483647, -2147483648, -2147483647), "array(2147483648, 2147483647, -2147483648, -2147483647)"); $this->assertEval(INF, 'INF'); $this->assertEval(-INF, '-INF'); $this->assertEval(0x1b, '0x1b'); $this->assertEval(0X0A, '0X0A'); // Octal $this->assertEval(010, '010'); $this->assertEval(080, '080'); // Invalid! // Leading 0, but float, not octal. $this->assertEval(0.11e1, '0.11e1'); $this->assertEval(0e1, '0e1'); $this->assertEval(0, '0'); // Static evaluation treats '$' as a literal dollar glyph. $this->assertEval('$asdf', '"$asdf"'); + + $this->assertEval( + '\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z'. + '\1\2\3\4\5\6\7\8\9\0'. + '\!\@\#\$\%\^\&\*\(\)'. + '\`\~\\\|\[\]\{\}\<\>\,\.\/\?\:\;\-\_\=\+', + + "'\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q". + "\\r\\s\\t\\u\\v\\w\\x\\y\\z". + "\\1\\2\\3\\4\\5\\6\\7\\8\\9\\0". + "\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)". + "\\`\\~\\\\\\|\\[\\]\\{\\}\\<\\>\\,\\.\\/\\?\\:\\;\\-\\_\\=\\+". + "'"); + + $this->assertEval( + "\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z". + "\1\2\3\4\5\6\7\8\9\0". + "\!\@\#\$\%\^\&\*\(\)". + "\`\~\\\|\[\]\{\}\<\>\,\.\/\?\:\;\-\_\=\+", + + '"\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q'. + '\\r\\s\\t\\u\\v\\w\\x\\y\\z'. + '\\1\\2\\3\\4\\5\\6\\7\\8\\9\\0'. + '\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)'. 
+ '\\`\\~\\\\\\|\\[\\]\\{\\}\\<\\>\\,\\.\\/\\?\\:\\;\\-\\_\\=\\+"'); + + $this->assertEval( + '\' "', + "'\\' \"'"); + + $this->assertEval( + '\\ \\\\ ', + '\'\\\\ \\\\\\\\ \''); + + $this->assertEval( + '\ \\ ', + "'\\ \\\\ '"); + + $this->assertEval( + '\x92', + '\'\x92\''); + + $this->assertEval( + "\x92", + '"\x92"'); + + $this->assertEval( + "\x", + '"\x"'); + + $this->assertEval( + "\x1", + '"\x1"'); + + $this->assertEval( + "\x000 !", + '"\x000 !"'); + + $this->assertEval( + "\x0", + '"\x0"'); + } private function assertEval($value, $string) { $this->assertEqual( $value, XHPASTTree::evalStaticString($string), $string); } }