diff --git a/resources/test/diverse_symbols.php b/resources/test/diverse_symbols.php index 5476345..dc1463b 100644 --- a/resources/test/diverse_symbols.php +++ b/resources/test/diverse_symbols.php @@ -1,56 +1,56 @@ write('')->resolvex(); $this->assertEqual('', $stdout); } public function testKeepPipe() { // NOTE: This is mostly testing the semantics of $keep_pipe in write(). list($stdout) = id(new ExecFuture('cat')) ->write('', true) ->start() ->write('x', true) ->write('y', true) ->write('z', false) ->resolvex(); $this->assertEqual('xyz', $stdout); } public function testLargeBuffer() { // NOTE: This is mostly a coverage test to hit branches where we're still // flushing a buffer. $data = str_repeat('x', 1024 * 1024 * 4); list($stdout) = id(new ExecFuture('cat'))->write($data)->resolvex(); $this->assertEqual($data, $stdout); } public function testBufferLimit() { $data = str_repeat('x', 1024 * 1024); list($stdout) = id(new ExecFuture('cat')) ->setStdoutSizeLimit(1024) ->write($data) ->resolvex(); $this->assertEqual(substr($data, 0, 1024), $stdout); } public function testResolveTimeoutTestShouldRunLessThan1Sec() { // NOTE: This tests interactions between the resolve() timeout and the // ExecFuture timeout, which are similar but not identical. $future = id(new ExecFuture('sleep 32000'))->start(); $future->setTimeout(32000); // We expect this to return in 0.01s. $result = $future->resolve(0.01); $this->assertEqual($result, null); // We expect this to now force the time out / kill immediately. If we don't // do this, we'll hang when exiting until our subprocess exits (32000 // seconds!) $future->setTimeout(0.01); $future->resolve(); } public function testTimeoutTestShouldRunLessThan1Sec() { // NOTE: This is partly testing that we choose appropriate select wait // times; this test should run for significantly less than 1 second. $future = new ExecFuture('sleep 32000'); list($err) = $future->setTimeout(0.01)->resolve(); $this->assertTrue($err > 0); $this->assertTrue($future->getWasKilledByTimeout()); } public function testMultipleTimeoutsTestShouldRunLessThan1Sec() { $futures = array(); for ($ii = 0; $ii < 4; $ii++) { $futures[] = id(new ExecFuture('sleep 32000'))->setTimeout(0.01); } foreach (new FutureIterator($futures) as $future) { - list ($err) = $future->resolve(); + list($err) = $future->resolve(); $this->assertTrue($err > 0); $this->assertTrue($future->getWasKilledByTimeout()); } } public function testNoHangOnExecFutureDestructionWithRunningChild() { $start = microtime(true); $future = new ExecFuture('sleep 30'); $future->start(); unset($future); $end = microtime(true); // If ExecFuture::__destruct() hangs until the child closes, we won't make // it here in time. $this->assertTrue(($end - $start) < 5); } public function testMultipleResolves() { // It should be safe to call resolve(), resolvex(), resolveKill(), etc., // as many times as you want on the same process. $future = new ExecFuture('echo quack'); $future->resolve(); $future->resolvex(); list($err) = $future->resolveKill(); $this->assertEqual(0, $err); } public function testReadBuffering() { $str_len_8 = 'abcdefgh'; $str_len_4 = 'abcd'; // This is a write/read with no read buffer. $future = new ExecFuture('cat'); $future->write($str_len_8); do { $future->isReady(); list($read) = $future->read(); if (strlen($read)) { break; } } while (true); // We expect to get the entire string back in the read. $this->assertEqual($str_len_8, $read); $future->resolve(); // This is a write/read with a read buffer. $future = new ExecFuture('cat'); $future->write($str_len_8); // Set the read buffer size. $future->setReadBufferSize(4); do { $future->isReady(); list($read) = $future->read(); if (strlen($read)) { break; } } while (true); // We expect to get the entire string back in the read. $this->assertEqual($str_len_4, $read); $future->resolve(); } } diff --git a/src/lexer/PhutilPHPFragmentLexer.php b/src/lexer/PhutilPHPFragmentLexer.php index e932f49..1d6ce3f 100644 --- a/src/lexer/PhutilPHPFragmentLexer.php +++ b/src/lexer/PhutilPHPFragmentLexer.php @@ -1,269 +1,281 @@ array( array('<\\?(?i:php)?', 'cp', 'php'), array('[^<]+', null), array('<', null), ), 'php' => array_merge(array( array('\\?>', 'cp', '!pop'), array( '<<<([\'"]?)('.$identifier_pattern.')\\1\\n.*?\\n\\2\\;?\\n', 's', ), ), $nonsemantic_rules, array( array('(?i:__halt_compiler)\\b', 'cp', 'halt_compiler'), array('(->|::)', 'o', 'attr'), array('[~!%^&*+=|:.<>/?@-]+', 'o'), array('[\\[\\]{}();,]', 'o'), // After 'new', try to match an unadorned symbol. array('(?i:new|instanceof)\\b', 'k', 'possible_classname'), array('(?i:function)\\b', 'k', 'function_definition'), // After 'extends' or 'implements', match a list of classes/interfaces. array('(?i:extends|implements)\\b', 'k', 'class_list'), array('(?i:catch)\\b', 'k', 'catch'), array('(?i:'.implode('|', $keywords).')\\b', 'k'), array('(?i:'.implode('|', $constants).')\\b', 'kc'), array('\\$+'.$identifier_pattern, 'nv'), // Match "f(" as a function and "C::" as a class. These won't work // if you put a comment between the symbol and the operator, but // that's a bizarre usage. array($identifier_ns_pattern.'(?=\s*[\\(])', 'nf'), - array($identifier_ns_pattern.'(?=\s*::)', 'nc', 'context_attr', + array( + $identifier_ns_pattern.'(?=\s*::)', + 'nc', + 'context_attr', array( 'context' => 'push', ), ), array($identifier_ns_pattern, 'no'), array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?', 'mf'), array('\\d+[eE][+-]?[0-9]+', 'mf'), array('0[0-7]+', 'mo'), array('0[xX][a-fA-F0-9]+', 'mh'), array('0[bB][0-1]+', 'm'), array('\d+', 'mi'), array("'", 's1', 'string1'), array('`', 'sb', 'stringb'), array('"', 's2', 'string2'), array('.', null), )), // We've just matched a class name, with a "::" lookahead. The name of // the class is on the top of the context stack. We want to try to match // the attribute or method (e.g., "X::C" or "X::f()"). 'context_attr' => array_merge($nonsemantic_rules, array( array('::', 'o'), - array($identifier_pattern.'(?=\s*[\\(])', 'nf', '!pop', + array( + $identifier_pattern.'(?=\s*[\\(])', + 'nf', + '!pop', array( 'context' => 'pop', ), ), - array($identifier_pattern, 'na', '!pop', + array( + $identifier_pattern, + 'na', + '!pop', array( 'context' => 'pop', ), ), - array('', null, '!pop', + array( + '', + null, + '!pop', array( 'context' => 'discard', ), ), )), // After '->' or '::', a symbol is an attribute name. Note that we end // up in 'context_attr' instead of here in some cases. 'attr' => array_merge($nonsemantic_rules, array( array($identifier_pattern, 'na', '!pop'), array('', null, '!pop'), )), // After 'new', a symbol is a class name. 'possible_classname' => array_merge($nonsemantic_rules, array( array($identifier_ns_pattern, 'nc', '!pop'), array('', null, '!pop'), )), 'string1' => array( array('[^\'\\\\]+', 's1'), array("'", 's1', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), 'stringb' => array( array('[^`\\\\]+', 'sb'), array('`', 'sb', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), 'string2' => array( array('[^"\\\\]+', 's2'), array('"', 's2', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), // In a function definition (after "function"), we don't link the name // as a "nf" (name.function) since it is its own definition. 'function_definition' => array_merge($nonsemantic_rules, array( array('&', 'o'), array('\\(', 'o', '!pop'), array($identifier_pattern, 'no', '!pop'), array('', null, '!pop'), )), // For "//" and "#" comments, we need to break out if we see "?" followed // by ">". 'line_comment' => array( array('[^?\\n]+', 'c'), array('\\n', null, '!pop'), array('(?=\\?>)', null, '!pop'), array('\\?', 'c'), ), // We've seen __halt_compiler. Grab the '();' afterward and then eat // the rest of the file as raw data. 'halt_compiler' => array_merge($nonsemantic_rules, array( array('[()]', 'o'), array(';', 'o', 'compiler_halted'), array('\\?>', 'o', 'compiler_halted'), // Just halt on anything else. array('', null, 'compiler_halted'), )), // __halt_compiler has taken effect. 'compiler_halted' => array( array('.+', null), ), 'class_list' => array_merge($nonsemantic_rules, array( array(',', 'o'), array('(?i:implements)', 'k'), array($identifier_ns_pattern, 'nc'), array('', null, '!pop'), )), 'catch' => array_merge($nonsemantic_rules, array( array('\\(', 'o'), array($identifier_ns_pattern, 'nc'), array('', null, '!pop'), )), ); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php index 2916927..dc6a495 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockRule.php @@ -1,163 +1,163 @@ engine = $engine; $this->updateRules(); return $this; } final protected function getEngine() { return $this->engine; } public function setMarkupRules(array $rules) { assert_instances_of($rules, 'PhutilRemarkupRule'); $this->rules = $rules; $this->updateRules(); return $this; } private function updateRules() { $engine = $this->getEngine(); if ($engine) { $this->rules = msort($this->rules, 'getPriority'); foreach ($this->rules as $rule) { $rule->setEngine($engine); } } return $this; } final public function getMarkupRules() { return $this->rules; } final public function postprocess() { $this->didMarkupText(); } final protected function applyRules($text) { foreach ($this->getMarkupRules() as $rule) { $text = $rule->apply($text); } return $text; } public function supportsChildBlocks() { return false; } public function extractChildText($text) { throw new PhutilMethodNotImplementedException(); } protected function renderRemarkupTable(array $out_rows) { assert_instances_of($out_rows, 'array'); if ($this->getEngine()->isTextMode()) { $lengths = array(); foreach ($out_rows as $r => $row) { foreach ($row['content'] as $c => $cell) { $text = $this->getEngine()->restoreText($cell['content']); $lengths[$c][$r] = phutil_utf8_strlen($text); } } $max_lengths = array_map('max', $lengths); $out = array(); foreach ($out_rows as $r => $row) { $headings = false; foreach ($row['content'] as $c => $cell) { $length = $max_lengths[$c] - $lengths[$c][$r]; $out[] = '| '.$cell['content'].str_repeat(' ', $length).' '; if ($cell['type'] == 'th') { $headings = true; } } $out[] = "|\n"; if ($headings) { foreach ($row['content'] as $c => $cell) { $char = ($cell['type'] == 'th' ? '-' : ' '); $out[] = '| '.str_repeat($char, $max_lengths[$c]).' '; } $out[] = "|\n"; } } return rtrim(implode('', $out), "\n"); } if ($this->getEngine()->isHTMLMailMode()) { $table_attributes = array( 'style' => 'border-collapse: separate; border-spacing: 1px; background: #d3d3d3; margin: 12px 0;', ); - $cell_attributes = array ( + $cell_attributes = array( 'style' => 'background: #ffffff; padding: 3px 6px;', ); } else { $table_attributes = array( 'class' => 'remarkup-table', ); $cell_attributes = array(); } $out = array(); $out[] = "\n"; foreach ($out_rows as $row) { $cells = array(); foreach ($row['content'] as $cell) { $cells[] = phutil_tag( $cell['type'], $cell_attributes, $cell['content']); } $out[] = phutil_tag($row['type'], array(), $cells); $out[] = "\n"; } return phutil_tag('table', $table_attributes, $out); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php index 3be2018..d3904be 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupCodeBlockRule.php @@ -1,228 +1,228 @@ false, 'lang' => null, 'name' => null, 'lines' => null, ); $parser = new PhutilSimpleOptions(); $custom = $parser->parse(head($lines)); if ($custom) { $valid = true; foreach ($custom as $key => $value) { if (!array_key_exists($key, $options)) { $valid = false; break; } } if ($valid) { array_shift($lines); $options = $custom + $options; } } // Normalize the text back to a 0-level indent. $min_indent = 80; foreach ($lines as $line) { for ($ii = 0; $ii < strlen($line); $ii++) { if ($line[$ii] != ' ') { $min_indent = min($ii, $min_indent); break; } } } $text = implode("\n", $lines); if ($min_indent) { $indent_string = str_repeat(' ', $min_indent); $text = preg_replace('/^'.$indent_string.'/m', '', $text); } if ($this->getEngine()->isTextMode()) { $out = array(); $header = array(); if ($options['counterexample']) { $header[] = 'counterexample'; } if ($options['name'] != '') { $header[] = 'name='.$options['name']; } if ($header) { $out[] = implode(', ', $header); } $text = preg_replace('/^/m', ' ', $text); $out[] = $text; return implode("\n", $out); } if (empty($options['lang'])) { // If the user hasn't specified "lang=..." explicitly, try to guess the // language. If we fail, fall back to configured defaults. $lang = PhutilLanguageGuesser::guessLanguage($text); if (!$lang) { $lang = nonempty( $this->getEngine()->getConfig('phutil.codeblock.language-default'), 'text'); } $options['lang'] = $lang; } $code_body = $this->highlightSource($text, $options); $name_header = null; if ($this->getEngine()->isHTMLMailMode()) { - $header_attributes = array ( + $header_attributes = array( 'style' => 'padding: 6px 8px; background: #fdf5d4; color: rgba(0,0,0,.75); font-weight: bold; display: inline-block; border-top: 1px solid #f1c40f; border-left: 1px solid #f1c40f; border-right: 1px solid #f1c40f; margin-bottom: -1px;', ); } else { $header_attributes = array( 'class' => 'remarkup-code-header', ); } if ($options['name']) { $name_header = phutil_tag( 'div', $header_attributes, $options['name']); } $class = 'remarkup-code-block'; if ($options['counterexample']) { $class = 'remarkup-code-block code-block-counterexample'; } $attributes = array( 'class' => $class, 'data-code-lang' => $options['lang'], 'data-sigil' => 'remarkup-code-block', ); return phutil_tag( 'div', $attributes, array($name_header, $code_body)); } private function highlightSource($text, array $options) { if ($options['counterexample']) { $aux_class = ' remarkup-counterexample'; } else { $aux_class = null; } $aux_style = null; if ($this->getEngine()->isHTMLMailMode()) { if ($options['counterexample']) { $aux_style = 'border: 1px solid #c0392b; background: #f4dddb; font-size: 10x; padding: 8px;'; } else { $aux_style = 'border: 1px solid #f1c40f; background: #fdf5d4; font-size: 10x; padding: 8px;'; } } if ($options['lines']) { // Put a minimum size on this because the scrollbar is otherwise // unusable. $height = max(6, (int)$options['lines']); $aux_style = $aux_style .' ' .'max-height: ' .(2 * $height) .'em; overflow: auto;'; } $engine = $this->getEngine()->getConfig('syntax-highlighter.engine'); if (!$engine) { $engine = 'PhutilDefaultSyntaxHighlighterEngine'; } $engine = newv($engine, array()); $engine->setConfig( 'pygments.enabled', $this->getEngine()->getConfig('pygments.enabled')); return phutil_tag( 'pre', array( 'class' => 'remarkup-code'.$aux_class, 'style' => $aux_style, ), PhutilSafeHTML::applyFunction( 'rtrim', $engine->highlightSource($options['lang'], $text))); } } diff --git a/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php b/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php index 0b3dcbd..5d722f2 100644 --- a/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php +++ b/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php @@ -1,412 +1,426 @@ 'flag', ), ); $args = new PhutilArgumentParser(array('bin')); $args->parseFull($specs); $this->assertEqual(false, $args->getArg('flag')); $args = new PhutilArgumentParser(array('bin', '--flag')); $args->parseFull($specs); $this->assertEqual(true, $args->getArg('flag')); } public function testWildcards() { $specs = array( array( 'name' => 'flag', ), array( 'name' => 'files', 'wildcard' => true, ), ); $args = new PhutilArgumentParser(array('bin', '--flag', 'a', 'b')); $args->parseFull($specs); $this->assertEqual(true, $args->getArg('flag')); $this->assertEqual( array('a', 'b'), $args->getArg('files')); $caught = null; try { $args = new PhutilArgumentParser(array('bin', '--derp', 'a', 'b')); $args->parseFull($specs); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); $args = new PhutilArgumentParser(array('bin', '--', '--derp', 'a', 'b')); $args->parseFull($specs); $this->assertEqual( array('--derp', 'a', 'b'), $args->getArg('files')); } public function testPartialParse() { $specs = array( array( 'name' => 'flag', ), ); $args = new PhutilArgumentParser(array('bin', 'a', '--flag', '--', 'b')); $args->parsePartial($specs); $this->assertEqual( array('a', '--', 'b'), $args->getUnconsumedArgumentVector()); } public function testBadArg() { $args = new PhutilArgumentParser(array('bin')); $args->parseFull(array()); $caught = null; try { $args->getArg('flag'); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateNames() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', ), array( 'name' => 'x', ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateNamesWithParsePartial() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parsePartial( array( array( 'name' => 'x', ), )); $args->parsePartial( array( array( 'name' => 'x', ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateShortAliases() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'short' => 'x', ), array( 'name' => 'y', 'short' => 'x', ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateWildcards() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'wildcard' => true, ), array( 'name' => 'y', 'wildcard' => true, ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicatePartialWildcards() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parsePartial( array( array( 'name' => 'x', 'wildcard' => true, ), )); $args->parsePartial( array( array( 'name' => 'y', 'wildcard' => true, ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictSpecificationWithUnrecognizedArg() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'conflicts' => array( 'y' => true, ), ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictSpecificationWithSelf() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'conflicts' => array( 'x' => true, ), ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testUnrecognizedFlag() { $args = new PhutilArgumentParser(array('bin', '--flag')); $caught = null; try { $args->parseFull(array()); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateFlag() { $args = new PhutilArgumentParser(array('bin', '--flag', '--flag')); $caught = null; try { $args->parseFull( array( array( 'name' => 'flag', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testMissingParameterValue() { $args = new PhutilArgumentParser(array('bin', '--with')); $caught = null; try { $args->parseFull( array( array( 'name' => 'with', 'param' => 'stuff', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testExtraParameterValue() { $args = new PhutilArgumentParser(array('bin', '--true=apple')); $caught = null; try { $args->parseFull( array( array( 'name' => 'true', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictParameterValue() { $args = new PhutilArgumentParser(array('bin', '--true', '--false')); $caught = null; try { $args->parseFull( array( array( 'name' => 'true', 'conflicts' => array( 'false' => true, ), ), array( 'name' => 'false', 'conflicts' => array( 'true' => true, ), ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testParameterValues() { $specs = array( array( 'name' => 'a', 'param' => 'value', ), array( 'name' => 'b', 'param' => 'value', ), array( 'name' => 'cee', 'short' => 'c', 'param' => 'value', ), array( 'name' => 'dee', 'short' => 'd', 'param' => 'value', ), ); $args = new PhutilArgumentParser( array( - 'bin', '--a', 'a', '--b=b', '-c', 'c', '-d=d', + 'bin', + '--a', + 'a', + '--b=b', + '-c', + 'c', + '-d=d', )); $args->parseFull($specs); $this->assertEqual('a', $args->getArg('a')); $this->assertEqual('b', $args->getArg('b')); $this->assertEqual('c', $args->getArg('cee')); $this->assertEqual('d', $args->getArg('dee')); } public function testStdinValidParameter() { $specs = array( array( 'name' => 'file', 'param' => 'file', ), ); $args = new PhutilArgumentParser( array( - 'bin', '-', '--file', '-', + 'bin', + '-', + '--file', + '-', )); $args->parsePartial($specs); $this->assertEqual('-', $args->getArg('file')); } public function testRepeatableFlag() { $specs = array( array( 'name' => 'verbose', 'short' => 'v', 'repeat' => true, ), ); $args = new PhutilArgumentParser(array('bin', '-v', '-v', '-v')); $args->parseFull($specs); $this->assertEqual(3, $args->getArg('verbose')); } public function testRepeatableParam() { $specs = array( array( 'name' => 'eat', 'param' => 'fruit', 'repeat' => true, ), ); $args = new PhutilArgumentParser(array( - 'bin', '--eat', 'apple', '--eat', 'pear', '--eat=orange', + 'bin', + '--eat', + 'apple', + '--eat', + 'pear', + '--eat=orange', )); $args->parseFull($specs); $this->assertEqual( array('apple', 'pear', 'orange'), $args->getArg('eat')); } } diff --git a/src/utils/__tests__/PhutilUTF8TestCase.php b/src/utils/__tests__/PhutilUTF8TestCase.php index 6b2e6a9..3185c30 100644 --- a/src/utils/__tests__/PhutilUTF8TestCase.php +++ b/src/utils/__tests__/PhutilUTF8TestCase.php @@ -1,631 +1,645 @@ assertEqual($input, phutil_utf8ize($input)); } public function testUTF8izeUTF8Ignored() { $input = "\xc3\x9c \xc3\xbc \xe6\x9d\xb1!"; $this->assertEqual($input, phutil_utf8ize($input)); } public function testUTF8izeLongStringNosegfault() { // For some reason my laptop is segfaulting on long inputs inside // preg_match(). Forestall this craziness in the common case, at least. phutil_utf8ize(str_repeat('x', 1024 * 1024)); $this->assertTrue(true); } public function testUTF8izeInvalidUTF8Fixed() { $input = "\xc3 this has \xe6\x9d some invalid utf8 \xe6"; $expect = "\xEF\xBF\xBD this has \xEF\xBF\xBD\xEF\xBF\xBD some invalid utf8 ". "\xEF\xBF\xBD"; $result = phutil_utf8ize($input); $this->assertEqual($expect, $result); } public function testUTF8izeOwlIsCuteAndFerocious() { // This was once a ferocious owl when we used to use "?" as the replacement // character instead of U+FFFD, but now he is sort of not as cute or // ferocious. $input = "M(o\xEE\xFF\xFFo)M"; $expect = "M(o\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBDo)M"; $result = phutil_utf8ize($input); $this->assertEqual($expect, $result); } public function testUTF8len() { $strings = array( '' => 0, 'x' => 1, "\xEF\xBF\xBD" => 1, "x\xe6\x9d\xb1y" => 3, 'xyz' => 3, 'quack' => 5, ); foreach ($strings as $str => $expect) { $this->assertEqual($expect, phutil_utf8_strlen($str), 'Length of '.$str); } } public function testUTF8v() { $strings = array( '' => array(), 'x' => array('x'), 'quack' => array('q', 'u', 'a', 'c', 'k'), "x\xe6\x9d\xb1y" => array('x', "\xe6\x9d\xb1", 'y'), // This is a combining character. "x\xCD\xA0y" => array('x', "\xCD\xA0", 'y'), ); foreach ($strings as $str => $expect) { $this->assertEqual($expect, phutil_utf8v($str), 'Vector of '.$str); } } public function testUTF8vCodepoints() { $strings = array( '' => array(), 'x' => array(0x78), 'quack' => array(0x71, 0x75, 0x61, 0x63, 0x6B), "x\xe6\x9d\xb1y" => array(0x78, 0x6771, 0x79), "\xC2\xBB" => array(0x00BB), "\xE2\x98\x83" => array(0x2603), "\xEF\xBF\xBF" => array(0xFFFF), "\xF0\x9F\x92\xA9" => array(0x1F4A9), // This is a combining character. "x\xCD\xA0y" => array(0x78, 0x0360, 0x79), ); foreach ($strings as $str => $expect) { $this->assertEqual( $expect, phutil_utf8v_codepoints($str), pht('Codepoint Vector of %s', $str)); } } public function testUTF8ConsoleStrlen() { $strings = array( '' => 0, "\0" => 0, 'x' => 1, // Double-width chinese character. "\xe6\x9d\xb1" => 2, // Combining character. "x\xCD\xA0y" => 2, // Combining plus double-width. "\xe6\x9d\xb1\xCD\xA0y" => 3, // Colors and formatting. "\x1B[1mx\x1B[m" => 1, "\x1B[1m\x1B[31mx\x1B[m" => 1, ); foreach ($strings as $str => $expect) { $this->assertEqual( $expect, phutil_utf8_console_strlen($str), pht('Console Length of %s', $str)); } } public function testUTF8shorten() { $inputs = array( array('1erp derp derp', 9, '', '1erp derp'), array('2erp derp derp', 12, '...', '2erp derp...'), array('derpxderpxderp', 12, '...', 'derpxderp...'), array("derp\xE2\x99\x83derpderp", 12, '...', "derp\xE2\x99\x83derp..."), array('', 12, '...', ''), array('derp', 12, '...', 'derp'), array('11111', 5, '2222', '11111'), array('111111', 5, '2222', '12222'), array('D1rp. Derp derp.', 7, '...', 'D1rp.'), // "D2rp." is a better shortening of this, but it's dramatically more // complicated to implement with the newer byte/glyph/character // shortening code. array('D2rp. Derp derp.', 5, '...', 'D2...'), array('D3rp. Derp derp.', 4, '...', 'D...'), array('D4rp. Derp derp.', 14, '...', 'D4rp. Derp...'), array('D5rpderp, derp derp', 16, '...', 'D5rpderp...'), array('D6rpderp, derp derp', 17, '...', 'D6rpderp, derp...'), // Strings with combining characters. array("Gr\xCD\xA0mpyCatSmiles", 8, '...', "Gr\xCD\xA0mpy..."), array("X\xCD\xA0\xCD\xA0\xCD\xA0Y", 1, '', "X\xCD\xA0\xCD\xA0\xCD\xA0"), // This behavior is maybe a little bad, but it seems mostly reasonable, // at least for latin languages. array( - 'Derp, supercalafragalisticexpialadoshus', 30, '...', + 'Derp, supercalafragalisticexpialadoshus', + 30, + '...', 'Derp...', ), // If a string has only word-break characters in it, we should just cut // it, not produce only the terminal. array('((((((((((', 8, '...', '(((((...'), // Terminal is longer than requested input. array('derp', 3, 'quack', 'quack'), ); foreach ($inputs as $input) { list($string, $length, $terminal, $expect) = $input; $result = id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($length) ->setTerminator($terminal) ->truncateString($string); $this->assertEqual($expect, $result, pht('Shortening of %s', $string)); } } public function testUTF8StringTruncator() { $cases = array( array( "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", - 6, "o\xCD\xA0!", - 6, "o\xCD\xA0o\xCD\xA0!", - 6, "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", + 6, + "o\xCD\xA0!", + 6, + "o\xCD\xA0o\xCD\xA0!", + 6, + "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", ), array( "X\xCD\xA0\xCD\xA0\xCD\xA0Y", - 6, '!', - 6, "X\xCD\xA0\xCD\xA0\xCD\xA0Y", - 6, "X\xCD\xA0\xCD\xA0\xCD\xA0Y", + 6, + '!', + 6, + "X\xCD\xA0\xCD\xA0\xCD\xA0Y", + 6, + "X\xCD\xA0\xCD\xA0\xCD\xA0Y", ), array( "X\xCD\xA0\xCD\xA0\xCD\xA0YZ", - 6, '!', - 5, "X\xCD\xA0\xCD\xA0\xCD\xA0!", - 2, "X\xCD\xA0\xCD\xA0\xCD\xA0!", + 6, + '!', + 5, + "X\xCD\xA0\xCD\xA0\xCD\xA0!", + 2, + "X\xCD\xA0\xCD\xA0\xCD\xA0!", ), array( "\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83", - 4, "\xE2\x98\x83!", - 3, "\xE2\x98\x83\xE2\x98\x83!", - 3, "\xE2\x98\x83\xE2\x98\x83!", + 4, + "\xE2\x98\x83!", + 3, + "\xE2\x98\x83\xE2\x98\x83!", + 3, + "\xE2\x98\x83\xE2\x98\x83!", ), ); foreach ($cases as $case) { list($input, $b_len, $b_out, $p_len, $p_out, $g_len, $g_out) = $case; $result = id(new PhutilUTF8StringTruncator()) ->setMaximumBytes($b_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($b_out, $result, pht('byte-short of %s', $input)); $result = id(new PhutilUTF8StringTruncator()) ->setMaximumCodepoints($p_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($p_out, $result, pht('codepoint-short of %s', $input)); $result = id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($g_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($g_out, $result, pht('glyph-short of %s', $input)); } } public function testUTF8Wrap() { $inputs = array( array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', ), ), array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', ), ), array( 'aa&aaaa', 3, array( 'aa&', 'aaa', 'a', ), ), array( "aa\xe6\x9d\xb1aaaa", 3, array( "aa\xe6\x9d\xb1", 'aaa', 'a', ), ), array( '', 80, array( ), ), array( 'a', 80, array( 'a', ), ), ); foreach ($inputs as $input) { list($string, $width, $expect) = $input; $this->assertEqual( $expect, phutil_utf8_hard_wrap_html($string, $width), pht("Wrapping of '%s'.", $string)); } } public function testUTF8NonHTMLWrap() { $inputs = array( array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', ), ), array( 'abracadabra!', 4, array( 'abra', 'cada', 'bra!', ), ), array( '', 10, array( ), ), array( 'a', 20, array( 'a', ), ), array( "aa\xe6\x9d\xb1aaaa", 3, array( "aa\xe6\x9d\xb1", 'aaa', 'a', ), ), array( "mmm\nmmm\nmmmm", 3, array( 'mmm', 'mmm', 'mmm', 'm', ), ), ); foreach ($inputs as $input) { list($string, $width, $expect) = $input; $this->assertEqual( $expect, phutil_utf8_hard_wrap($string, $width), pht("Wrapping of '%s'", $string)); } } public function testUTF8ConvertParams() { $caught = null; try { phutil_utf8_convert('', 'utf8', ''); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, pht('Requires source encoding.')); $caught = null; try { phutil_utf8_convert('', '', 'utf8'); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, pht('Requires target encoding.')); } public function testUTF8Convert() { if (!function_exists('mb_convert_encoding')) { $this->assertSkipped(pht('Requires %s extension.', 'mbstring')); } // "[ae]gis se[n]or [(c)] 1970 [+/-] 1 [degree]" $input = "\xE6gis SE\xD1OR \xA9 1970 \xB11\xB0"; $expect = "\xC3\xA6gis SE\xC3\x91OR \xC2\xA9 1970 \xC2\xB11\xC2\xB0"; $output = phutil_utf8_convert($input, 'UTF-8', 'ISO-8859-1'); $this->assertEqual($expect, $output, pht('Conversion from ISO-8859-1.')); $caught = null; try { phutil_utf8_convert('xyz', 'moon language', 'UTF-8'); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, pht('Conversion with bogus encoding.')); } public function testUTF8ucwords() { $tests = array( '' => '', 'x' => 'X', 'X' => 'X', 'five short graybles' => 'Five Short Graybles', 'xXxSNiPeRKiLLeRxXx' => 'XXxSNiPeRKiLLeRxXx', ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_ucwords($input), 'phutil_utf8_ucwords("'.$input.'")'); } } public function testUTF8strtolower() { $tests = array( '' => '', 'a' => 'a', 'A' => 'a', '!' => '!', 'OMG!~ LOLolol ROFLwaffle11~' => 'omg!~ lololol roflwaffle11~', "\xE2\x98\x83" => "\xE2\x98\x83", ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_strtolower($input), 'phutil_utf8_strtolower("'.$input.'")'); } } public function testUTF8strtoupper() { $tests = array( '' => '', 'a' => 'A', 'A' => 'A', '!' => '!', 'Cats have 9 lives.' => 'CATS HAVE 9 LIVES.', "\xE2\x98\x83" => "\xE2\x98\x83", ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_strtoupper($input), 'phutil_utf8_strtoupper("'.$input.'")'); } } public function testUTF8IsCombiningCharacter() { $character = "\xCD\xA0"; $this->assertEqual( true, phutil_utf8_is_combining_character($character)); $character = 'a'; $this->assertEqual( false, phutil_utf8_is_combining_character($character)); } public function testUTF8vCombined() { // Empty string. $string = ''; $this->assertEqual(array(), phutil_utf8v_combined($string)); // Single character. $string = 'x'; $this->assertEqual(array('x'), phutil_utf8v_combined($string)); // No combining characters. $string = 'cat'; $this->assertEqual(array('c', 'a', 't'), phutil_utf8v_combined($string)); // String with a combining character in the middle. $string = "ca\xCD\xA0t"; $this->assertEqual( array('c', "a\xCD\xA0", 't'), phutil_utf8v_combined($string)); // String starting with a combined character. $string = "c\xCD\xA0at"; $this->assertEqual( array("c\xCD\xA0", 'a', 't'), phutil_utf8v_combined($string)); // String with trailing combining character. $string = "cat\xCD\xA0"; $this->assertEqual( array('c', 'a', "t\xCD\xA0"), phutil_utf8v_combined($string)); // String with muliple combined characters. $string = "c\xCD\xA0a\xCD\xA0t\xCD\xA0"; $this->assertEqual( array("c\xCD\xA0", "a\xCD\xA0", "t\xCD\xA0"), phutil_utf8v_combined($string)); // String with multiple combining characters. $string = "ca\xCD\xA0\xCD\xA0t"; $this->assertEqual( array('c', "a\xCD\xA0\xCD\xA0", 't'), phutil_utf8v_combined($string)); // String beginning with a combining character. $string = "\xCD\xA0\xCD\xA0c"; $this->assertEqual( array(" \xCD\xA0\xCD\xA0", 'c'), phutil_utf8v_combined($string)); } public function testUTF8BMPSegfaults() { // This test case fails by segfaulting, or passes by not segfaulting. See // the function implementation for details. $input = str_repeat("\xEF\xBF\xBF", 1024 * 32); phutil_is_utf8_with_only_bmp_characters($input); $this->assertTrue(true); } public function testUTF8BMP() { $tests = array( '' => array( true, true, pht('empty string'), ), 'a' => array( true, true, 'a', ), "a\xCD\xA0\xCD\xA0" => array( true, true, pht('%s with combining', 'a'), ), "\xE2\x98\x83" => array( true, true, pht('snowman'), ), // This is the last character in BMP, U+FFFF. "\xEF\xBF\xBF" => array( true, true, 'U+FFFF', ), // This isn't valid. "\xEF\xBF\xC0" => array( false, false, pht('Invalid, byte range.'), ), // This is an invalid nonminimal representation. "\xF0\x81\x80\x80" => array( false, false, pht('Nonminimal 4-byte character.'), ), // This is the first character above BMP, U+10000. "\xF0\x90\x80\x80" => array( true, false, 'U+10000', ), "\xF0\x9D\x84\x9E" => array( true, false, 'gclef', ), "musical \xF0\x9D\x84\x9E g-clef" => array( true, false, pht('gclef text'), ), "\xF0\x9D\x84" => array( false, false, pht('Invalid, truncated.'), ), "\xE0\x80\x80" => array( false, false, pht('Nonminimal 3-byte character.'), ), // Partial BMP characters. "\xCD" => array( false, false, pht('Partial 2-byte character.'), ), "\xE0\xA0" => array( false, false, pht('Partial BMP 0xE0 character.'), ), "\xE2\x98" => array( false, false, pht('Partial BMP cahracter.'), ), ); foreach ($tests as $input => $test) { list($expect_utf8, $expect_bmp, $test_name) = $test; // Depending on what's installed on the system, this may use an // extension. $this->assertEqual( $expect_utf8, phutil_is_utf8($input), pht('is_utf(%s)', $test_name)); // Also test this against the pure PHP implementation, explicitly. $this->assertEqual( $expect_utf8, phutil_is_utf8_slowly($input), pht('is_utf_slowly(%s)', $test_name)); $this->assertEqual( $expect_bmp, phutil_is_utf8_with_only_bmp_characters($input), pht('is_utf_bmp(%s)', $test_name)); } } }