diff --git a/scripts/__init_script__.php b/scripts/__init_script__.php index 603dc63..81a4de2 100644 --- a/scripts/__init_script__.php +++ b/scripts/__init_script__.php @@ -1,77 +1,81 @@ 0) { ob_end_clean(); } error_reporting(E_ALL | E_STRICT); $config_map = array( // Always display script errors. Without this, they may not appear, which is // unhelpful when users encounter a problem. On the web this is a security // concern because you don't want to expose errors to clients, but in a // script context we always want to show errors. 'display_errors' => true, // Set the error log to the default, so errors go to stderr. Without this // errors may end up in some log, and users may not know where the log is // or check it. 'error_log' => null, // XDebug raises a fatal error if the call stack gets too deep, but the // default setting is 100, which we may exceed legitimately with module // includes (and in other cases, like recursive filesystem operations // applied to 100+ levels of directory nesting). Stop it from triggering: // we explicitly limit recursive algorithms which should be limited. 'xdebug.max_nesting_level' => null, + + // Don't limit memory, doing so just generally just prevents us from + // processing large inputs without many tangible benefits. + 'memory_limit' => -1, ); foreach ($config_map as $config_key => $config_value) { ini_set($config_key, $config_value); } if (!ini_get('date.timezone')) { // If the timezone isn't set, PHP issues a warning whenever you try to parse // a date (like those from Git or Mercurial logs), even if the date contains // timezone information (like "PST" or "-0700") which makes the // environmental timezone setting is completely irrelevant. We never rely on // the system timezone setting in any capacity, so prevent PHP from flipping // out by setting it to a safe default (UTC) if it isn't set to some other // value. date_default_timezone_set('UTC'); } // Now, load libphutil. $root = dirname(dirname(__FILE__)); require_once $root.'/src/__phutil_library_init__.php'; } __phutil_init_script__(); diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index affc36a..db06cab 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -1,272 +1,280 @@ array( 'AASTNode' => 'parser/aast/api/node', 'AASTNodeList' => 'parser/aast/api/list', 'AASTToken' => 'parser/aast/api/token', 'AASTTree' => 'parser/aast/api/tree', 'AbstractDirectedGraph' => 'utils/abstractgraph', 'AbstractDirectedGraphTestCase' => 'utils/abstractgraph/__tests__', 'BaseHTTPFuture' => 'future/http/base', 'CommandException' => 'future/exec', 'ConduitClient' => 'conduit/client', 'ConduitClientException' => 'conduit/client', 'ConduitFuture' => 'conduit/client', 'ExecFuture' => 'future/exec', 'ExecFutureTestCase' => 'future/exec/__tests__', 'FileFinder' => 'filesystem/filefinder', 'FileList' => 'filesystem/filelist', 'Filesystem' => 'filesystem', 'FilesystemException' => 'filesystem', 'Future' => 'future', 'FutureIterator' => 'future', 'FutureProxy' => 'future/proxy', 'HTTPFuture' => 'future/http/http', 'HTTPFutureResponseStatus' => 'future/http/status/base', 'HTTPFutureResponseStatusCURL' => 'future/http/status/curl', 'HTTPFutureResponseStatusHTTP' => 'future/http/status/http', 'HTTPFutureResponseStatusParse' => 'future/http/status/parse', 'HTTPFutureResponseStatusTransport' => 'future/http/status/transport', 'HTTPSFuture' => 'future/http/https', 'ImmediateFuture' => 'future/immediate', - 'LinesOfALargeFile' => 'filesystem/linesofalargefile', + 'LinesOfALarge' => 'filesystem/linesofalarge/base', + 'LinesOfALargeExecFuture' => 'filesystem/linesofalarge/execfuture', + 'LinesOfALargeExecFutureTestCase' => 'filesystem/linesofalarge/execfuture/__tests__', + 'LinesOfALargeFile' => 'filesystem/linesofalarge/file', + 'LinesOfALargeFileTestCase' => 'filesystem/linesofalarge/file/__tests__', 'MFilterTestHelper' => 'utils/__tests__', 'PhutilAWSEC2Future' => 'future/aws/ec2', 'PhutilAWSException' => 'future/aws/exception', 'PhutilAWSFuture' => 'future/aws/base', 'PhutilAggregateException' => 'error/aggregate', 'PhutilArgumentParser' => 'parser/argument/parser', 'PhutilArgumentParserException' => 'parser/argument/exception/base', 'PhutilArgumentParserTestCase' => 'parser/argument/parser/__tests__', 'PhutilArgumentSpecification' => 'parser/argument/spec', 'PhutilArgumentSpecificationException' => 'parser/argument/exception/specification', 'PhutilArgumentSpecificationTestCase' => 'parser/argument/spec/__tests__', 'PhutilArgumentUsageException' => 'parser/argument/exception/usage', 'PhutilArgumentWorkflow' => 'parser/argument/workflow/base', 'PhutilConsoleFormatter' => 'console', 'PhutilConsoleStdinNotInteractiveException' => 'console/exception', 'PhutilConsoleWrapTestCase' => 'console/__tests__', 'PhutilDaemon' => 'daemon/base', 'PhutilDaemonOverseer' => 'daemon/overseer', 'PhutilDefaultSyntaxHighlighter' => 'markup/syntax/highlighter/default', 'PhutilDefaultSyntaxHighlighterEngine' => 'markup/syntax/engine/default', 'PhutilDefaultSyntaxHighlighterEnginePygmentsFuture' => 'markup/syntax/highlighter/pygments/future', 'PhutilDefaultSyntaxHighlighterEngineTestCase' => 'markup/syntax/engine/default/__tests__', 'PhutilDeferredLog' => 'filesystem/deferredlog', 'PhutilDeferredLogTestCase' => 'filesystem/deferredlog/__tests__', 'PhutilDivinerSyntaxHighlighter' => 'markup/syntax/highlighter/diviner', 'PhutilDocblockParser' => 'parser/docblock', 'PhutilDocblockParserTestCase' => 'parser/docblock/__tests__', 'PhutilEmailAddress' => 'parser/emailaddress', 'PhutilEmailAddressTestCase' => 'parser/emailaddress/__tests__', 'PhutilErrorHandler' => 'error', 'PhutilEvent' => 'events/event', 'PhutilEventConstants' => 'events/constant/base', 'PhutilEventEngine' => 'events/engine', 'PhutilEventListener' => 'events/listener', 'PhutilEventType' => 'events/constant/type', 'PhutilExcessiveServiceCallsDaemon' => 'daemon/torture/excessiveservicecalls', 'PhutilFatalDaemon' => 'daemon/torture/fatal', 'PhutilHangForeverDaemon' => 'daemon/torture/hangforever', 'PhutilHelpArgumentWorkflow' => 'parser/argument/workflow/help', 'PhutilInteractiveEditor' => 'console/editor', 'PhutilJSON' => 'parser/json', 'PhutilJSONTestCase' => 'parser/json/__tests__', 'PhutilLanguageGuesser' => 'parser/languageguesser', 'PhutilLanguageGuesserTestCase' => 'parser/languageguesser/__tests__', 'PhutilMarkupEngine' => 'markup/engine', 'PhutilMarkupTestCase' => 'markup/__tests__', 'PhutilMissingSymbolException' => 'symbols/exception/missing', 'PhutilNiceDaemon' => 'daemon/torture/nice', 'PhutilProcessGroupDaemon' => 'daemon/torture/processgroup', 'PhutilPygmentsSyntaxHighlighter' => 'markup/syntax/highlighter/pygments', 'PhutilRainbowSyntaxHighlighter' => 'markup/syntax/highlighter/rainbow', 'PhutilReadableSerializer' => 'readableserializer', 'PhutilRemarkupBlockStorage' => 'markup/engine/remarkup/blockstorage', 'PhutilRemarkupEngine' => 'markup/engine/remarkup', 'PhutilRemarkupEngineBlockRule' => 'markup/engine/remarkup/blockrule/base', 'PhutilRemarkupEngineRemarkupCodeBlockRule' => 'markup/engine/remarkup/blockrule/remarkupcode', 'PhutilRemarkupEngineRemarkupDefaultBlockRule' => 'markup/engine/remarkup/blockrule/remarkupdefault', 'PhutilRemarkupEngineRemarkupHeaderBlockRule' => 'markup/engine/remarkup/blockrule/remarkupheader', 'PhutilRemarkupEngineRemarkupInlineBlockRule' => 'markup/engine/remarkup/blockrule/remarkupinline', 'PhutilRemarkupEngineRemarkupListBlockRule' => 'markup/engine/remarkup/blockrule/remarkuplist', 'PhutilRemarkupEngineRemarkupLiteralBlockRule' => 'markup/engine/remarkup/blockrule/remarkupliteral', 'PhutilRemarkupEngineRemarkupNoteBlockRule' => 'markup/engine/remarkup/blockrule/remarkupnote', 'PhutilRemarkupEngineRemarkupQuotesBlockRule' => 'markup/engine/remarkup/blockrule/remarkupquotes', 'PhutilRemarkupEngineTestCase' => 'markup/engine/remarkup/__tests__', 'PhutilRemarkupRule' => 'markup/engine/remarkup/markuprule/base', 'PhutilRemarkupRuleBold' => 'markup/engine/remarkup/markuprule/bold', 'PhutilRemarkupRuleDel' => 'markup/engine/remarkup/markuprule/del', 'PhutilRemarkupRuleEscapeHTML' => 'markup/engine/remarkup/markuprule/escapehtml', 'PhutilRemarkupRuleEscapeRemarkup' => 'markup/engine/remarkup/markuprule/escaperemarkup', 'PhutilRemarkupRuleHyperlink' => 'markup/engine/remarkup/markuprule/hyperlink', 'PhutilRemarkupRuleItalic' => 'markup/engine/remarkup/markuprule/italics', 'PhutilRemarkupRuleLinebreaks' => 'markup/engine/remarkup/markuprule/linebreaks', 'PhutilRemarkupRuleMonospace' => 'markup/engine/remarkup/markuprule/monospace', 'PhutilSaturateStdoutDaemon' => 'daemon/torture/saturatestdout', 'PhutilServiceProfiler' => 'serviceprofiler', 'PhutilSimpleOptions' => 'parser/simpleoptions', 'PhutilSimpleOptionsTestCase' => 'parser/simpleoptions/__tests__', 'PhutilSymbolLoader' => 'symbols', 'PhutilSyntaxHighlighter' => 'markup/syntax/highlighter/base', 'PhutilSyntaxHighlighterEngine' => 'markup/syntax/engine/base', 'PhutilSyntaxHighlighterException' => 'markup/syntax/highlighter/exception', 'PhutilTortureTestDaemon' => 'daemon/torture/base', 'PhutilURI' => 'parser/uri', 'PhutilURITestCase' => 'parser/uri/__tests__', 'PhutilUTF8TestCase' => 'utils/__tests__', 'PhutilUtilsTestCase' => 'utils/__tests__', 'PhutilXHPASTSyntaxHighlighter' => 'markup/syntax/highlighter/xhpast', 'PhutilXHPASTSyntaxHighlighterTestCase' => 'markup/syntax/highlighter/xhpast/__tests__', 'TempFile' => 'filesystem/tempfile', 'TestAbstractDirectedGraph' => 'utils/abstractgraph/__tests__', 'XHPASTNode' => 'parser/xhpast/api/node', 'XHPASTSyntaxErrorException' => 'parser/xhpast/api/exception', 'XHPASTToken' => 'parser/xhpast/api/token', 'XHPASTTree' => 'parser/xhpast/api/tree', 'XHPASTTreeTestCase' => 'parser/xhpast/api/tree/__tests__', ), 'function' => array( 'Futures' => 'future', 'array_mergev' => 'utils', 'array_select_keys' => 'utils', 'assert_instances_of' => 'utils', 'coalesce' => 'utils', 'csprintf' => 'xsprintf/csprintf', 'exec_manual' => 'future/exec', 'execx' => 'future/exec', 'head' => 'utils', 'head_key' => 'utils', 'hsprintf' => 'markup', 'id' => 'utils', 'idx' => 'utils', 'ifilter' => 'utils', 'igroup' => 'utils', 'ipull' => 'utils', 'isort' => 'utils', 'jsprintf' => 'xsprintf/jsprintf', 'last' => 'utils', 'last_key' => 'utils', 'mfilter' => 'utils', 'mgroup' => 'utils', 'mpull' => 'utils', 'msort' => 'utils', 'newv' => 'utils', 'nonempty' => 'utils', 'phlog' => 'error', 'phutil_console_confirm' => 'console', 'phutil_console_format' => 'console', 'phutil_console_prompt' => 'console', 'phutil_console_require_tty' => 'console', 'phutil_console_wrap' => 'console', 'phutil_deprecated' => 'moduleutils', 'phutil_error_listener_example' => 'error', 'phutil_escape_html' => 'markup', 'phutil_escape_uri' => 'markup', 'phutil_escape_uri_path_component' => 'markup', 'phutil_get_library_name_for_root' => 'moduleutils', 'phutil_get_library_root' => 'moduleutils', 'phutil_get_library_root_for_path' => 'moduleutils', 'phutil_is_utf8' => 'utils', 'phutil_passthru' => 'future/exec', 'phutil_render_tag' => 'markup', 'phutil_unescape_uri_path_component' => 'markup', 'phutil_utf8_hard_wrap_html' => 'utils', 'phutil_utf8_shorten' => 'utils', 'phutil_utf8_strlen' => 'utils', 'phutil_utf8ize' => 'utils', 'phutil_utf8v' => 'utils', 'vcsprintf' => 'xsprintf/csprintf', 'vjsprintf' => 'xsprintf/jsprintf', 'xhp_parser_node_constants' => 'parser/xhpast/constants', 'xhpast_get_binary_path' => 'parser/xhpast/bin', 'xhpast_get_build_instructions' => 'parser/xhpast/bin', 'xhpast_get_parser_future' => 'parser/xhpast/bin', 'xhpast_is_available' => 'parser/xhpast/bin', 'xhpast_parser_token_constants' => 'parser/xhpast/constants', 'xsprintf' => 'xsprintf', 'xsprintf_callback_example' => 'xsprintf', 'xsprintf_command' => 'xsprintf/csprintf', 'xsprintf_javascript' => 'xsprintf/jsprintf', ), 'requires_class' => array( 'AbstractDirectedGraphTestCase' => 'ArcanistPhutilTestCase', 'BaseHTTPFuture' => 'Future', 'ConduitFuture' => 'FutureProxy', 'ExecFuture' => 'Future', 'ExecFutureTestCase' => 'ArcanistPhutilTestCase', 'FutureProxy' => 'Future', 'HTTPFuture' => 'BaseHTTPFuture', 'HTTPFutureResponseStatusCURL' => 'HTTPFutureResponseStatus', 'HTTPFutureResponseStatusHTTP' => 'HTTPFutureResponseStatus', 'HTTPFutureResponseStatusParse' => 'HTTPFutureResponseStatus', 'HTTPFutureResponseStatusTransport' => 'HTTPFutureResponseStatus', 'HTTPSFuture' => 'BaseHTTPFuture', 'ImmediateFuture' => 'Future', + 'LinesOfALargeExecFuture' => 'LinesOfALarge', + 'LinesOfALargeExecFutureTestCase' => 'ArcanistPhutilTestCase', + 'LinesOfALargeFile' => 'LinesOfALarge', + 'LinesOfALargeFileTestCase' => 'ArcanistPhutilTestCase', 'PhutilAWSEC2Future' => 'PhutilAWSFuture', 'PhutilAWSFuture' => 'FutureProxy', 'PhutilArgumentParserTestCase' => 'ArcanistPhutilTestCase', 'PhutilArgumentSpecificationException' => 'PhutilArgumentParserException', 'PhutilArgumentSpecificationTestCase' => 'ArcanistPhutilTestCase', 'PhutilArgumentUsageException' => 'PhutilArgumentParserException', 'PhutilConsoleWrapTestCase' => 'ArcanistPhutilTestCase', 'PhutilDefaultSyntaxHighlighterEngine' => 'PhutilSyntaxHighlighterEngine', 'PhutilDefaultSyntaxHighlighterEnginePygmentsFuture' => 'FutureProxy', 'PhutilDefaultSyntaxHighlighterEngineTestCase' => 'ArcanistPhutilTestCase', 'PhutilDeferredLogTestCase' => 'ArcanistPhutilTestCase', 'PhutilDocblockParserTestCase' => 'ArcanistPhutilTestCase', 'PhutilEmailAddressTestCase' => 'ArcanistPhutilTestCase', 'PhutilEventType' => 'PhutilEventConstants', 'PhutilExcessiveServiceCallsDaemon' => 'PhutilTortureTestDaemon', 'PhutilFatalDaemon' => 'PhutilTortureTestDaemon', 'PhutilHangForeverDaemon' => 'PhutilTortureTestDaemon', 'PhutilHelpArgumentWorkflow' => 'PhutilArgumentWorkflow', 'PhutilJSONTestCase' => 'ArcanistPhutilTestCase', 'PhutilLanguageGuesserTestCase' => 'ArcanistPhutilTestCase', 'PhutilMarkupTestCase' => 'ArcanistPhutilTestCase', 'PhutilNiceDaemon' => 'PhutilTortureTestDaemon', 'PhutilProcessGroupDaemon' => 'PhutilTortureTestDaemon', 'PhutilRemarkupEngine' => 'PhutilMarkupEngine', 'PhutilRemarkupEngineRemarkupCodeBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupDefaultBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupHeaderBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupInlineBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupListBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupLiteralBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupNoteBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineRemarkupQuotesBlockRule' => 'PhutilRemarkupEngineBlockRule', 'PhutilRemarkupEngineTestCase' => 'ArcanistPhutilTestCase', 'PhutilRemarkupRuleBold' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleDel' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleEscapeHTML' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleEscapeRemarkup' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleHyperlink' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleItalic' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleLinebreaks' => 'PhutilRemarkupRule', 'PhutilRemarkupRuleMonospace' => 'PhutilRemarkupRule', 'PhutilSaturateStdoutDaemon' => 'PhutilTortureTestDaemon', 'PhutilSimpleOptionsTestCase' => 'ArcanistPhutilTestCase', 'PhutilTortureTestDaemon' => 'PhutilDaemon', 'PhutilURITestCase' => 'ArcanistPhutilTestCase', 'PhutilUTF8TestCase' => 'ArcanistPhutilTestCase', 'PhutilUtilsTestCase' => 'ArcanistPhutilTestCase', 'PhutilXHPASTSyntaxHighlighterTestCase' => 'ArcanistPhutilTestCase', 'TestAbstractDirectedGraph' => 'AbstractDirectedGraph', 'XHPASTNode' => 'AASTNode', 'XHPASTToken' => 'AASTToken', 'XHPASTTree' => 'AASTTree', 'XHPASTTreeTestCase' => 'ArcanistPhutilTestCase', ), 'requires_interface' => array( ), )); diff --git a/src/filesystem/linesofalarge/base/LinesOfALarge.php b/src/filesystem/linesofalarge/base/LinesOfALarge.php new file mode 100644 index 0000000..a1f29bc --- /dev/null +++ b/src/filesystem/linesofalarge/base/LinesOfALarge.php @@ -0,0 +1,227 @@ +delimiter = $character; + return $this; + } + + +/* -( Internals )---------------------------------------------------------- */ + + + /** + * Hook, called before @{method:rewind()}. Allows a concrete implementation + * to open resources or reset state. + * + * @return void + * @task internals + */ + abstract protected function willRewind(); + + + /** + * Called when the iterator needs more data. The subclass should return more + * data, or empty string to indicate end-of-stream. + * + * @return string Data, or empty string for end-of-stream. + * @task internals + */ + abstract protected function readMore(); + + +/* -( Iterator Interface )------------------------------------------------- */ + + + /** + * @task iterator + */ + final public function rewind() { + $this->willRewind(); + + $this->buf = ''; + $this->pos = 0; + $this->num = 0; + $this->eof = false; + $this->valid = true; + + $this->next(); + } + + + /** + * @task iterator + */ + final public function key() { + return $this->num; + } + + + /** + * @task iterator + */ + final public function current() { + return $this->line; + } + + + /** + * @task iterator + */ + final public function valid() { + return $this->valid; + } + + + /** + * @task iterator + */ + final public function next() { + + // Consume the stream a chunk at a time into an internal buffer, then + // read lines out of that buffer. This gives us flexibility (stream sources + // only need to be able to read blocks of bytes) and performance (we can + // read in reasonably-sized chunks of many lines), at the cost of some + // complexity in buffer management. + + // We do this in a loop to avoid recursion when consuming more bytes, in + // case the size of a line is very large compared to the chunk size we + // read. + while (true) { + if (strlen($this->buf)) { + // If we already have some data buffered, try to get the next line from + // the buffer. Search through the buffer for a delimiter. This should be + // the common case. + $endl = strpos($this->buf, $this->delimiter, $this->pos); + + if ($endl !== false) { + // We found a delimiter, so return the line it delimits. We leave + // the buffer as-is so we don't need to reallocate it, in case it is + // large relative to the size of a line. Instead, we move our cursor + // within the buffer forward. + $this->num++; + $this->line = substr($this->buf, $this->pos, ($endl - $this->pos)); + $this->pos = $endl + 1; + return; + } + + // We only have part of a line left in the buffer (no delimiter in the + // remaining piece), so throw away the part we've already emitted and + // continue below. + $this->buf = substr($this->buf, $this->pos); + $this->pos = 0; + } + + // We weren't able to produce the next line from the bytes we already had + // buffered, so read more bytes from the input stream. + + if ($this->eof) { + // NOTE: We keep track of EOF (an empty read) so we don't make any more + // reads afterward. Normally, we'll return from the first EOF read, + // emit the line, and then next() will be called again. Without tracking + // EOF, we'll attempt another read. A well-behaved impelmentation should + // still return empty string, but we can protect against any issues + // here by keeping a flag. + $more = ''; + } else { + $more = $this->readMore(); + } + + if (strlen($more)) { + // We got some bytes, so add them to the buffer and then try again. + $this->buf .= $more; + continue; + } else { + // No more bytes. If we have a buffer, return its contents. We + // potentially return part of a line here if the last line had no + // delimiter, but that currently seems reasonable as a default + // behaivor. If we don't have a buffer, we're done. + $this->eof = true; + if (strlen($this->buf)) { + $this->num++; + $this->line = $this->buf; + $this->buf = null; + } else { + $this->valid = false; + } + break; + } + } + } + +} diff --git a/src/filesystem/linesofalargefile/__init__.php b/src/filesystem/linesofalarge/base/__init__.php similarity index 52% copy from src/filesystem/linesofalargefile/__init__.php copy to src/filesystem/linesofalarge/base/__init__.php index 6b9ee34..84dcde3 100644 --- a/src/filesystem/linesofalargefile/__init__.php +++ b/src/filesystem/linesofalarge/base/__init__.php @@ -1,12 +1,10 @@ future = $future; + } + + +/* -( Internals )---------------------------------------------------------- */ + + + /** + * On destruction, we terminate the subprocess if it hasn't exited already. + * + * @return void + * @task internals + */ + public function __destruct() { + if (!$this->future->isReady()) { + $this->future->resolveKill(); + } + } + + + /** + * The PHP foreach() construct calls rewind() once, so we allow the first + * rewind(), without effect. Subsequent rewinds mean misuse. + * + * @return void + * @task internals + */ + protected function willRewind() { + if ($this->didRewind) { + throw new Exception( + "You can not reiterate over a LinesOfALargeExecFuture object. The ". + "entire goal of the construct is to avoid keeping output in memory. ". + "What you are attempting to do is silly and doesn't make any sense."); + } + $this->didRewind = true; + } + + + /** + * Read more data from the subprocess. + * + * @return string Bytes read from stdout. + * @task internals + */ + protected function readMore() { + $future = $this->future; + + while (true) { + // Read is nonblocking, so we need to sit in this loop waiting for input + // or we'll incorrectly signal EOF to the parent. + list($stdout) = $future->read(); + $future->discardBuffers(); + + if (strlen($stdout)) { + return $stdout; + } + + // If we didn't read anything, we can exit the loop if the subprocess + // has exited. + + if ($future->isReady()) { + // Throw if the process exits with a nozero status code. This makes + // error handling simpler, and prevents us from returning part of a line + // if the process terminates mid-output. + $future->resolvex(); + + // Read and return anything that's left. + list($stdout) = $future->read(); + $future->discardBuffers(); + + return $stdout; + } + } + } + +} diff --git a/src/filesystem/linesofalarge/execfuture/__init__.php b/src/filesystem/linesofalarge/execfuture/__init__.php new file mode 100644 index 0000000..cd6dfa1 --- /dev/null +++ b/src/filesystem/linesofalarge/execfuture/__init__.php @@ -0,0 +1,12 @@ +writeAndRead( + "cat\ndog\nbird\n", + array( + "cat", + "dog", + "bird", + )); + } + + public function testExecLargeFile() { + $line = "The quick brown fox jumps over the lazy dog."; + $n = 100; + + $this->writeAndRead( + str_repeat($line."\n", $n), + array_fill(0, $n, $line)); + } + + public function testExecLongLine() { + $line = str_repeat('x', 64 * 1024); + $this->writeAndRead($line, array($line)); + } + + public function testExecException() { + $caught = null; + try { + $future = new ExecFuture('does-not-exist.exe.sh'); + foreach (new LinesOfALargeExecFuture($future) as $line) { + // ignore + } + } catch (Exception $ex) { + $caught = $ex; + } + $this->assertEqual(true, $caught instanceof CommandException); + } + + private function writeAndRead($write, $read) { + $future = new ExecFuture('cat'); + $future->write($write); + + $lines = array(); + foreach (new LinesOfALargeExecFuture($future) as $line) { + $lines[] = $line; + } + + $this->assertEqual( + $read, + $lines, + "Write: ".phutil_utf8_shorten($write, 32)); + } + +} diff --git a/src/filesystem/linesofalarge/execfuture/__tests__/__init__.php b/src/filesystem/linesofalarge/execfuture/__tests__/__init__.php new file mode 100644 index 0000000..93c1a56 --- /dev/null +++ b/src/filesystem/linesofalarge/execfuture/__tests__/__init__.php @@ -0,0 +1,16 @@ +fileName = Filesystem::resolvePath((string)$file_name); + } + + +/* -( Internals )---------------------------------------------------------- */ + + + /** + * Closes the file handle. + * + * @return void + * @task internals + */ + public function __destruct() { + $this->closeHandle(); + } + + + /** + * Close the file handle, if it is open. + * + * @return $this + * @task internals + */ + private function closeHandle() { + if ($this->handle) { + fclose($this->handle); + $this->handle = null; + } + return $this; + } + + + /** + * Closes the file handle if it is open, and reopens it. + * + * @return void + * @task internals + */ + protected function willRewind() { + $this->closeHandle(); + $this->handle = @fopen($this->fileName, 'r'); + if (!$this->handle) { + throw new FilesystemException( + $this->fileName, + "Failed to open file!"); + } + } + + + /** + * Read the file chunk-by-chunk. + * + * @return string Next chunk of the file. + * @task internals + */ + public function readMore() { + + // NOTE: At least on OSX in reasonably normal test cases, increasing the + // size of this read has no impact on performance. + + $more = @fread($this->handle, 2048); + if ($more === false) { + throw new FilesystemException( + $this->fileName, + "Failed to read file!"); + } + return $more; + } + +} diff --git a/src/filesystem/linesofalargefile/__init__.php b/src/filesystem/linesofalarge/file/__init__.php similarity index 75% rename from src/filesystem/linesofalargefile/__init__.php rename to src/filesystem/linesofalarge/file/__init__.php index 6b9ee34..a69aa68 100644 --- a/src/filesystem/linesofalargefile/__init__.php +++ b/src/filesystem/linesofalarge/file/__init__.php @@ -1,12 +1,13 @@ writeAndRead( + "abcd", + array( + "abcd", + )); + } + + public function testTerminalDelimiterPresent() { + $this->writeAndRead( + "bat\ncat\ndog\n", + array( + "bat", + "cat", + "dog", + )); + } + + public function testTerminalDelimiterAbsent() { + $this->writeAndRead( + "bat\ncat\ndog", + array( + "bat", + "cat", + "dog", + )); + } + + public function testChangeDelimiter() { + $this->writeAndRead( + "bat\1cat\1dog\1", + array( + "bat", + "cat", + "dog", + ), + "\1"); + } + + public function testEmptyLines() { + $this->writeAndRead( + "\n\nbat\n", + array( + '', + '', + 'bat', + )); + } + + public function testLargeFile() { + $line = "The quick brown fox jumps over the lazy dog."; + $n = 100; + + $this->writeAndRead( + str_repeat($line."\n", $n), + array_fill(0, $n, $line)); + } + + public function testLongLine() { + $line = str_repeat('x', 64 * 1024); + $this->writeAndRead($line, array($line)); + } + + public function testReadFailure() { + $caught = null; + try { + $f = new LinesOfALargeFile('/does/not/exist.void'); + $f->rewind(); + } catch (FilesystemException $ex) { + $caught = $ex; + } + + $this->assertEqual(true, $caught instanceof $ex); + } + + private function writeAndRead($write, $read, $delimiter = "\n") { + $tmp = new TempFile(); + Filesystem::writeFile($tmp, $write); + + $lines = array(); + $iterator = id(new LinesOfALargeFile($tmp))->setDelimiter($delimiter); + foreach ($iterator as $line) { + $lines[] = $line; + } + + $this->assertEqual( + $read, + $lines, + "Write: ".phutil_utf8_shorten($write, 32)); + } + +} diff --git a/src/filesystem/linesofalarge/file/__tests__/__init__.php b/src/filesystem/linesofalarge/file/__tests__/__init__.php new file mode 100644 index 0000000..502cb75 --- /dev/null +++ b/src/filesystem/linesofalarge/file/__tests__/__init__.php @@ -0,0 +1,17 @@ +file = $file; - } - - public function __destruct() { - $this->closeHandle(); - } - - private function closeHandle() { - if ($this->handle) { - fclose($this->handle); - $this->handle = null; - } - } - - public function rewind() { - $this->closeHandle(); - $this->handle = @fopen($this->file, 'r'); - if (!$this->handle) { - throw new FilesystemException( - $this->file, - "Failed to open file!"); - } - - $this->buf = ''; - $this->pos = 0; - $this->num = 0; - $this->valid = true; - - $this->next(); - } - - public function key() { - return $this->num; - } - - public function current() { - return $this->line; - } - - public function valid() { - return $this->valid; - } - - public function next() { - if (strlen($this->buf)) { - $endl = strpos($this->buf, "\n", $this->pos); - if ($endl !== false) { - $this->num++; - $this->line = substr($this->buf, $this->pos, ($endl - $this->pos)); - $this->pos = $endl + 1; - return; - } - $this->buf = substr($this->buf, $this->pos); - $this->pos = 0; - } - - $more = fread($this->handle, 2048); - if (strlen($more)) { - $this->buf .= $more; - $this->next(); - } else { - if (strlen($this->buf)) { - $this->num++; - $this->line = $this->buf; - $this->buf = null; - } else { - $this->valid = false; - } - } - } - -} diff --git a/src/future/exec/ExecFuture.php b/src/future/exec/ExecFuture.php index 8743771..fad0b36 100644 --- a/src/future/exec/ExecFuture.php +++ b/src/future/exec/ExecFuture.php @@ -1,669 +1,693 @@ array('pipe', 'r'), // stdin 1 => array('pipe', 'w'), // stdout 2 => array('pipe', 'w'), // stderr ); /* -( Creating ExecFutures )----------------------------------------------- */ /** * Create a new ExecFuture. * * $future = new ExecFuture('wc -l %s', $file_path); * * @param string ##sprintf()##-style command string which will be passed * through @{function:csprintf} with the rest of the arguments. * @param ... Zero or more additional arguments for @{function:csprintf}. * @return ExecFuture ExecFuture for running the specified command. * @task create */ public function __construct($command) { $argv = func_get_args(); $this->command = call_user_func_array('csprintf', $argv); } -/* -( Configuring Execution )---------------------------------------------- */ +/* -( Command Information )------------------------------------------------ */ /** * Retrieve the raw command to be executed. * * @return string Raw command. * @task info */ public function getCommand() { return $this->command; } /** * Retrieve the byte limit for the stderr buffer. * * @return int Maximum buffer size, in bytes. * @task info */ public function getStderrSizeLimit() { return $this->stderrSizeLimit; } /** * Retrieve the byte limit for the stdout buffer. * * @return int Maximum buffer size, in bytes. * @task info */ public function getStdoutSizeLimit() { return $this->stdoutSizeLimit; } /** * Get the process's pid. This only works after execution is initiated, e.g. * by a call to start(). * * @return int Process ID of the executing process. * @task info */ public function getPID() { $status = $this->procGetStatus(); return $status['pid']; } /* -( Configuring Execution )---------------------------------------------- */ /** * Set a maximum size for the stdout read buffer. To limit stderr, see * @{method:setStderrSizeLimit}. The major use of these methods is to use less * memory if you are running a command which sometimes produces huge volumes * of output that you don't really care about. * * NOTE: Setting this to 0 means "no buffer", not "unlimited buffer". * * @param int Maximum size of the stdout read buffer. * @return this * @task config */ public function setStdoutSizeLimit($limit) { $this->stdoutSizeLimit = $limit; return $this; } /** * Set a maximum size for the stderr read buffer. * See @{method:setStdoutSizeLimit} for discussion. * * @param int Maximum size of the stderr read buffer. * @return this * @task config */ public function setStderrSizeLimit($limit) { $this->stderrSizeLimit = $limit; return $this; } /** * Set the current working directory to use when executing the command. * * @param string Directory to set as CWD before executing the command. * @return this * @task config */ public function setCWD($cwd) { $this->cwd = $cwd; return $this; } /* -( Interacting With Commands )------------------------------------------ */ /** * Read and return output from stdout and stderr, if any is available. This * method keeps a read cursor on each stream, but the entire streams are * still returned when the future resolves. You can call read() again after * resolving the future to retrieve only the parts of the streams you did not * previously read: * * $future = new ExecFuture('...'); * // ... * list($stdout) = $future->read(); // Returns output so far * list($stdout) = $future->read(); // Returns new output since first call * // ... * list($stdout) = $future->resolvex(); // Returns ALL output * list($stdout) = $future->read(); // Returns unread output * * NOTE: If you set a limit with @{method:setStdoutSizeLimit} or * @{method:setStderrSizeLimit}, this method will not be able to read data * past the limit. * * NOTE: If you call @{method:discardBuffers}, all the stdout/stderr data * will be thrown away and the cursors will be reset. * * @return pair <$stdout, $stderr> pair with new output since the last call * to this method. * @task interact */ public function read() { if ($this->start) { $this->isReady(); // Sync } $result = array( (string)substr($this->stdout, $this->stdoutPos), (string)substr($this->stderr, $this->stderrPos), ); $this->stdoutPos = strlen($this->stdout); $this->stderrPos = strlen($this->stderr); return $result; } /** * Write data to stdin of the command. * * @param string Data to write. * @param bool If true, keep the pipe open for writing. By default, the pipe * will be closed as soon as possible so that commands which * listen for EOF will execute. If you want to keep the pipe open * past the start of command execution, do an empty write with * `$keep_pipe = true` first. * @return this * @task interact */ public function write($data, $keep_pipe = false) { $this->stdin .= $data; $this->closePipe = !$keep_pipe; return $this; } /** * Permanently discard the stdout and stderr buffers and reset the read * cursors. This is basically useful only if you are streaming a large amount * of data from some process: * * $future = new ExecFuture('zcat huge_file.gz'); * do { * $done = $future->resolve(0.1); // Every 100ms, * list($stdout) = $future->read(); // read output... * echo $stdout; // send it somewhere... * $future->discardBuffers(); // and then free the buffers. * } while ($done === null); * * Conceivably you might also need to do this if you're writing a client using * ExecFuture and ##netcat##, but you probably should not do that. * * NOTE: This completely discards the data. It won't be available when the * future resolves. This is almost certainly only useful if you need the * buffer memory for some reason. * * @return this * @task interact */ public function discardBuffers() { $this->stdout = ''; $this->stderr = ''; $this->stdoutPos = 0; $this->stderrPos = 0; return $this; } + /** + * Returns true if this future was killed by a timeout configured with + * @{method:setTimeout}. + * + * @return bool True if the future was killed for exceeding its time limit. + */ + public function getWasKilledByTimeout() { + return $this->killedByTimeout; + } + + /* -( Configuring Execution )---------------------------------------------- */ /** * Set a hard limit on execution time. If the command runs longer, it will - * be killed and the future will resolve with error code - * ##ExecFuture::TIMED_OUT_EXIT_CODE##. + * be killed and the future will resolve with an error code. You can test + * if a future was killed by a timeout with @{method:getWasKilledByTimeout}. * * @param int Maximum number of seconds this command may execute for. * @return this * @task config */ public function setTimeout($seconds) { $this->timeout = $seconds; return $this; } /* -( Resolving Execution )------------------------------------------------ */ /** * Resolve a command you expect to exit with return code 0. Works like * @{method:resolve}, but throws if $err is nonempty. Returns only * $stdout and $stderr. See also @{function:execx}. * * list($stdout, $stderr) = $future->resolvex(); * * @param float Optional timeout after which resolution will pause and * execution will return to the caller. * @return pair <$stdout, $stderr> pair. * @task resolve */ public function resolvex($timeout = null) { list($err, $stdout, $stderr) = $this->resolve($timeout); if ($err) { $cmd = $this->command; throw new CommandException( "Command '{$cmd}' failed with error #{$err}:\n". "stdout:\n{$stdout}\n". "stderr:\n{$stderr}\n", $cmd, $err, $stdout, $stderr); } return array($stdout, $stderr); } /** * Resolve a command you expect to return valid JSON. Works like * @{method:resolvex}, but also throws if stderr is nonempty, or stdout is not * valid JSON. Returns a PHP array, decoded from the JSON command output. * * @param float Optional timeout after which resolution will pause and * execution will return to the caller. * @return array PHP array, decoded from JSON command output. * @task resolve */ public function resolveJSON($timeout = null) { list($stdout, $stderr) = $this->resolvex($timeout); if (strlen($stderr)) { $cmd = $this->command; throw new CommandException( "JSON command '{$cmd}' emitted text to stderr when none was expected: ". $stderr, $cmd, 0, $stdout, $stderr); } $object = json_decode($stdout, true); if (!is_array($object)) { $cmd = $this->command; throw new CommandException( "JSON command '{$cmd}' did not produce a valid JSON object on stdout: ". $stdout, $cmd, 0, $stdout, $stderr); } return $object; } + /** + * Resolve the process by abruptly terminating it. + * + * @return list List of results. + * @task resolve + */ + public function resolveKill() { + if (defined('SIGKILL')) { + $signal = SIGKILL; + } else { + $signal = 9; + } + + proc_terminate($this->proc, $signal); + $this->result = array( + 128 + $signal, + $this->stdout, + $this->stderr); + $this->__destruct(); + $this->endProfile(); + + return $this->result; + } + /* -( Internals )---------------------------------------------------------- */ /** * Provides read sockets to the future core. * * @return list List of read sockets. * @task internal */ public function getReadSockets() { list($stdin, $stdout, $stderr) = $this->pipes; $sockets = array(); if (isset($stdout) && !feof($stdout)) { $sockets[] = $stdout; } if (isset($stderr) && !feof($stderr)) { $sockets[] = $stderr; } return $sockets; } /** * Provides write sockets to the future core. * * @return list List of write sockets. * @task internal */ public function getWriteSockets() { list($stdin, $stdout, $stderr) = $this->pipes; $sockets = array(); if (isset($stdin) && strlen($this->stdin) && !feof($stdin)) { $sockets[] = $stdin; } return $sockets; } /** * Reads some bytes from a stream, discarding output once a certain amount * has been accumulated. * * @param resource Stream to read from. * @param int Maximum number of bytes to return from $stream. If * additional bytes are available, they will be read and * discarded. * @param string Human-readable description of stream, for exception * message. * @return string The data read from the stream. * @task internal */ protected function readAndDiscard($stream, $limit, $description) { $output = ''; do { $data = fread($stream, 4096); if (false === $data) { throw new Exception('Failed to read from '.$description); } $read_bytes = strlen($data); if ($read_bytes > 0 && $limit > 0) { if ($read_bytes > $limit) { $data = substr($data, 0, $limit); } $output .= $data; $limit -= strlen($data); } } while ($read_bytes > 0); return $output; } /** * Begin or continue command execution. * * @return bool True if future has resolved. * @task internal */ public function isReady() { if (!$this->pipes) { $profiler = PhutilServiceProfiler::getInstance(); $this->profilerCallID = $profiler->beginServiceCall( array( 'type' => 'exec', 'command' => $this->command, )); if (!$this->start) { // We might already have started the timer via initating resolution. $this->start = microtime(true); } $pipes = array(); $proc = proc_open( $this->command, self::$descriptorSpec, $pipes, $this->cwd); if (!is_resource($proc)) { throw new Exception('Failed to open process.'); } $this->pipes = $pipes; $this->proc = $proc; list($stdin, $stdout, $stderr) = $pipes; if (!phutil_is_windows()) { // On Windows, there's no such thing as nonblocking interprocess I/O. // Just leave the sockets blocking and hope for the best. Some features // will not work. if ((!stream_set_blocking($stdout, false)) || (!stream_set_blocking($stderr, false)) || (!stream_set_blocking($stdin, false))) { $this->__destruct(); throw new Exception('Failed to set streams nonblocking.'); } } $this->tryToCloseStdin(); return false; } if (!$this->proc) { return true; } list($stdin, $stdout, $stderr) = $this->pipes; $close_stdin = false; if (isset($this->stdin) && strlen($this->stdin)) { $bytes = fwrite($stdin, $this->stdin); if ($bytes === false) { throw new Exception('Unable to write to stdin!'); } else if ($bytes) { $this->stdin = substr($this->stdin, $bytes); } } $this->tryToCloseStdin(); // Read status before reading pipes so that we can never miss data that // arrives between our last read and the process exiting. $status = $this->procGetStatus(); $this->stdout .= $this->readAndDiscard( $stdout, $this->getStdoutSizeLimit() - strlen($this->stdout), 'stdout'); $this->stderr .= $this->readAndDiscard( $stderr, $this->getStderrSizeLimit() - strlen($this->stderr), 'stderr'); if (!$status['running']) { $this->result = array( $status['exitcode'], $this->stdout, $this->stderr, ); $this->__destruct(); $this->endProfile(); return true; } $elapsed = (microtime(true) - $this->start); if ($this->timeout && ($elapsed >= $this->timeout)) { - if (defined('SIGKILL')) { - $signal = SIGKILL; - } else { - $signal = 9; - } - proc_terminate($this->proc, $signal); - $this->result = array( - self::TIMED_OUT_EXIT_CODE, - $this->stdout, - $this->stderr."\n". - "(This process was prematurely terminated by timeout.)"); - $this->__destruct(); - $this->endProfile(); + $this->killedByTimeout = true; + $this->resolveKill(); return true; } } /** * Close and free resources if necessary. * * @return void * @task internal */ public function __destruct() { foreach ($this->pipes as $pipe) { if (isset($pipe)) { @fclose($pipe); } } $this->pipes = array(null, null, null); if ($this->proc) { @proc_close($this->proc); $this->proc = null; } $this->stdin = null; } + /** * End the service call profiler for this command. * * @return void * @task internal */ private function endProfile() { if ($this->profilerCallID !== null) { $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall( $this->profilerCallID, array( 'err' => $this->result ? idx($this->result, 0) : null, )); } } /** * Execute proc_get_status(), but avoid pitfalls. * * @return dict Process status. * @task internal */ private function procGetStatus() { // After the process exits, we only get one chance to read proc_get_status() // before it starts returning garbage. Make sure we don't throw away the // last good read. if ($this->procStatus) { if (!$this->procStatus['running']) { return $this->procStatus; } } $this->procStatus = proc_get_status($this->proc); return $this->procStatus; } /** * Try to close stdin, if we're done using it. This keeps us from hanging if * the process on the other end of the pipe is waiting for EOF. * * @return void * @task internal */ private function tryToCloseStdin() { if (!$this->closePipe) { // We've been told to keep the pipe open by a call to write(..., true). return; } if (strlen($this->stdin)) { // We still have bytes to write. return; } list($stdin) = $this->pipes; if (!$stdin) { // We've already closed stdin. return; } // There's nothing stopping us from closing stdin, so close it. @fclose($stdin); $this->pipes[0] = null; } public function getDefaultWait() { $wait = parent::getDefaultWait(); if ($this->timeout) { if (!$this->start) { $this->start = microtime(true); } $elapsed = (microtime(true) - $this->start); $wait = max(0, min($this->timeout - $elapsed, $wait)); } return $wait; } } diff --git a/src/future/exec/__tests__/ExecFutureTestCase.php b/src/future/exec/__tests__/ExecFutureTestCase.php index c9f1e39..96b8260 100644 --- a/src/future/exec/__tests__/ExecFutureTestCase.php +++ b/src/future/exec/__tests__/ExecFutureTestCase.php @@ -1,113 +1,113 @@ write('')->resolvex(); $this->assertEqual('', $stdout); } public function testKeepPipe() { // NOTE: This is mosty testing the semantics of $keep_pipe in write(). list($stdout) = id(new ExecFuture('cat')) ->write('', true) ->start() ->write('x', true) ->write('y', true) ->write('z', false) ->resolvex(); $this->assertEqual('xyz', $stdout); } public function testLargeBuffer() { // NOTE: This is mostly a coverage test to hit branches where we're still // flushing a buffer. $data = str_repeat('x', 1024 * 1024 * 4); list($stdout) = id(new ExecFuture('cat'))->write($data)->resolvex(); $this->assertEqual($data, $stdout); } public function testBufferLimit() { $data = str_repeat('x', 1024 * 1024); list($stdout) = id(new ExecFuture('cat')) ->setStdoutSizeLimit(1024) ->write($data) ->resolvex(); $this->assertEqual(substr($data, 0, 1024), $stdout); } public function testResolveTimeoutTestShouldRunLessThan1Sec() { // NOTE: This tests interactions between the resolve() timeout and the // ExecFuture timeout, which are similar but not identical. $future = id(new ExecFuture('sleep 32000'))->start(); $future->setTimeout(32000); // We expect this to return in 0.01s. $result = $future->resolve(0.01); $this->assertEqual($result, null); // We expect this to now force the time out / kill immediately. If we don't // do this, we'll hang when exiting until our subprocess exits (32000 // seconds!) $future->setTimeout(0.01); $future->resolve(); } public function testTimeoutTestShouldRunLessThan1Sec() { // NOTE: This is partly testing that we choose appropriate select wait // times; this test should run for significantly less than 1 second. - list($err) = id(new ExecFuture('sleep 32000'))->setTimeout(0.01)->resolve(); + $future = new ExecFuture('sleep 32000'); + list($err) = $future->setTimeout(0.01)->resolve(); - $this->assertEqual( - ExecFuture::TIMED_OUT_EXIT_CODE, - $err); + $this->assertEqual(true, $err > 0); + $this->assertEqual(true, $future->getWasKilledByTimeout()); } public function testMultipleTimeoutsTestShouldRunLessThan1Sec() { $futures = array(); for ($ii = 0; $ii < 4; $ii++) { $futures[] = id(new ExecFuture('sleep 32000'))->setTimeout(0.01); } foreach (Futures($futures) as $future) { list ($err) = $future->resolve(); - $this->assertEqual( - ExecFuture::TIMED_OUT_EXIT_CODE, - $err); + + $this->assertEqual(true, $err > 0); + $this->assertEqual(true, $future->getWasKilledByTimeout()); } } }