diff --git a/src/__phutil_library_init__.php b/src/__phutil_library_init__.php index 80adcb0..4b07952 100644 --- a/src/__phutil_library_init__.php +++ b/src/__phutil_library_init__.php @@ -1,46 +1,43 @@ setType('class') ->setName($class_name) ->selectAndLoadSymbols(); if (!$symbols) { throw new PhutilMissingSymbolException( $class_name, 'class or interface', "the class or interface '{$class_name}' is not defined in the library ". "map for any loaded phutil library."); } } catch (PhutilMissingSymbolException $ex) { // If there are other SPL autoloaders installed, we need to give them a // chance to load the class. Throw the exception if we're the last // autoloader; if not, swallow it and let them take a shot. $autoloaders = spl_autoload_functions(); $last = end($autoloaders); if ($last == '__phutil_autoload') { throw $ex; } } } spl_autoload_register('__phutil_autoload', $throw = true); phutil_register_library('phutil', __FILE__); diff --git a/src/aphront/storage/connection/AphrontDatabaseConnection.php b/src/aphront/storage/connection/AphrontDatabaseConnection.php index b8366ed..af171c8 100644 --- a/src/aphront/storage/connection/AphrontDatabaseConnection.php +++ b/src/aphront/storage/connection/AphrontDatabaseConnection.php @@ -1,194 +1,192 @@ getTransactionState(); $point = $state->getSavepointName(); $depth = $state->getDepth(); $new_transaction = ($depth == 0); if ($new_transaction) { $this->query('START TRANSACTION'); } else { $this->query('SAVEPOINT '.$point); } $state->increaseDepth(); return $this; } /** * Commit a transaction, or stage a savepoint for commit once the entire * transaction completes if inside a transaction stack. * * @return this * @task xaction */ public function saveTransaction() { $state = $this->getTransactionState(); $depth = $state->decreaseDepth(); if ($depth == 0) { $this->query('COMMIT'); } return $this; } /** * Rollback a transaction, or unstage the last savepoint if inside a * transaction stack. 
* * @return this */ public function killTransaction() { $state = $this->getTransactionState(); $depth = $state->decreaseDepth(); if ($depth == 0) { $this->query('ROLLBACK'); } else { /* getSavepointName() is called after decreaseDepth(), so the name it returns reflects the already-reduced depth. */ $this->query('ROLLBACK TO SAVEPOINT '.$state->getSavepointName()); } return $this; } /** * Returns true if the connection is transactional. * * @return bool True if the connection is currently transactional. * @task xaction */ public function isInsideTransaction() { $state = $this->getTransactionState(); return ($state->getDepth() > 0); } /** * Get the current @{class:AphrontDatabaseTransactionState} object, or create * one if none exists. * * @return AphrontDatabaseTransactionState Current transaction state. * @task xaction */ protected function getTransactionState() { if (!$this->transactionState) { $this->transactionState = new AphrontDatabaseTransactionState(); } return $this->transactionState; } /** * Begin a read-locking scope by delegating to the transaction state. Pair * each call with a call to @{method:endReadLocking}. * * @return this * @task xaction */ public function beginReadLocking() { $this->getTransactionState()->beginReadLocking(); return $this; } /** * End a read-locking scope by delegating to the transaction state. * * @return this * @task xaction */ public function endReadLocking() { $this->getTransactionState()->endReadLocking(); return $this; } /** * Ask the transaction state whether a read-locking scope is open. * * @return bool Whatever the transaction state's isReadLocking() reports. * @task xaction */ public function isReadLocking() { return $this->getTransactionState()->isReadLocking(); } /** * Begin a write-locking scope by delegating to the transaction state. Pair * each call with a call to @{method:endWriteLocking}. * * @return this * @task xaction */ public function beginWriteLocking() { $this->getTransactionState()->beginWriteLocking(); return $this; } /** * End a write-locking scope by delegating to the transaction state. * * @return this * @task xaction */ public function endWriteLocking() { $this->getTransactionState()->endWriteLocking(); return $this; } /** * Ask the transaction state whether a write-locking scope is open. * * @return bool Whatever the transaction state's isWriteLocking() reports. * @task xaction */ public function isWriteLocking() { return $this->getTransactionState()->isWriteLocking(); } } diff --git a/src/aphront/storage/connection/AphrontDatabaseTransactionState.php b/src/aphront/storage/connection/AphrontDatabaseTransactionState.php index f952fb7..be05301 100644 --- a/src/aphront/storage/connection/AphrontDatabaseTransactionState.php +++ b/src/aphront/storage/connection/AphrontDatabaseTransactionState.php @@ -1,88 +1,86 @@ depth; } public function 
increaseDepth() { return ++$this->depth; } /** * Decrement the transaction depth. Throws an Exception if the depth is * already zero. * * @return int Depth after decrementing. */ public function decreaseDepth() { if ($this->depth == 0) { throw new Exception( 'Too many calls to saveTransaction() or killTransaction()!'); } return --$this->depth; } /** * Build a savepoint name from the current depth. * * @return string The prefix "Aphront_Savepoint_" followed by the depth. */ public function getSavepointName() { return 'Aphront_Savepoint_'.$this->depth; } /** * Increment the read lock level. * * @return this */ public function beginReadLocking() { $this->readLockLevel++; return $this; } /** * Decrement the read lock level. Throws an Exception if the level is * already zero. * * @return this */ public function endReadLocking() { if ($this->readLockLevel == 0) { throw new Exception('Too many calls to endReadLocking()!'); } $this->readLockLevel--; return $this; } /** * @return bool True if the read lock level is greater than zero. */ public function isReadLocking() { return ($this->readLockLevel > 0); } /** * Increment the write lock level. * * @return this */ public function beginWriteLocking() { $this->writeLockLevel++; return $this; } /** * Decrement the write lock level. Throws an Exception if the level is * already zero. * * @return this */ public function endWriteLocking() { if ($this->writeLockLevel == 0) { throw new Exception('Too many calls to endWriteLocking()!'); } $this->writeLockLevel--; return $this; } /** * @return bool True if the write lock level is greater than zero. */ public function isWriteLocking() { return ($this->writeLockLevel > 0); } /** * Check that the state is balanced when the object is destroyed. Throws an * Exception if the transaction depth, the read lock level, or the write * lock level is still nonzero; the checks run in that order. */ public function __destruct() { if ($this->depth) { throw new Exception( 'Process exited with an open transaction! The transaction will be '. 'implicitly rolled back. Calls to openTransaction() must always be '. 'paired with a call to saveTransaction() or killTransaction().'); } if ($this->readLockLevel) { throw new Exception( 'Process exited with an open read lock! Call to beginReadLocking() '. 'must always be paired with a call to endReadLocking().'); } if ($this->writeLockLevel) { throw new Exception( 'Process exited with an open write lock! Call to beginWriteLocking() '. 
'must always be paired with a call to endWriteLocking().'); } } } diff --git a/src/aphront/storage/connection/AphrontIsolatedDatabaseConnection.php b/src/aphront/storage/connection/AphrontIsolatedDatabaseConnection.php index 74146f2..73d339c 100644 --- a/src/aphront/storage/connection/AphrontIsolatedDatabaseConnection.php +++ b/src/aphront/storage/connection/AphrontIsolatedDatabaseConnection.php @@ -1,122 +1,119 @@ configuration = $configuration; if (self::$nextInsertID === null) { // Generate test IDs into a distant ID space to reduce the risk of // collisions and make them distinctive. self::$nextInsertID = 55555000000 + mt_rand(0, 1000); } } public function close() { return; } public function escapeUTF8String($string) { return ''; } public function escapeBinaryString($string) { return ''; } public function escapeColumnName($name) { return ''; } public function escapeMultilineComment($comment) { return ''; } public function escapeStringForLikeClause($value) { return ''; } private function getConfiguration($key, $default = null) { return idx($this->configuration, $key, $default); } public function getInsertID() { return $this->insertID; } public function getAffectedRows() { return $this->affectedRows; } public function selectAllResults() { return $this->allResults; } public function executeRawQuery($raw_query) { // NOTE: "[\s<>K]*" allows any number of (properly escaped) comments to // appear prior to the allowed keyword, since this connection escapes // them as "" (above). $keywords = array( 'INSERT', 'UPDATE', 'DELETE', 'START', 'SAVEPOINT', 'COMMIT', 'ROLLBACK', ); $preg_keywords = array(); foreach ($keywords as $key => $word) { $preg_keywords[] = preg_quote($word, '/'); } $preg_keywords = implode('|', $preg_keywords); if (!preg_match('/^[\s<>K]*('.$preg_keywords.')\s*/i', $raw_query)) { throw new AphrontQueryNotSupportedException( "Database isolation currently only supports some queries. You are ". 
"trying to issue a query which does not begin with an allowed ". "keyword (".implode(', ', $keywords)."): '".$raw_query."'"); } $this->transcript[] = $raw_query; // NOTE: This method is intentionally simplified for now, since we're only // using it to stub out inserts/updates. In the future it will probably need // to grow more powerful. $this->allResults = array(); // NOTE: We jitter the insert IDs to keep tests honest; a test should cover // the relationship between objects, not their exact insertion order. This // guarantees that IDs are unique but makes it impossible to hard-code tests // against this specific implementation detail. self::$nextInsertID += mt_rand(1, 10); $this->insertID = self::$nextInsertID; $this->affectedRows = 1; } public function executeRawQueries(array $raw_queries) { $results = array(); foreach ($raw_queries as $id => $raw_query) { $results[$id] = array(); } return $results; } public function getQueryTranscript() { return $this->transcript; } } diff --git a/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnection.php b/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnection.php index c59b0c8..01ebc78 100644 --- a/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnection.php +++ b/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnection.php @@ -1,215 +1,212 @@ validateUTF8String($string); return $this->escapeBinaryString($string); } public function escapeBinaryString($string) { return mysql_real_escape_string($string, $this->requireConnection()); } public function getInsertID() { return mysql_insert_id($this->requireConnection()); } public function getAffectedRows() { return mysql_affected_rows($this->requireConnection()); } protected function closeConnection() { mysql_close($this->requireConnection()); } protected function connect() { if (!function_exists('mysql_connect')) { // We have to '@' the actual call since it can spew all sorts of silly // noise, but it will also silence fatals caused 
by not having MySQL // installed, which has bitten me on three separate occasions. Make sure // such failures are explicit and loud. throw new Exception( 'About to call mysql_connect(), but the PHP MySQL extension is not '. 'available!'); } $user = $this->getConfiguration('user'); $host = $this->getConfiguration('host'); $port = $this->getConfiguration('port'); if ($port) { $host .= ':'.$port; } $database = $this->getConfiguration('database'); $pass = $this->getConfiguration('pass'); if ($pass instanceof PhutilOpaqueEnvelope) { $pass = $pass->openEnvelope(); } $conn = @mysql_connect( $host, $user, $pass, $new_link = true, $flags = 0); if (!$conn) { $errno = mysql_errno(); $error = mysql_error(); throw new AphrontQueryConnectionException( "Attempt to connect to {$user}@{$host} failed with error ". "#{$errno}: {$error}.", $errno); } if ($database !== null) { $ret = @mysql_select_db($database, $conn); if (!$ret) { $this->throwQueryException($conn); } } mysql_set_charset('utf8', $conn); return $conn; } protected function rawQuery($raw_query) { return @mysql_query($raw_query, $this->requireConnection()); } /** * @phutil-external-symbol function mysql_multi_query * @phutil-external-symbol function mysql_fetch_result * @phutil-external-symbol function mysql_more_results * @phutil-external-symbol function mysql_next_result */ protected function rawQueries(array $raw_queries) { $conn = $this->requireConnection(); $results = array(); if (!function_exists('mysql_multi_query')) { foreach ($raw_queries as $key => $raw_query) { $results[$key] = $this->processResult($this->rawQuery($raw_query)); } return $results; } if (!mysql_multi_query(implode("\n;\n\n", $raw_queries), $conn)) { $ex = $this->processResult(false); return array_fill_keys(array_keys($raw_queries), $ex); } $processed_all = false; foreach ($raw_queries as $key => $raw_query) { $results[$key] = $this->processResult(@mysql_fetch_result($conn)); if (!mysql_more_results($conn)) { $processed_all = true; break; } 
mysql_next_result($conn); } if (!$processed_all) { throw new Exception('There are some results left in the result set.'); } return $results; } protected function freeResult($result) { mysql_free_result($result); } public function supportsParallelQueries() { // fb_parallel_query() doesn't support results with different columns. return false; } /** * @phutil-external-symbol function fb_parallel_query */ public function executeParallelQueries( array $queries, array $conns = array()) { assert_instances_of($conns, 'AphrontMySQLDatabaseConnection'); $map = array(); $is_write = false; foreach ($queries as $id => $query) { $is_write = $is_write || $this->checkWrite($query); $conn = idx($conns, $id, $this); $host = $conn->getConfiguration('host'); $port = 0; $match = null; if (preg_match('/(.+):(.+)/', $host, $match)) { list(, $host, $port) = $match; } $pass = $conn->getConfiguration('pass'); if ($pass instanceof PhutilOpaqueEnvelope) { $pass = $pass->openEnvelope(); } $map[$id] = array( 'sql' => $query, 'ip' => $host, 'port' => $port, 'username' => $conn->getConfiguration('user'), 'password' => $pass, 'db' => $conn->getConfiguration('database'), ); } $profiler = PhutilServiceProfiler::getInstance(); $call_id = $profiler->beginServiceCall( array( 'type' => 'multi-query', 'queries' => $queries, 'write' => $is_write, )); $map = fb_parallel_query($map); $profiler->endServiceCall($call_id, array()); $results = array(); $pos = 0; $err_pos = 0; foreach ($queries as $id => $query) { $errno = idx(idx($map, 'errno', array()), $err_pos); $err_pos++; if ($errno) { try { $this->throwQueryCodeException($errno, $map['error'][$id]); } catch (Exception $ex) { $results[$id] = $ex; } continue; } $results[$id] = $map['result'][$pos]; $pos++; } return $results; } protected function fetchAssoc($result) { return mysql_fetch_assoc($result); } protected function getErrorCode($connection) { return mysql_errno($connection); } protected function getErrorDescription($connection) { return 
mysql_error($connection); } } diff --git a/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnectionBase.php b/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnectionBase.php index 5c394b3..6b943ca 100644 --- a/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnectionBase.php +++ b/src/aphront/storage/connection/mysql/AphrontMySQLDatabaseConnectionBase.php @@ -1,342 +1,339 @@ configuration = $configuration; } public function __clone() { $this->establishConnection(); } public function close() { if ($this->lastResult) { $this->lastResult = null; } if ($this->connection) { $this->closeConnection(); $this->connection = null; } } public function escapeColumnName($name) { return '`'.str_replace('`', '``', $name).'`'; } public function escapeMultilineComment($comment) { // These can either terminate a comment, confuse the hell out of the parser, // make MySQL execute the comment as a query, or, in the case of semicolon, // are quasi-dangerous because the semicolon could turn a broken query into // a working query plus an ignored query. static $map = array( '--' => '(DOUBLEDASH)', '*/' => '(STARSLASH)', '//' => '(SLASHSLASH)', '#' => '(HASH)', '!' => '(BANG)', ';' => '(SEMICOLON)', ); $comment = str_replace( array_keys($map), array_values($map), $comment); // For good measure, kill anything else that isn't a nice printable // character. $comment = preg_replace('/[^\x20-\x7F]+/', ' ', $comment); return '/* '.$comment.' 
*/'; } public function escapeStringForLikeClause($value) { $value = addcslashes($value, '\%_'); $value = $this->escapeUTF8String($value); return $value; } protected function getConfiguration($key, $default = null) { return idx($this->configuration, $key, $default); } private function establishConnection() { $host = $this->getConfiguration('host'); $database = $this->getConfiguration('database'); $profiler = PhutilServiceProfiler::getInstance(); $call_id = $profiler->beginServiceCall( array( 'type' => 'connect', 'host' => $host, 'database' => $database, )); $retries = max(1, $this->getConfiguration('retries', 3)); while ($retries--) { try { $conn = $this->connect(); $profiler->endServiceCall($call_id, array()); break; } catch (AphrontQueryException $ex) { if ($retries && $ex->getCode() == 2003) { $class = get_class($ex); $message = $ex->getMessage(); phlog("Retrying ({$retries}) after {$class}: {$message}"); } else { $profiler->endServiceCall($call_id, array()); throw $ex; } } } $this->connection = $conn; } protected function requireConnection() { if (!$this->connection) { if ($this->connectionPool) { $this->connection = array_pop($this->connectionPool); } else { $this->establishConnection(); } } return $this->connection; } protected function beginAsyncConnection() { $connection = $this->requireConnection(); $this->connection = null; return $connection; } protected function endAsyncConnection($connection) { if ($this->connection) { $this->connectionPool[] = $this->connection; } $this->connection = $connection; } public function selectAllResults() { $result = array(); $res = $this->lastResult; if ($res == null) { throw new Exception('No query result to fetch from!'); } while (($row = $this->fetchAssoc($res))) { $result[] = $row; } return $result; } public function executeRawQuery($raw_query) { $this->lastResult = null; $retries = max(1, $this->getConfiguration('retries', 3)); while ($retries--) { try { $this->requireConnection(); $is_write = 
$this->checkWrite($raw_query); $profiler = PhutilServiceProfiler::getInstance(); $call_id = $profiler->beginServiceCall( array( 'type' => 'query', 'config' => $this->configuration, 'query' => $raw_query, 'write' => $is_write, )); $result = $this->rawQuery($raw_query); $profiler->endServiceCall($call_id, array()); if ($this->nextError) { $result = null; } if ($result) { $this->lastResult = $result; break; } $this->throwQueryException($this->connection); } catch (AphrontQueryConnectionLostException $ex) { if ($this->isInsideTransaction()) { // Zero out the transaction state to prevent a second exception // ("program exited with open transaction") from being thrown, since // we're about to throw a more relevant/useful one instead. $state = $this->getTransactionState(); while ($state->getDepth()) { $state->decreaseDepth(); } // We can't close the connection before this because // isInsideTransaction() and getTransactionState() depend on the // connection. $this->close(); throw $ex; } $this->close(); if (!$retries) { throw $ex; } } } } public function executeRawQueries(array $raw_queries) { if (!$raw_queries) { return array(); } $is_write = false; foreach ($raw_queries as $key => $raw_query) { $is_write = $is_write || $this->checkWrite($raw_query); $raw_queries[$key] = rtrim($raw_query, "\r\n\t ;"); } $profiler = PhutilServiceProfiler::getInstance(); $call_id = $profiler->beginServiceCall( array( 'type' => 'multi-query', 'config' => $this->configuration, 'queries' => $raw_queries, 'write' => $is_write, )); $results = $this->rawQueries($raw_queries); $profiler->endServiceCall($call_id, array()); return $results; } protected function processResult($result) { if (!$result) { try { $this->throwQueryException($this->requireConnection()); } catch (Exception $ex) { return $ex; } } else if (is_bool($result)) { return $this->getAffectedRows(); } $rows = array(); while (($row = $this->fetchAssoc($result))) { $rows[] = $row; } $this->freeResult($result); return $rows; } protected 
function checkWrite($raw_query) { // NOTE: The opening "(" allows queries in the form of: // // (SELECT ...) UNION (SELECT ...) $is_write = !preg_match('/^[(]*(SELECT|SHOW|EXPLAIN)\s/', $raw_query); if ($is_write) { AphrontWriteGuard::willWrite(); return true; } return false; } protected function throwQueryException($connection) { if ($this->nextError) { $errno = $this->nextError; $error = 'Simulated error.'; $this->nextError = null; } else { $errno = $this->getErrorCode($connection); $error = $this->getErrorDescription($connection); } $this->throwQueryCodeException($errno, $error); } protected function throwQueryCodeException($errno, $error) { $exmsg = "#{$errno}: {$error}"; switch ($errno) { case 2013: // Connection Dropped throw new AphrontQueryConnectionLostException($exmsg); case 2006: // Gone Away $more = "This error may occur if your MySQL 'wait_timeout' ". "or 'max_allowed_packet' configuration values are set too low."; throw new AphrontQueryConnectionLostException("{$exmsg}\n\n{$more}"); case 1213: // Deadlock case 1205: // Lock wait timeout exceeded throw new AphrontQueryDeadlockException($exmsg); case 1062: // Duplicate Key // NOTE: In some versions of MySQL we get a key name back here, but // older versions just give us a key index ("key 2") so it's not // portable to parse the key out of the error and attach it to the // exception. throw new AphrontQueryDuplicateKeyException($exmsg); case 1044: // Access denied to database case 1045: // Access denied (auth) case 1142: // Access denied to table case 1143: // Access denied to column throw new AphrontQueryAccessDeniedException($exmsg); case 1146: // No such table case 1049: // No such database case 1054: // Unknown column "..." in field list throw new AphrontQuerySchemaException($exmsg); default: // TODO: 1064 is syntax error, and quite terrible in production. throw new AphrontQueryException($exmsg); } } /** * Force the next query to fail with a simulated error. This should be used * ONLY for unit tests. 
*/ public function simulateErrorOnNextQuery($error) { $this->nextError = $error; return $this; } /** * Check inserts for characters outside of the BMP. Even with the strictest * settings, MySQL will silently truncate data when it encounters these, which * can lead to data loss and security problems. */ protected function validateUTF8String($string) { if (phutil_is_utf8_with_only_bmp_characters($string)) { return; } throw new AphrontQueryCharacterSetException( pht( 'Attempting to construct a query containing characters outside of '. 'the Unicode Basic Multilingual Plane. MySQL will silently truncate '. 'this data if it is inserted into a `utf8` column. Use the `%%B` '. 'conversion to escape binary strings data.')); } } diff --git a/src/aphront/storage/connection/mysql/AphrontMySQLiDatabaseConnection.php b/src/aphront/storage/connection/mysql/AphrontMySQLiDatabaseConnection.php index e62258f..fd24145 100644 --- a/src/aphront/storage/connection/mysql/AphrontMySQLiDatabaseConnection.php +++ b/src/aphront/storage/connection/mysql/AphrontMySQLiDatabaseConnection.php @@ -1,162 +1,160 @@ validateUTF8String($string); return $this->escapeBinaryString($string); } public function escapeBinaryString($string) { return $this->requireConnection()->escape_string($string); } public function getInsertID() { return $this->requireConnection()->insert_id; } public function getAffectedRows() { return $this->requireConnection()->affected_rows; } protected function closeConnection() { $this->requireConnection()->close(); } protected function connect() { if (!class_exists('mysqli', false)) { throw new Exception( 'About to call new mysqli(), but the PHP MySQLi extension is not '. 
'available!'); } $user = $this->getConfiguration('user'); $host = $this->getConfiguration('host'); $port = $this->getConfiguration('port'); $database = $this->getConfiguration('database'); $pass = $this->getConfiguration('pass'); if ($pass instanceof PhutilOpaqueEnvelope) { $pass = $pass->openEnvelope(); } // If the host is "localhost", the port is ignored and mysqli attempts to // connect over a socket. if ($port) { if ($host === 'localhost' || $host === null) { $host = '127.0.0.1'; } } $conn = @new mysqli( $host, $user, $pass, $database, $port); $errno = $conn->connect_errno; if ($errno) { $error = $conn->connect_error; throw new AphrontQueryConnectionException( "Attempt to connect to {$user}@{$host} failed with error ". "#{$errno}: {$error}.", $errno); } $conn->set_charset('utf8'); return $conn; } protected function rawQuery($raw_query) { return @$this->requireConnection()->query($raw_query); } protected function rawQueries(array $raw_queries) { $conn = $this->requireConnection(); $have_result = false; $results = array(); foreach ($raw_queries as $key => $raw_query) { if (!$have_result) { // End line in front of semicolon to allow single line comments at the // end of queries. 
$have_result = $conn->multi_query(implode("\n;\n\n", $raw_queries)); } else { $have_result = $conn->next_result(); } array_shift($raw_queries); $result = $conn->store_result(); if (!$result && !$this->getErrorCode($conn)) { $result = true; } $results[$key] = $this->processResult($result); } if ($conn->more_results()) { throw new Exception('There are some results left in the result set.'); } return $results; } protected function freeResult($result) { $result->free_result(); } protected function fetchAssoc($result) { return $result->fetch_assoc(); } protected function getErrorCode($connection) { return $connection->errno; } protected function getErrorDescription($connection) { return $connection->error; } public function supportsAsyncQueries() { return defined('MYSQLI_ASYNC'); } public function asyncQuery($raw_query) { $this->checkWrite($raw_query); $async = $this->beginAsyncConnection(); $async->query($raw_query, MYSQLI_ASYNC); return $async; } public static function resolveAsyncQueries(array $conns, array $asyncs) { assert_instances_of($conns, 'AphrontMySQLiDatabaseConnection'); assert_instances_of($asyncs, 'mysqli'); $read = $error = $reject = array(); foreach ($asyncs as $async) { $read[] = $error[] = $reject[] = $async; } if (!mysqli::poll($read, $error, $reject, 0)) { return array(); } $results = array(); foreach ($read as $async) { $key = array_search($async, $asyncs, $strict = true); $conn = $conns[$key]; $conn->endAsyncConnection($async); $results[$key] = $conn->processResult($async->reap_async_query()); } return $results; } } diff --git a/src/aphront/storage/exception/AphrontQueryAccessDeniedException.php b/src/aphront/storage/exception/AphrontQueryAccessDeniedException.php index b92d0a1..a26dcda 100644 --- a/src/aphront/storage/exception/AphrontQueryAccessDeniedException.php +++ b/src/aphront/storage/exception/AphrontQueryAccessDeniedException.php @@ -1,7 +1,4 @@ query = $query; } public function getQuery() { return $this->query; } } diff --git 
a/src/aphront/storage/exception/AphrontQueryRecoverableException.php b/src/aphront/storage/exception/AphrontQueryRecoverableException.php index ce05afd..713423e 100644 --- a/src/aphront/storage/exception/AphrontQueryRecoverableException.php +++ b/src/aphront/storage/exception/AphrontQueryRecoverableException.php @@ -1,9 +1,3 @@ dispose(); * * Normally, you do not need to manage guards yourself -- the Aphront stack * handles it for you. * * This class accepts a callback, which will be invoked when a write is * attempted. The callback should validate the presence of a CSRF token in * the request, or abort the request (e.g., by throwing an exception) if a * valid token isn't present. * * @param callable CSRF callback. * @return this * @task manage */ public function __construct($callback) { if (self::$instance) { throw new Exception( 'An AphrontWriteGuard already exists. Dispose of the previous guard '. 'before creating a new one.'); } if (self::$allowUnguardedWrites) { throw new Exception( 'An AphrontWriteGuard is being created in a context which permits '. 'unguarded writes unconditionally. This is not allowed and indicates '. 'a serious error.'); } if (!self::$abruptExitlistenerIsInstalled) { self::$abruptExitlistenerIsInstalled = true; $event_listener = new AphrontWriteGuardExitEventListener(); $event_listener->register(); } $this->callback = $callback; self::$instance = $this; } /** * Dispose of the active write guard. You must call this method when you are * done with a write guard. You do not normally need to call this yourself. * * @return void * @task manage */ public function dispose() { if (!self::$instance) { throw new Exception( 'Attempting to dispose of write guard, but no write guard is active!'); } if ($this->allowDepth > 0) { throw new Exception( 'Imbalanced AphrontWriteGuard: more beginUnguardedWrites() calls than '. 
'endUnguardedWrites() calls.'); } self::$instance = null; } /** * This is used for clearing the write guard without performing any checks. * This is used in conjunction with phutil_exit for abrupt exits. * * @return void * @task manage */ public function disposeAbruptly() { self::$instance = null; } /** * Determine if there is an active write guard. * * @return bool * @task manage */ public static function isGuardActive() { return (bool)self::$instance; } /** * Return the active AphrontWriteGuard instance, or null if no guard is * active. * * @return AphrontWriteGuard|null * @task manage */ public static function getInstance() { return self::$instance; } /* -( Protecting Writes )-------------------------------------------------- */ /** * Declare intention to perform a write, validating that writes are allowed. * You should call this method before executing a write whenever you implement * a new storage engine where information can be permanently kept. * * Writes are permitted if: * * - The request has valid CSRF tokens. * - Unguarded writes have been temporarily enabled by a call to * @{method:beginUnguardedWrites}. * - All write guarding has been disabled with * @{method:allowDangerousUnguardedWrites}. * * If none of these conditions are true, this method will throw and prevent * the write. * * @return void * @task protect */ public static function willWrite() { if (!self::$instance) { if (!self::$allowUnguardedWrites) { throw new Exception( 'Unguarded write! There must be an active AphrontWriteGuard to '. 'perform writes.'); } else { // Unguarded writes are being allowed unconditionally. return; } } $instance = self::$instance; /* The guard callback is only invoked when no unguarded-write scope is open (allowDepth is zero). */ if ($instance->allowDepth == 0) { call_user_func($instance->callback); } } /* -( Disabling Write Protection )----------------------------------------- */ /** * Enter a scope which permits unguarded writes. This works like * @{method:beginUnguardedWrites} but returns an object which will end * the unguarded write scope when its __destruct() method is called. 
This * is useful to more easily handle exceptions correctly in unguarded write * blocks: * * // Restores the guard even if do_logging() throws. * function unguarded_scope() { * $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); * do_logging(); * } * * @return AphrontScopedUnguardedWriteCapability Object which ends unguarded * writes when it leaves scope. * @task disable */ public static function beginScopedUnguardedWrites() { self::beginUnguardedWrites(); return new AphrontScopedUnguardedWriteCapability(); } /** * Begin a block which permits unguarded writes. You should use this very * sparingly, and only for things like logging where CSRF is not a concern. * * You must pair every call to @{method:beginUnguardedWrites} with a call to * @{method:endUnguardedWrites}: * * AphrontWriteGuard::beginUnguardedWrites(); * do_logging(); * AphrontWriteGuard::endUnguardedWrites(); * * @return void * @task disable */ public static function beginUnguardedWrites() { if (!self::$instance) { return; } self::$instance->allowDepth++; } /** * Declare that you have finished performing unguarded writes. You must * call this exactly once for each call to @{method:beginUnguardedWrites}. * * @return void * @task disable */ public static function endUnguardedWrites() { if (!self::$instance) { return; } if (self::$instance->allowDepth <= 0) { throw new Exception( 'Imbalanced AphrontWriteGuard: more endUnguardedWrites() calls than '. 'beginUnguardedWrites() calls.'); } self::$instance->allowDepth--; } /** * Allow execution of unguarded writes. This is ONLY appropriate for use in * script contexts or other contexts where you are guaranteed to never be * vulnerable to CSRF concerns. Calling this method is EXTREMELY DANGEROUS * if you do not understand the consequences. * * If you need to perform unguarded writes on an otherwise guarded workflow * which is vulnerable to CSRF, use @{method:beginUnguardedWrites}. 
* * @param bool True to permit unguarded writes unconditionally, false to * require an active write guard for writes again. * @return void * @task disable */ public static function allowDangerousUnguardedWrites($allow) { if (self::$instance) { throw new Exception( 'You can not unconditionally disable AphrontWriteGuard by calling '. 'allowDangerousUnguardedWrites() while a write guard is active. Use '. 'beginUnguardedWrites() to temporarily allow unguarded writes.'); } /* Honor the caller's flag. Previously this was hard-coded to true, so passing false silently enabled unguarded writes. */ self::$allowUnguardedWrites = (bool)$allow; } /* -( Internals )---------------------------------------------------------- */ /** * When the object is destroyed, make sure @{method:dispose} was called. * * @task internal */ public function __destruct() { if (isset(self::$instance)) { throw new Exception( 'AphrontWriteGuard was not properly disposed of! Call dispose() on '. 'every AphrontWriteGuard object you instantiate or use phutil_exit() '. 'to exit abruptly while debugging.'); } } + } diff --git a/src/aphront/writeguard/event/AphrontWriteGuardExitEventListener.php b/src/aphront/writeguard/event/AphrontWriteGuardExitEventListener.php index 655a530..8c8da4d 100644 --- a/src/aphront/writeguard/event/AphrontWriteGuardExitEventListener.php +++ b/src/aphront/writeguard/event/AphrontWriteGuardExitEventListener.php @@ -1,21 +1,20 @@ listen(PhutilEventType::TYPE_WILLEXITABRUPTLY); return $this; } public function handleEvent(PhutilEvent $event) { if (AphrontWriteGuard::isGuardActive()) { AphrontWriteGuard::getInstance()->disposeAbruptly(); } } + } diff --git a/src/cache/PhutilKeyValueCache.php b/src/cache/PhutilKeyValueCache.php index 20e95de..b5e0415 100644 --- a/src/cache/PhutilKeyValueCache.php +++ b/src/cache/PhutilKeyValueCache.php @@ -1,122 +1,121 @@ getKeys(array($key)); return idx($map, $key, $default); } /** * Set a single key in cache. See @{method:setKeys} to set multiple keys at * once. * * See @{method:setKeys} for a description of TTLs. * * @param string Key to set. * @param wild Value to set. * @param int|null Optional TTL. 
* @return this * @task kvimpl */ final public function setKey($key, $value, $ttl = null) { return $this->setKeys(array($key => $value), $ttl); } /** * Delete a key from the cache. See @{method:deleteKeys} to delete multiple * keys at once. * * @param string Key to delete. * @return this * @task kvimpl */ final public function deleteKey($key) { return $this->deleteKeys(array($key)); } /** * Get data from the cache. * * @param list List of cache keys to retrieve. * @return dict Dictionary of keys that were found in the * cache. Keys not present in the cache are * omitted, so you can detect a cache miss. * @task kvimpl */ abstract public function getKeys(array $keys); /** * Put data into the key-value cache. * * With a TTL ("time to live"), the cache will automatically delete the key * after a specified number of seconds. By default, there is no expiration * policy and data will persist in cache indefinitely. * * @param dict Map of cache keys to values. * @param int|null TTL for cache keys, in seconds. * @return this * @task kvimpl */ abstract public function setKeys(array $keys, $ttl = null); /** * Delete a list of keys from the cache. * * @param list List of keys to delete. * @return this * @task kvimpl */ abstract public function deleteKeys(array $keys); /** * Completely destroy all data in the cache. 
* * @return this * @task kvimpl */ abstract public function destroyCache(); } diff --git a/src/cache/PhutilKeyValueCacheAPC.php b/src/cache/PhutilKeyValueCacheAPC.php index 8ef208c..31f0c2d 100644 --- a/src/cache/PhutilKeyValueCacheAPC.php +++ b/src/cache/PhutilKeyValueCacheAPC.php @@ -1,58 +1,56 @@ $value) { apc_store($key, $value, $ttl); } return $this; } public function deleteKeys(array $keys) { foreach ($keys as $key) { apc_delete($key); } return $this; } public function destroyCache() { apc_clear_cache('user'); return $this; } } diff --git a/src/cache/PhutilKeyValueCacheDirectory.php b/src/cache/PhutilKeyValueCacheDirectory.php index d92f1ef..b1cab12 100644 --- a/src/cache/PhutilKeyValueCacheDirectory.php +++ b/src/cache/PhutilKeyValueCacheDirectory.php @@ -1,242 +1,241 @@ validateKeys($keys); try { $this->lockCache(); } catch (PhutilLockException $ex) { return array(); } $now = time(); $results = array(); foreach ($keys as $key) { $key_file = $this->getKeyFile($key); try { $data = Filesystem::readFile($key_file); } catch (FilesystemException $ex) { continue; } $data = unserialize($data); if (!$data) { continue; } if (isset($data['ttl']) && $data['ttl'] < $now) { continue; } $results[$key] = $data['value']; } $this->unlockCache(); return $results; } public function setKeys(array $keys, $ttl = null) { $this->validateKeys(array_keys($keys)); $this->lockCache(15); if ($ttl) { $ttl_epoch = time() + $ttl; } else { $ttl_epoch = null; } foreach ($keys as $key => $value) { $dict = array( 'value' => $value, ); if ($ttl_epoch) { $dict['ttl'] = $ttl_epoch; } try { $key_file = $this->getKeyFile($key); $key_dir = dirname($key_file); if (!Filesystem::pathExists($key_dir)) { Filesystem::createDirectory( $key_dir, $mask = 0777, $recursive = true); } $new_file = $key_file.'.new'; Filesystem::writeFile($new_file, serialize($dict)); Filesystem::rename($new_file, $key_file); } catch (FilesystemException $ex) { phlog($ex); } } $this->unlockCache(); return $this; } public function 
deleteKeys(array $keys) { $this->validateKeys($keys); $this->lockCache(15); foreach ($keys as $key) { $path = $this->getKeyFile($key); Filesystem::remove($path); // If removing this key leaves the directory empty, clean it up. Then // clean up any empty parent directories. $path = dirname($path); do { if (!Filesystem::isDescendant($path, $this->getCacheDirectory())) { break; } if (Filesystem::listDirectory($path, true)) { break; } Filesystem::remove($path); $path = dirname($path); } while (true); } $this->unlockCache(); return $this; } public function destroyCache() { Filesystem::remove($this->getCacheDirectory()); return $this; } /* -( Cache Storage )------------------------------------------------------ */ /** * @task storage */ public function setCacheDirectory($directory) { $this->cacheDirectory = rtrim($directory, '/').'/'; return $this; } /** * @task storage */ private function getCacheDirectory() { if (!$this->cacheDirectory) { throw new Exception( 'Call setCacheDirectory() before using a directory cache!'); } return $this->cacheDirectory; } /** * @task storage */ private function getKeyFile($key) { // Colon is a drive separator on Windows. $key = str_replace(':', '_', $key); // NOTE: We add ".cache" to each file so we don't get a collision if you // set the keys "a" and "a/b". Without ".cache", the file "a" would need // to be both a file and a directory. return $this->getCacheDirectory().$key.'.cache'; } /** * @task storage */ private function validateKeys(array $keys) { foreach ($keys as $key) { // NOTE: Use of "." is reserved for ".lock", "key.new" and "key.cache". // Use of "_" is reserved for converting ":". if (!preg_match('@^[a-zA-Z0-9/:-]+$@', $key)) { throw new Exception( "Invalid key '{$key}': directory caches may only contain letters, ". 
"numbers, hyphen, colon and slash."); } } } /** * @task storage */ private function lockCache($wait = 0) { if ($this->lock) { throw new Exception('Trying to lockCache() with a lock!'); } if (!Filesystem::pathExists($this->getCacheDirectory())) { Filesystem::createDirectory($this->getCacheDirectory(), 0777, true); } $lock = PhutilFileLock::newForPath($this->getCacheDirectory().'.lock'); $lock->lock($wait); $this->lock = $lock; } /** * @task storage */ private function unlockCache() { if (!$this->lock) { throw new Exception( 'Call lockCache() before unlockCache()!'); } $this->lock->unlock(); $this->lock = null; } } diff --git a/src/cache/PhutilKeyValueCacheInRequest.php b/src/cache/PhutilKeyValueCacheInRequest.php index 9dd7a99..fac1717 100644 --- a/src/cache/PhutilKeyValueCacheInRequest.php +++ b/src/cache/PhutilKeyValueCacheInRequest.php @@ -1,120 +1,118 @@ limit = $limit; return $this; } /* -( Key-Value Cache Implementation )------------------------------------- */ public function isAvailable() { return true; } public function getKeys(array $keys) { $results = array(); $now = time(); foreach ($keys as $key) { if (!isset($this->cache[$key]) && !array_key_exists($key, $this->cache)) { continue; } if (isset($this->ttl[$key]) && ($this->ttl[$key] < $now)) { continue; } $results[$key] = $this->cache[$key]; } return $results; } public function setKeys(array $keys, $ttl = null) { foreach ($keys as $key => $value) { $this->cache[$key] = $value; } if ($ttl) { $end = time() + $ttl; foreach ($keys as $key => $value) { $this->ttl[$key] = $end; } } else { foreach ($keys as $key => $value) { unset($this->ttl[$key]); } } if ($this->limit) { $count = count($this->cache); if ($count > $this->limit) { $remove = array(); foreach ($this->cache as $key => $value) { $remove[] = $key; $count--; if ($count <= $this->limit) { break; } } $this->deleteKeys($remove); } } return $this; } public function deleteKeys(array $keys) { foreach ($keys as $key) { unset($this->cache[$key]); 
unset($this->ttl[$key]); } return $this; } public function getAllKeys() { return $this->cache; } public function destroyCache() { $this->cache = array(); $this->ttl = array(); return $this; } } diff --git a/src/cache/PhutilKeyValueCacheMemcache.php b/src/cache/PhutilKeyValueCacheMemcache.php index 3ef8e64..e437fe9 100644 --- a/src/cache/PhutilKeyValueCacheMemcache.php +++ b/src/cache/PhutilKeyValueCacheMemcache.php @@ -1,151 +1,150 @@ bucketKeys($keys); $results = array(); foreach ($buckets as $bucket => $bucket_keys) { $conn = $this->getConnection($bucket); $result = $conn->get($bucket_keys); if (!$result) { // If the call fails, treat it as a miss on all keys. $result = array(); } $results += $result; } return $results; } public function setKeys(array $keys, $ttl = null) { $buckets = $this->bucketKeys(array_keys($keys)); // Memcache interprets TTLs as: // // - Seconds from now, for values from 1 to 2592000 (30 days). // - Epoch timestamp, for values larger than 2592000. // // We support only relative TTLs, so convert excessively large relative // TTLs into epoch TTLs. if ($ttl > 2592000) { $effective_ttl = time() + $ttl; } else { $effective_ttl = $ttl; } foreach ($buckets as $bucket => $bucket_keys) { $conn = $this->getConnection($bucket); foreach ($bucket_keys as $key) { $conn->set($key, $keys[$key], 0, $effective_ttl); } } return $this; } public function deleteKeys(array $keys) { $buckets = $this->bucketKeys($keys); foreach ($buckets as $bucket => $bucket_keys) { $conn = $this->getConnection($bucket); foreach ($bucket_keys as $key) { $conn->delete($key); } } return $this; } public function destroyCache() { foreach ($this->servers as $key => $spec) { $this->getConnection($key)->flush(); } return $this; } /* -( Managing Memcache )-------------------------------------------------- */ /** * Set available memcache servers. 
For example: * * $cache->setServers( * array( * array( * 'host' => '10.0.0.20', * 'port' => 11211, * ), * array( * 'host' => '10.0.0.21', * 'port' => 11211, * ), * )); * * @param list List of server specifications. * @return this * @task memcache */ public function setServers(array $servers) { $this->servers = array_values($servers); return $this; } private function bucketKeys(array $keys) { $buckets = array(); $n = count($this->servers); if (!$n) { throw new Exception('Call setServers() before using Memcache!'); } foreach ($keys as $key) { $bucket = (int)((crc32($key) & 0x7FFFFFFF) % $n); $buckets[$bucket][] = $key; } return $buckets; } /** * @phutil-external-symbol function memcache_pconnect */ private function getConnection($server) { if (empty($this->connections[$server])) { $spec = $this->servers[$server]; $host = $spec['host']; $port = $spec['port']; $conn = memcache_pconnect($host, $spec['port']); if (!$conn) { throw new Exception( "Unable to connect to memcache server ({$host}@{$port})!"); } $this->connections[$server] = $conn; } return $this->connections[$server]; } } diff --git a/src/cache/PhutilKeyValueCacheOnDisk.php b/src/cache/PhutilKeyValueCacheOnDisk.php index cf44066..4b2197f 100644 --- a/src/cache/PhutilKeyValueCacheOnDisk.php +++ b/src/cache/PhutilKeyValueCacheOnDisk.php @@ -1,204 +1,203 @@ wait = $wait; return $this; } public function getKeys(array $keys) { $now = time(); $results = array(); $reloaded = false; foreach ($keys as $key) { // Try to read the value from cache. If we miss, load (or reload) the // cache. 
while (true) { if (isset($this->cache[$key])) { $val = $this->cache[$key]; if (empty($val['ttl']) || $val['ttl'] >= $now) { $results[$key] = $val['val']; break; } } if ($reloaded) { break; } $this->loadCache($hold_lock = false); $reloaded = true; } } return $results; } public function setKeys(array $keys, $ttl = null) { if ($ttl) { $ttl_epoch = time() + $ttl; } else { $ttl_epoch = null; } $dicts = array(); foreach ($keys as $key => $value) { $dict = array( 'val' => $value, ); if ($ttl_epoch) { $dict['ttl'] = $ttl_epoch; } $dicts[$key] = $dict; } $this->loadCache($hold_lock = true); foreach ($dicts as $key => $dict) { $this->cache[$key] = $dict; } $this->saveCache(); return $this; } public function deleteKeys(array $keys) { $this->loadCache($hold_lock = true); foreach ($keys as $key) { unset($this->cache[$key]); } $this->saveCache(); return $this; } public function destroyCache() { Filesystem::remove($this->getCacheFile()); return $this; } /* -( Cache Storage )------------------------------------------------------ */ /** * @task storage */ public function setCacheFile($file) { $this->cacheFile = $file; return $this; } /** * @task storage */ private function loadCache($hold_lock) { if ($this->lock) { throw new Exception('Trying to loadCache() with a lock!'); } $lock = PhutilFileLock::newForPath($this->getCacheFile().'.lock'); try { $lock->lock($this->wait); } catch (PhutilLockException $ex) { if ($hold_lock) { throw $ex; } else { $this->cache = array(); return; } } try { $this->cache = array(); if (Filesystem::pathExists($this->getCacheFile())) { $cache = unserialize(Filesystem::readFile($this->getCacheFile())); if ($cache) { $this->cache = $cache; } } } catch (Exception $ex) { $lock->unlock(); throw $ex; } if ($hold_lock) { $this->lock = $lock; } else { $lock->unlock(); } } /** * @task storage */ private function saveCache() { if (!$this->lock) { throw new Exception( 'Call loadCache($hold_lock=true) before saveCache()!'); } // We're holding a lock so we're safe to 
do a write to a well-known file. // Write to the same directory as the cache so the rename won't imply a // copy across volumes. $new = $this->getCacheFile().'.new'; Filesystem::writeFile($new, serialize($this->cache)); Filesystem::rename($new, $this->getCacheFile()); $this->lock->unlock(); $this->lock = null; } /** * @task storage */ private function getCacheFile() { if (!$this->cacheFile) { throw new Exception('Call setCacheFile() before using a disk cache!'); } return $this->cacheFile; } } diff --git a/src/cache/PhutilKeyValueCacheStack.php b/src/cache/PhutilKeyValueCacheStack.php index d84a30c..c8da0f9 100644 --- a/src/cache/PhutilKeyValueCacheStack.php +++ b/src/cache/PhutilKeyValueCacheStack.php @@ -1,132 +1,131 @@ Ordered list of key-value caches. * @return this * @task config */ public function setCaches(array $caches) { assert_instances_of($caches, 'PhutilKeyValueCache'); $this->cachesForward = $caches; $this->cachesBackward = array_reverse($caches); return $this; } /** * Set the readthrough TTL for the next cache operation. The TTL applies to * any keys set by the next call to @{method:getKey} or @{method:getKeys}, * and is reset after the call finishes. * * // If this causes any caches to fill, they'll fill with a 15-second TTL. * $stack->setNextTTL(15)->getKey('porcupine'); * * // TTL does not persist; this will use no TTL. * $stack->getKey('hedgehog'); * * @param int TTL in seconds. 
* @return this
   *
   * @task config
   */
  public function setNextTTL($ttl) {
    // getKeys() reads and then resets $this->nextTTL, so the value must be
    // stored under that same property for read-through fills to honor it.
    $this->nextTTL = $ttl;
    return $this;
  }


/* -( Key-Value Cache Implementation )------------------------------------- */


  /**
   * Read keys through the stack in forward order. Each cache is asked only
   * for the keys which are still missing. Once every key has been found, the
   * caches which missed are filled using the TTL from @{method:setNextTTL}.
   * The next-TTL is cleared after the call, whether or not it throws.
   *
   * @param list List of cache keys to retrieve.
   * @return dict Dictionary of keys that were found in any cache.
   */
  public function getKeys(array $keys) {

    $remaining = array_fuse($keys);
    $results = array();
    $missed = array();

    try {
      foreach ($this->cachesForward as $cache) {
        $result = $cache->getKeys($remaining);
        $remaining = array_diff_key($remaining, $result);
        $results += $result;
        if (!$remaining) {
          while ($cache = array_pop($missed)) {
            // TODO: This sets too many results in the closer caches, although
            // it probably isn't a big deal in most cases; normally we're just
            // filling the request cache.
            $cache->setKeys($result, $this->nextTTL);
          }
          break;
        }
        $missed[] = $cache;
      }
      $this->nextTTL = null;
    } catch (Exception $ex) {
      $this->nextTTL = null;
      throw $ex;
    }

    return $results;
  }

  /**
   * Write keys into every cache in the stack, walking the stack in reverse
   * order.
   *
   * @param dict Map of cache keys to values.
   * @param int|null TTL for cache keys, in seconds.
   * @return this
   */
  public function setKeys(array $keys, $ttl = null) {
    foreach ($this->cachesBackward as $cache) {
      $cache->setKeys($keys, $ttl);
    }
    // Return $this so that setKey() and chained calls behave as documented.
    return $this;
  }

  /**
   * Delete keys from every cache in the stack, walking the stack in reverse
   * order.
   *
   * @param list List of keys to delete.
   * @return this
   */
  public function deleteKeys(array $keys) {
    foreach ($this->cachesBackward as $cache) {
      $cache->deleteKeys($keys);
    }
    return $this;
  }

  /**
   * Destroy all data in every cache in the stack, walking the stack in
   * reverse order.
   *
   * @return this
   */
  public function destroyCache() {
    foreach ($this->cachesBackward as $cache) {
      $cache->destroyCache();
    }
    return $this;
  }

}
diff --git a/src/channel/PhutilChannel.php b/src/channel/PhutilChannel.php
index af8cb22..9f1de2b 100644
--- a/src/channel/PhutilChannel.php
+++ b/src/channel/PhutilChannel.php
@@ -1,425 +1,423 @@ obuf = new PhutilRope(); }


/* -( Reading and Writing )------------------------------------------------ */


  /**
   * Read from the channel. A channel defines the format of data that is read
   * from it, so this method may return strings, objects, or anything else.
   *
   * The default implementation returns bytes.
   *
   * @return wild Data from the channel, normally bytes.
   *
   * @task io
   */
  public function read() {
    $result = $this->ibuf;
    $this->ibuf = '';
    return $result;
  }


  /**
   * Write to the channel.
A channel defines what data format it accepts, * so this method may take strings, objects, or anything else. * * The default implementation accepts bytes. * * @param wild Data to write to the channel, normally bytes. * @return this * * @task io */ public function write($bytes) { if (!is_scalar($bytes)) { throw new Exception('PhutilChannel->write() may only write strings!'); } $this->obuf->append($bytes); return $this; } /* -( Waiting for Activity )----------------------------------------------- */ /** * Wait for any activity on a list of channels. Convenience wrapper around * @{method:waitForActivity}. * * @param list A list of channels to wait for. * @param dict Options, see above. * @return void * * @task wait */ public static function waitForAny(array $channels, array $options = array()) { return self::waitForActivity($channels, $channels, $options); } /** * Wait (using select()) for channels to become ready for reads or writes. * This method blocks until some channel is ready to be updated. * * It does not provide a way to determine which channels are ready to be * updated. The expectation is that you'll just update every channel. This * might change eventually. * * Available options are: * * - 'read' (list) Additional streams to select for read. * - 'write' (list) Additional streams to select for write. * - 'except' (list) Additional streams to select for except. * - 'timeout' (float) Select timeout, defaults to 1. * * NOTE: Extra streams must be //streams//, not //sockets//, because this * method uses `stream_select()`, not `socket_select()`. * * @param list List of channels to wait for reads on. * @param list List of channels to wait for writes on. 
* @return void
   *
   * @task wait
   */
  public static function waitForActivity(
    array $reads,
    array $writes,
    array $options = array()) {

    assert_instances_of($reads, 'PhutilChannel');
    assert_instances_of($writes, 'PhutilChannel');

    $read = idx($options, 'read', array());
    $write = idx($options, 'write', array());
    $except = idx($options, 'except', array());
    $wait = idx($options, 'timeout', 1);

    // TODO: It would be nice to just be able to categorically reject these as
    // unselectable.
    foreach (array($reads, $writes) as $channels) {
      foreach ($channels as $channel) {
        $r_sockets = $channel->getReadSockets();
        $w_sockets = $channel->getWriteSockets();

        // If any channel has no read sockets and no write sockets, assume it
        // isn't selectable and return immediately (effectively degrading to a
        // busy wait).

        if (!$r_sockets && !$w_sockets) {
          return false;
        }
      }
    }

    foreach ($reads as $channel) {
      // If any of the read channels have data in read buffers, return
      // immediately. If we don't, we risk running select() on a bunch of
      // sockets which won't become readable because the data the application
      // expects is already in a read buffer.
      if (!$channel->isReadBufferEmpty()) {
        return;
      }

      $r_sockets = $channel->getReadSockets();
      foreach ($r_sockets as $socket) {
        $read[] = $socket;
        $except[] = $socket;
      }
    }

    foreach ($writes as $channel) {
      if ($channel->isWriteBufferEmpty()) {
        // If the channel's write buffer is empty, don't select the write
        // sockets, since they're writable immediately.
        $w_sockets = array();
      } else {
        $w_sockets = $channel->getWriteSockets();
      }

      foreach ($w_sockets as $socket) {
        $write[] = $socket;
        $except[] = $socket;
      }
    }

    if (!$read && !$write && !$except) {
      return false;
    }

    // Split the (possibly fractional) timeout into whole seconds plus
    // microseconds. stream_select() takes integers for both, so truncate the
    // microsecond part explicitly instead of relying on an implicit
    // float-to-int conversion.
    $wait_sec = (int)$wait;
    $wait_usec = (int)(1000000 * ($wait - $wait_sec));

    @stream_select($read, $write, $except, $wait_sec, $wait_usec);
  }


/* -( Responding to Activity )--------------------------------------------- */


  /**
   * Updates the channel, filling input buffers and flushing output buffers.
* Returns false if the channel has closed. * * @return bool True if the channel is still open. * * @task update */ public function update() { $maximum_read = PHP_INT_MAX; if ($this->readBufferSize !== null) { $maximum_read = ($this->readBufferSize - strlen($this->ibuf)); } while ($maximum_read > 0) { $in = $this->readBytes($maximum_read); if (!strlen($in)) { // Reading is blocked for now. break; } $this->ibuf .= $in; $maximum_read -= strlen($in); } while ($this->obuf->getByteLength()) { $len = $this->writeBytes($this->obuf->getAnyPrefix()); if (!$len) { // Writing is blocked for now. break; } $this->obuf->removeBytesFromHead($len); } return $this->isOpen(); } /* -( Channel Implementation )--------------------------------------------- */ /** * Set a channel name. This is primarily intended to allow you to debug * channel code more easily, by naming channels something meaningful. * * @param string Channel name. * @return this * * @task impl */ public function setName($name) { $this->name = $name; return $this; } /** * Get the channel name, as set by @{method:setName}. * * @return string Name of the channel. * * @task impl */ public function getName() { return coalesce($this->name, get_class($this)); } /** * Test if the channel is open: active, can be read from and written to, etc. * * @return bool True if the channel is open. * * @task impl */ abstract public function isOpen(); /** * Close the channel for writing. * * @return void * @task impl */ abstract public function closeWriteChannel(); /** * Test if the channel is open for reading. * * @return bool True if the channel is open for reading. * * @task impl */ public function isOpenForReading() { return $this->isOpen(); } /** * Test if the channel is open for writing. * * @return bool True if the channel is open for writing. * * @task impl */ public function isOpenForWriting() { return $this->isOpen(); } /** * Read from the channel's underlying I/O. * * @param int Maximum number of bytes to read. 
* @return string Bytes, if available. * * @task impl */ abstract protected function readBytes($length); /** * Write to the channel's underlying I/O. * * @param string Bytes to write. * @return int Number of bytes written. * * @task impl */ abstract protected function writeBytes($bytes); /** * Get sockets to select for reading. * * @return list Read sockets. * * @task impl */ protected function getReadSockets() { return array(); } /** * Get sockets to select for writing. * * @return list Write sockets. * * @task impl */ protected function getWriteSockets() { return array(); } /** * Set the maximum size of the channel's read buffer. Reads will artificially * block once the buffer reaches this size until the in-process buffer is * consumed. * * @param int|null Maximum read buffer size, or `null` for a limitless buffer. * @return this * @task impl */ public function setReadBufferSize($size) { $this->readBufferSize = $size; return $this; } /** * Test state of the read buffer. * * @return bool True if the read buffer is empty. * * @task impl */ public function isReadBufferEmpty() { return (strlen($this->ibuf) == 0); } /** * Test state of the write buffer. * * @return bool True if the write buffer is empty. * * @task impl */ public function isWriteBufferEmpty() { return !$this->getWriteBufferSize(); } /** * Get the number of bytes we're currently waiting to write. * * @return int Number of waiting bytes. * * @task impl */ public function getWriteBufferSize() { return $this->obuf->getByteLength(); } /** * Wait for any buffered writes to complete. This is a blocking call. When * the call returns, the write buffer will be empty. 
* * @task impl */ public function flush() { while (!$this->isWriteBufferEmpty()) { self::waitForAny(array($this)); if (!$this->update()) { throw new Exception('Channel closed while flushing output!'); } } return $this; } } diff --git a/src/channel/PhutilChannelChannel.php b/src/channel/PhutilChannelChannel.php index e86ad1b..7e59cb0 100644 --- a/src/channel/PhutilChannelChannel.php +++ b/src/channel/PhutilChannelChannel.php @@ -1,109 +1,107 @@ channel = $channel; $this->didConstruct(); } protected function didConstruct() { // Hook for subclasses. } public function read() { return $this->channel->read(); } public function write($message) { $this->channel->write($message); return $this; } public function update() { return $this->channel->update(); } public function isOpen() { return $this->channel->isOpen(); } public function closeWriteChannel() { return $this->channel->closeWriteChannel(); } public function isOpenForReading() { return $this->channel->isOpenForReading(); } public function isOpenForWriting() { return $this->channel->isOpenForWriting(); } protected function readBytes($length) { $this->throwOnRawByteOperations(); } protected function writeBytes($bytes) { $this->throwOnRawByteOperations(); } protected function getReadSockets() { return $this->channel->getReadSockets(); } protected function getWriteSockets() { return $this->channel->getWriteSockets(); } public function setReadBufferSize($size) { $this->channel->setReadBufferSize($size); return $this; } public function isReadBufferEmpty() { return $this->channel->isReadBufferEmpty(); } public function isWriteBufferEmpty() { return $this->channel->isWriteBufferEmpty(); } public function getWriteBufferSize() { return $this->channel->getWriteBufferSize(); } public function flush() { $this->channel->flush(); return $this; } protected function getUnderlyingChannel() { return $this->channel; } private function throwOnRawByteOperations() { // NOTE: You should only be able to end up here if you subclass this class 
// and implement your subclass incorrectly, since the byte methods are // protected. throw new Exception( 'Do not call readBytes() or writeBytes() directly on a '. 'PhutilChannelChannel. Instead, call read() or write().'); } } diff --git a/src/channel/PhutilExecChannel.php b/src/channel/PhutilExecChannel.php index dc376d3..4583d02 100644 --- a/src/channel/PhutilExecChannel.php +++ b/src/channel/PhutilExecChannel.php @@ -1,175 +1,173 @@ write("GET / HTTP/1.0\n\n"); * while (true) { * echo $channel->read(); * * PhutilChannel::waitForAny(array($channel)); * if (!$channel->update()) { * // Break out of the loop when the channel closes. * break; * } * } * * This script makes an HTTP request to "example.com". This example is heavily * contrived. In most cases, @{class:ExecFuture} and other futures constructs * offer a much easier way to solve problems which involve system commands, and * @{class:HTTPFuture} and other HTTP constructs offer a much easier way to * solve problems which involve HTTP. * * @{class:PhutilExecChannel} is generally useful only when a program acts like * a server but performs I/O on stdin/stdout, and you need to act like a client * or interact with the program at the same time as you manage traditional * socket connections. Examples are Mercurial operating in "cmdserve" mode, git * operating in "receive-pack" mode, etc. It is unlikely that any reasonable * use of this class is concise enough to make a short example out of, so you * get a contrived one instead. * * See also @{class:PhutilSocketChannel}, for a similar channel that uses * sockets for I/O. * * Since @{class:ExecFuture} already supports buffered I/O and socket selection, * the implementation of this class is fairly straightforward.
*
 * @task construct Construction
- *
- * @group channel
 */
final class PhutilExecChannel extends PhutilChannel {

  private $future;
  private $stderrHandler;


/* -( Construction )------------------------------------------------------- */


  /**
   * Construct an exec channel from a @{class:ExecFuture}. The future should
   * **NOT** have been started yet (e.g., with `isReady()` or `start()`),
   * because @{class:ExecFuture} closes stdin by default when futures start.
   * If stdin has been closed, you will be unable to write on the channel.
   *
   * @param ExecFuture Future to use as an underlying I/O source.
   * @task construct
   */
  public function __construct(ExecFuture $future) {
    parent::__construct();

    // Make an empty write to keep the stdin pipe open. By default, futures
    // close this pipe when they start.
    $future->write('', $keep_pipe = true);

    // Start the future so that reads and writes work immediately.
    $future->isReady();

    $this->future = $future;
  }

  /**
   * Kill the underlying future if it is not ready when the channel is
   * destroyed.
   */
  public function __destruct() {
    if (!$this->future->isReady()) {
      $this->future->resolveKill();
    }
  }

  /**
   * Poll the future, then run the normal channel update.
   *
   * @return bool Result of the parent update.
   */
  public function update() {
    $this->future->isReady();
    return parent::update();
  }

  /**
   * The channel is considered open for as long as the future is not ready.
   *
   * @return bool True if the future is not yet ready.
   */
  public function isOpen() {
    return !$this->future->isReady();
  }

  /**
   * Read pending stdout from the future and discard its buffers. Any stderr
   * output is passed to the stderr handler if one is set; otherwise it raises
   * an exception.
   *
   * @param int Maximum number of bytes to read (not used by this
   *            implementation).
   * @return string Bytes read from stdout.
   */
  protected function readBytes($length) {
    list($stdout, $stderr) = $this->future->read();
    $this->future->discardBuffers();

    if (strlen($stderr)) {
      if ($this->stderrHandler) {
        call_user_func($this->stderrHandler, $this, $stderr);
      } else {
        throw new Exception(
          "Unexpected output to stderr on exec channel: {$stderr}");
      }
    }

    return $stdout;
  }

  /**
   * Write bytes to the future's stdin, keeping the pipe open.
   *
   * @param string Bytes to write.
   * @return this
   */
  public function write($bytes) {
    $this->future->write($bytes, $keep_pipe = true);
    // Return $this, as the other channel write() implementations do, so that
    // chained calls work on exec channels too.
    return $this;
  }

  /**
   * Close the write side by making an empty write that does not keep the
   * stdin pipe open.
   *
   * @return void
   */
  public function closeWriteChannel() {
    $this->future->write('', $keep_pipe = false);
  }

  /**
   * Raw byte writes are not supported; writes go through the future in
   * write() instead.
   */
  protected function writeBytes($bytes) {
    throw new Exception('ExecFuture can not write bytes directly!');
  }

  protected function getReadSockets() {
    return $this->future->getReadSockets();
  }

  protected function getWriteSockets() {
    return $this->future->getWriteSockets();
  }

  public function isReadBufferEmpty() {
// Check both the channel and future read buffers, since either could have // data. return parent::isReadBufferEmpty() && $this->future->isReadBufferEmpty(); } public function setReadBufferSize($size) { // NOTE: We may end up using 2x the buffer size here, one inside // ExecFuture and one inside the Channel. We could tune this eventually, but // it should be fine for now. parent::setReadBufferSize($size); $this->future->setReadBufferSize($size); return $this; } public function isWriteBufferEmpty() { return $this->future->isWriteBufferEmpty(); } public function getWriteBufferSize() { return $this->future->getWriteBufferSize(); } /** * If the wrapped @{class:ExecFuture} outputs data to stderr, we normally * throw an exception. Instead, you can provide a callback handler that will * be invoked and passed the data. It should have this signature: * * function f(PhutilExecChannel $channel, $stderr) { * // ... * } * * The `$channel` will be this channel object, and `$stderr` will be a string * with bytes received over stderr. * * You can set a handler which does nothing to effectively ignore and discard * any output on stderr. * * @param callable Handler to invoke when stderr data is received. * @return this */ public function setStderrHandler($handler) { $this->stderrHandler = $handler; return $this; } } diff --git a/src/channel/PhutilMetricsChannel.php b/src/channel/PhutilMetricsChannel.php index ba55e74..083e1d1 100644 --- a/src/channel/PhutilMetricsChannel.php +++ b/src/channel/PhutilMetricsChannel.php @@ -1,87 +1,85 @@ bytesWritten; } /** * Get the number of bytes that have been read from the channel. This excludes * any bytes which have been received but not actually read by anything, and * thus may underreport compared to actual activity on the wire. * * @return int Bytes read. * @task metrics */ public function getBytesRead() { return $this->bytesRead; } /** * Get the elapsed wall time since this channel opened. * * @return float Wall time, in seconds. 
* @task metrics */ public function getWallTime() { return microtime(true) - $this->startTime; } /* -( Implementation )----------------------------------------------------- */ /** * @task impl */ protected function didConstruct() { $this->startTime = microtime(true); } /** * @task impl */ public function read() { $buffer = parent::read(); $this->bytesRead += strlen($buffer); return $buffer; } /** * @task impl */ public function write($message) { $this->bytesWritten += strlen($message); return parent::write($message); } } diff --git a/src/channel/PhutilProtocolChannel.php b/src/channel/PhutilProtocolChannel.php index fe86d2b..883fafa 100644 --- a/src/channel/PhutilProtocolChannel.php +++ b/src/channel/PhutilProtocolChannel.php @@ -1,141 +1,139 @@ decodeStream($data); foreach ($messages as $message) { $this->addMessage($message); } } if (!$this->messages) { return null; } return array_shift($this->messages); } /** * Write a message to the channel. * * @param wild Some message. * @return this * * @task io */ public function write($message) { $bytes = $this->encodeMessage($message); return parent::write($bytes); } /** * Add a message to the queue. While you normally do not need to do this, * you can use it to inject out-of-band messages. * * @param wild Some message. * @return this * * @task io */ public function addMessage($message) { $this->messages[] = $message; return $this; } /* -( Protocol Implementation )-------------------------------------------- */ /** * Encode a message for transmission. * * @param wild Some message. * @return string The message serialized into a wire format for * transmission. * * @task protocol */ abstract protected function encodeMessage($message); /** * Decode bytes from the underlying channel into zero or more complete * messages. The messages should be returned. * * This method is called as data is available. It will receive incoming * data only once, and must buffer any data which represents only part of * a message. 
Once a complete message is received, it can return the message * and discard that part of the buffer. * * Generally, a protocol channel should maintain a read buffer, implement * a parser in this method, and store parser state on the object to be able * to process incoming data in small chunks. * * @param string One or more bytes from the underlying channel. * @return list Zero or more parsed messages. * * @task protocol */ abstract protected function decodeStream($data); /* -( Waiting for Activity )----------------------------------------------- */ /** * Wait for a message, blocking until one is available. * * @return wild A message. * * @task wait */ public function waitForMessage() { while (true) { $is_open = $this->update(); $message = $this->read(); if ($message !== null) { return $message; } if (!$is_open) { break; } self::waitForAny(array($this)); } throw new Exception('Channel closed while waiting for message!'); } } diff --git a/src/channel/PhutilSocketChannel.php b/src/channel/PhutilSocketChannel.php index 721d257..72899bd 100644 --- a/src/channel/PhutilSocketChannel.php +++ b/src/channel/PhutilSocketChannel.php @@ -1,194 +1,192 @@ readSocket = $read_socket; if ($write_socket) { $this->writeSocket = $write_socket; } else { $this->writeSocket = $read_socket; $this->isSingleSocket = true; } } public function __destruct() { $this->closeSockets(); } /** * Creates a pair of socket channels that are connected to each other. This * is mostly useful for writing unit tests of, e.g., protocol channels. * * list($x, $y) = PhutilSocketChannel::newChannelPair(); * * @task construct */ public static function newChannelPair() { $sockets = null; $domain = phutil_is_windows() ? 
STREAM_PF_INET : STREAM_PF_UNIX; $pair = stream_socket_pair($domain, STREAM_SOCK_STREAM, STREAM_IPPROTO_IP); if (!$pair) { throw new Exception('stream_socket_pair() failed!'); } $x = new PhutilSocketChannel($pair[0]); $y = new PhutilSocketChannel($pair[1]); return array($x, $y); } public function isOpen() { return ($this->isOpenForReading() || $this->isOpenForWriting()); } public function isOpenForReading() { return (bool)$this->readSocket; } public function isOpenForWriting() { return (bool)$this->writeSocket; } protected function readBytes($length) { $socket = $this->readSocket; if (!$socket) { return ''; } $data = @fread($socket, min($length, 64 * 1024)); if ($data === false) { $this->closeReadSocket(); $data = ''; } // NOTE: fread() continues returning empty string after the socket is // closed, we need to check for EOF explicitly. if ($data === '') { if (feof($socket)) { $this->closeReadSocket(); } } return $data; } protected function writeBytes($bytes) { $socket = $this->writeSocket; if (!$socket) { return 0; } $len = phutil_fwrite_nonblocking_stream($socket, $bytes); if ($len === false) { $this->closeWriteSocket(); return 0; } return $len; } protected function getReadSockets() { if ($this->readSocket) { return array($this->readSocket); } return array(); } protected function getWriteSockets() { if ($this->writeSocket) { return array($this->writeSocket); } else { return array(); } } private function closeReadSocket() { $this->closeOneSocket($this->readSocket); $this->readSocket = null; if ($this->isSingleSocket) { $this->writeSocket = null; } } private function closeWriteSocket() { $this->closeOneSocket($this->writeSocket); $this->writeSocket = null; if ($this->isSingleSocket) { $this->readSocket = null; } } public function closeWriteChannel() { $this->closeWriteSocket(); } private function closeOneSocket($socket) { if (!$socket) { return; } // We should also stream_socket_shutdown() here but HHVM throws errors // with it (for example 'Unexpected object type 
PlainFile'). We depend // just on fclose() until it is fixed. @fclose($socket); } private function closeSockets() { $this->closeReadSocket(); $this->closeWriteSocket(); } } diff --git a/src/conduit/ConduitClient.php b/src/conduit/ConduitClient.php index 27215c7..f79fbc2 100644 --- a/src/conduit/ConduitClient.php +++ b/src/conduit/ConduitClient.php @@ -1,108 +1,105 @@ connectionID; } public function __construct($uri) { $this->uri = new PhutilURI($uri); if (!strlen($this->uri->getDomain())) { throw new Exception("Conduit URI '{$uri}' must include a valid host."); } } public function callMethodSynchronous($method, array $params) { return $this->callMethod($method, $params)->resolve(); } public function didReceiveResponse($method, $data) { if ($method == 'conduit.connect') { $this->sessionKey = idx($data, 'sessionKey'); $this->connectionID = idx($data, 'connectionID'); } return $data; } public function setTimeout($timeout) { $this->timeout = $timeout; return $this; } public function callMethod($method, array $params) { $meta = array(); if ($this->sessionKey) { $meta['sessionKey'] = $this->sessionKey; } if ($this->connectionID) { $meta['connectionID'] = $this->connectionID; } if ($method == 'conduit.connect') { $certificate = idx($params, 'certificate'); if ($certificate) { $token = time(); $params['authToken'] = $token; $params['authSignature'] = sha1($token.$certificate); } unset($params['certificate']); } if ($meta) { $params['__conduit__'] = $meta; } $uri = id(clone $this->uri)->setPath('/api/'.$method); $data = array( 'params' => json_encode($params), 'output' => 'json', // This is a hint to Phabricator that the client expects a Conduit // response. It is not necessary, but provides better error messages in // some cases. '__conduit__' => true, ); // Always use the cURL-based HTTPSFuture, for proxy support and other // protocol edge cases that HTTPFuture does not support. 
$core_future = new HTTPSFuture($uri, $data); $core_future->setMethod('POST'); $core_future->setTimeout($this->timeout); if ($this->username !== null) { $core_future->setHTTPBasicAuthCredentials( $this->username, $this->password); } $conduit_future = new ConduitFuture($core_future); $conduit_future->setClient($this, $method); $conduit_future->beginProfile($data); $conduit_future->isReady(); return $conduit_future; } public function setBasicAuthCredentials($username, $password) { $this->username = $username; $this->password = new PhutilOpaqueEnvelope($password); return $this; } } diff --git a/src/conduit/ConduitClientException.php b/src/conduit/ConduitClientException.php index cc859e9..87d93c8 100644 --- a/src/conduit/ConduitClientException.php +++ b/src/conduit/ConduitClientException.php @@ -1,19 +1,16 @@ errorCode = $code; } public function getErrorCode() { return $this->errorCode; } } diff --git a/src/conduit/ConduitFuture.php b/src/conduit/ConduitFuture.php index c72914d..753f931 100644 --- a/src/conduit/ConduitFuture.php +++ b/src/conduit/ConduitFuture.php @@ -1,71 +1,68 @@ client = $client; $this->conduitMethod = $method; return $this; } public function beginProfile($data) { $profiler = PhutilServiceProfiler::getInstance(); $this->profilerCallID = $profiler->beginServiceCall( array( 'type' => 'conduit', 'method' => $this->conduitMethod, 'size' => strlen(http_build_query($data, '', '&')), )); return $this; } protected function didReceiveResult($result) { if ($this->profilerCallID !== null) { $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall( $this->profilerCallID, array()); } list($status, $body, $headers) = $result; if ($status->isError()) { throw $status; } $raw = $body; $shield = 'for(;;);'; if (!strncmp($raw, $shield, strlen($shield))) { $raw = substr($raw, strlen($shield)); } $data = json_decode($raw, true); if (!is_array($data)) { throw new Exception( "Host returned HTTP/200, but invalid JSON data in response to ". 
"a Conduit method call:\n{$raw}"); } if ($data['error_code']) { throw new ConduitClientException( $data['error_code'], $data['error_info']); } $result = $data['result']; $result = $this->client->didReceiveResponse( $this->conduitMethod, $result); return $result; } } diff --git a/src/console/PhutilConsole.php b/src/console/PhutilConsole.php index 398241c..57fd670 100644 --- a/src/console/PhutilConsole.php +++ b/src/console/PhutilConsole.php @@ -1,293 +1,292 @@ disabledTypes = new PhutilArrayWithDefaultValue(); } /** * Get the current console. If there's no active console, a new local console * is created (see @{method:newLocalConsole} for details). You can change the * active console with @{method:setConsole}. * * @return PhutilConsole Active console. * @task construct */ public static function getConsole() { if (empty(self::$console)) { self::setConsole(self::newLocalConsole()); } return self::$console; } /** * Set the active console. * * @param PhutilConsole * @return void * @task construct */ public static function setConsole(PhutilConsole $console) { self::$console = $console; } /** * Create a new console attached to stdin/stdout/stderr of this process. * This is how consoles normally work -- for instance, writing output with * @{method:writeOut} prints directly to stdout. If you don't create a * console explicitly, a new local console is created for you. * * @return PhutilConsole A new console which operates on the pipes of this * process. 
* @task construct */ public static function newLocalConsole() { return self::newConsoleForServer(new PhutilConsoleServer()); } public static function newConsoleForServer(PhutilConsoleServer $server) { $console = new PhutilConsole(); $console->server = $server; return $console; } public static function newRemoteConsole() { $io_channel = new PhutilSocketChannel( fopen('php://stdin', 'r'), fopen('php://stdout', 'w')); $protocol_channel = new PhutilPHPObjectProtocolChannel($io_channel); $console = new PhutilConsole(); $console->channel = $protocol_channel; return $console; } /* -( Interfacing with the User )------------------------------------------ */ public function confirm($prompt, $default = false) { $message = id(new PhutilConsoleMessage()) ->setType(PhutilConsoleMessage::TYPE_CONFIRM) ->setData( array( 'prompt' => $prompt, 'default' => $default, )); $this->writeMessage($message); $response = $this->waitForMessage(); return $response->getData(); } public function prompt($prompt, $history = '') { $message = id(new PhutilConsoleMessage()) ->setType(PhutilConsoleMessage::TYPE_PROMPT) ->setData( array( 'prompt' => $prompt, 'history' => $history, )); $this->writeMessage($message); $response = $this->waitForMessage(); return $response->getData(); } public function sendMessage($data) { $message = id(new PhutilConsoleMessage())->setData($data); return $this->writeMessage($message); } public function writeOut($pattern /* , ... */) { $args = func_get_args(); return $this->writeTextMessage(PhutilConsoleMessage::TYPE_OUT, $args); } public function writeErr($pattern /* , ... */) { $args = func_get_args(); return $this->writeTextMessage(PhutilConsoleMessage::TYPE_ERR, $args); } public function writeLog($pattern /* , ... */) { $args = func_get_args(); return $this->writeTextMessage(PhutilConsoleMessage::TYPE_LOG, $args); } public function beginRedirectOut() { // We need as small buffer as possible. 0 means infinite, 1 means 4096 in // PHP < 5.4.0. 
ob_start(array($this, 'redirectOutCallback'), 2); $this->flushing = true; } public function endRedirectOut() { $this->flushing = false; ob_end_flush(); } /* -( Internals )---------------------------------------------------------- */ // Must be public because it is called from output buffering. public function redirectOutCallback($string) { if (strlen($string)) { $this->flushing = false; $this->writeOut('%s', $string); $this->flushing = true; } return ''; } private function writeTextMessage($type, array $argv) { $message = id(new PhutilConsoleMessage()) ->setType($type) ->setData($argv); $this->writeMessage($message); return $this; } private function writeMessage(PhutilConsoleMessage $message) { if ($this->disabledTypes[$message->getType()]) { return $this; } if ($this->flushing) { ob_flush(); } if ($this->channel) { $this->channel->write($message); $this->channel->flush(); } else { $response = $this->server->handleMessage($message); if ($response) { $this->messages[] = $response; } } return $this; } private function waitForMessage() { if ($this->channel) { $message = $this->channel->waitForMessage(); } else if ($this->messages) { $message = array_shift($this->messages); } else { throw new Exception('waitForMessage() called with no messages!'); } return $message; } public function getServer() { return $this->server; } private function disableMessageType($type) { $this->disabledTypes[$type] += 1; return $this; } private function enableMessageType($type) { if ($this->disabledTypes[$type] == 0) { throw new Exception("Message type '{$type}' is already enabled!"); } $this->disabledTypes[$type] -= 1; return $this; } public function disableOut() { return $this->disableMessageType(PhutilConsoleMessage::TYPE_OUT); } public function enableOut() { return $this->enableMessageType(PhutilConsoleMessage::TYPE_OUT); } public function isLogEnabled() { $message = id(new PhutilConsoleMessage()) ->setType(PhutilConsoleMessage::TYPE_ENABLED) ->setData( array( 'which' => 
PhutilConsoleMessage::TYPE_LOG, )); $this->writeMessage($message); $response = $this->waitForMessage(); return $response->getData(); } public function isErrATTY() { $message = id(new PhutilConsoleMessage()) ->setType(PhutilConsoleMessage::TYPE_TTY) ->setData( array( 'which' => PhutilConsoleMessage::TYPE_ERR, )); $this->writeMessage($message); $response = $this->waitForMessage(); return $response->getData(); } public function getErrCols() { $message = id(new PhutilConsoleMessage()) ->setType(PhutilConsoleMessage::TYPE_COLS) ->setData( array( 'which' => PhutilConsoleMessage::TYPE_ERR, )); $this->writeMessage($message); $response = $this->waitForMessage(); return $response->getData(); } } diff --git a/src/console/PhutilConsoleFormatter.php b/src/console/PhutilConsoleFormatter.php index f6d9914..4e2e869 100644 --- a/src/console/PhutilConsoleFormatter.php +++ b/src/console/PhutilConsoleFormatter.php @@ -1,98 +1,95 @@ 0, 'red' => 1, 'green' => 2, 'yellow' => 3, 'blue' => 4, 'magenta' => 5, 'cyan' => 6, 'white' => 7, 'default' => 9, ); private static $disableANSI; public static function disableANSI($disable) { self::$disableANSI = $disable; } public static function getDisableANSI() { if (self::$disableANSI === null) { $term = phutil_utf8_strtolower(getenv('TERM')); // ansicon enables ANSI support on Windows if (!$term && getenv('ANSICON')) { $term = 'ansi'; } if (phutil_is_windows() && $term !== 'cygwin' && $term !== 'ansi') { self::$disableANSI = true; } else if (function_exists('posix_isatty') && !posix_isatty(STDOUT)) { self::$disableANSI = true; } else { self::$disableANSI = false; } } return self::$disableANSI; } public static function formatString($format /* ... */) { $colors = implode('|', array_keys(self::$colorCodes)); // Sequence should be preceded by start-of-string or non-backslash // escaping. 
$bold_re = '/(?(.*)@sU', '\3', $format); } else { $esc = chr(27); $bold = $esc.'[1m'.'\\1'.$esc.'[m'; $underline = $esc.'[4m'.'\\1'.$esc.'[m'; $invert = $esc.'[7m'.'\\1'.$esc.'[m'; $format = preg_replace($bold_re, $bold, $format); $format = preg_replace($underline_re, $underline, $format); $format = preg_replace($invert_re, $invert, $format); $format = preg_replace_callback( '@<(fg|bg):('.$colors.')>(.*)@sU', array('PhutilConsoleFormatter', 'replaceColorCode'), $format); } // Remove backslash escaping $format = preg_replace('/\\\\(\*\*.*\*\*|__.*__|##.*##)/sU', '\1', $format); $args = func_get_args(); $args[0] = $format; return call_user_func_array('sprintf', $args); } public static function replaceColorCode($matches) { $codes = self::$colorCodes; $offset = 30 + $codes[$matches[2]]; $default = 39; if ($matches[1] == 'bg') { $offset += 10; $default += 10; } return chr(27).'['.$offset.'m'.$matches[3].chr(27).'['.$default.'m'; } } diff --git a/src/console/PhutilConsoleServer.php b/src/console/PhutilConsoleServer.php index 8723f19..d8b14bd 100644 --- a/src/console/PhutilConsoleServer.php +++ b/src/console/PhutilConsoleServer.php @@ -1,159 +1,156 @@ getData(); $type = $message->getType(); switch ($type) { case PhutilConsoleMessage::TYPE_CONFIRM: $ok = phutil_console_confirm($data['prompt'], !$data['default']); return $this->buildMessage( PhutilConsoleMessage::TYPE_INPUT, $ok); case PhutilConsoleMessage::TYPE_PROMPT: $response = phutil_console_prompt( $data['prompt'], idx($data, 'history')); return $this->buildMessage( PhutilConsoleMessage::TYPE_INPUT, $response); case PhutilConsoleMessage::TYPE_OUT: $this->writeText(STDOUT, $data); return null; case PhutilConsoleMessage::TYPE_ERR: $this->writeText(STDERR, $data); return null; case PhutilConsoleMessage::TYPE_LOG: if ($this->enableLog) { $this->writeText(STDERR, $data); } return null; case PhutilConsoleMessage::TYPE_ENABLED: switch ($data['which']) { case PhutilConsoleMessage::TYPE_LOG: $enabled = $this->enableLog; break; 
default: $enabled = true; break; } return $this->buildMessage( PhutilConsoleMessage::TYPE_IS_ENABLED, $enabled); case PhutilConsoleMessage::TYPE_TTY: case PhutilConsoleMessage::TYPE_COLS: switch ($data['which']) { case PhutilConsoleMessage::TYPE_OUT: $which = STDOUT; break; case PhutilConsoleMessage::TYPE_ERR: $which = STDERR; break; } switch ($type) { case PhutilConsoleMessage::TYPE_TTY: if (function_exists('posix_isatty')) { $is_a_tty = posix_isatty($which); } else { $is_a_tty = null; } return $this->buildMessage( PhutilConsoleMessage::TYPE_IS_TTY, $is_a_tty); case PhutilConsoleMessage::TYPE_COLS: // TODO: This is an approximation which might not be perfectly // accurate. $width = phutil_console_get_terminal_width(); return $this->buildMessage( PhutilConsoleMessage::TYPE_COL_WIDTH, $width); } break; default: if ($this->handler) { return call_user_func($this->handler, $message); } else { throw new Exception( "Received unknown console message of type '{$type}'."); } } } /** * Set handler called for unknown messages. * * @param callable Signature: (PhutilConsoleMessage $message). 
*/ public function setHandler($callback) { $this->handler = $callback; return $this; } private function buildMessage($type, $data) { $response = new PhutilConsoleMessage(); $response->setType($type); $response->setData($data); return $response; } public function addExecFutureClient(ExecFuture $future) { $io_channel = new PhutilExecChannel($future); $protocol_channel = new PhutilPHPObjectProtocolChannel($io_channel); $server_channel = new PhutilConsoleServerChannel($protocol_channel); $io_channel->setStderrHandler(array($server_channel, 'didReceiveStderr')); return $this->addClient($server_channel); } public function addClient(PhutilConsoleServerChannel $channel) { $this->clients[] = $channel; return $this; } public function setEnableLog($enable) { $this->enableLog = $enable; return $this; } public function run() { while ($this->clients) { PhutilChannel::waitForAny($this->clients); foreach ($this->clients as $key => $client) { if (!$client->update()) { // If the client has exited, remove it from the list of clients. // We still need to process any remaining buffered I/O. 
unset($this->clients[$key]); } while ($message = $client->read()) { $response = $this->handleMessage($message); if ($response) { $client->write($response); } } } } } private function writeText($where, array $argv) { $text = call_user_func_array('phutil_console_format', $argv); fprintf($where, '%s', $text); } } diff --git a/src/console/PhutilConsoleServerChannel.php b/src/console/PhutilConsoleServerChannel.php index dc9a2fa..3bcb329 100644 --- a/src/console/PhutilConsoleServerChannel.php +++ b/src/console/PhutilConsoleServerChannel.php @@ -1,15 +1,12 @@ setType(PhutilConsoleMessage::TYPE_ERR) ->setData(array('%s', $stderr)); $this->getUnderlyingChannel()->addMessage($message); } } diff --git a/src/console/PhutilConsoleStdinNotInteractiveException.php b/src/console/PhutilConsoleStdinNotInteractiveException.php index 9e098c8..199f2f3 100644 --- a/src/console/PhutilConsoleStdinNotInteractiveException.php +++ b/src/console/PhutilConsoleStdinNotInteractiveException.php @@ -1,17 +1,17 @@ setName('shopping_list') * ->setLineOffset(15) * ->editInteractively(); * * This will launch the user's $EDITOR to edit the specified '$document', and * return their changes into '$result'. * * @task create Creating a New Editor * @task edit Editing Interactively * @task config Configuring Options - * @group console */ final class PhutilInteractiveEditor { private $name = ''; private $content = ''; private $offset = 0; private $preferred; private $fallback; /* -( Creating a New Editor )---------------------------------------------- */ /** * Constructs an interactive editor, using the text of a document. * * @param string Document text. * @return $this * * @task create */ public function __construct($content) { $this->setContent($content); } /* -( Editing Interactively )----------------------------------------------- */ /** * Launch an editor and edit the content. The edited content will be * returned. * * @return string Edited content. 
* @throws Exception The editor exited abnormally or something untoward * occurred. * * @task edit */ public function editInteractively() { $name = $this->getName(); $content = $this->getContent(); if (phutil_is_windows()) { $content = str_replace("\n", "\r\n", $content); } $tmp = Filesystem::createTemporaryDirectory('edit.'); $path = $tmp.DIRECTORY_SEPARATOR.$name; try { Filesystem::writeFile($path, $content); } catch (Exception $ex) { Filesystem::remove($tmp); throw $ex; } $editor = $this->getEditor(); $offset = $this->getLineOffset(); $err = $this->invokeEditor($editor, $path, $offset); if ($err) { Filesystem::remove($tmp); throw new Exception("Editor exited with an error code (#{$err})."); } try { $result = Filesystem::readFile($path); Filesystem::remove($tmp); } catch (Exception $ex) { Filesystem::remove($tmp); throw $ex; } if (phutil_is_windows()) { $result = str_replace("\r\n", "\n", $result); } $this->setContent($result); return $this->getContent(); } private function invokeEditor($editor, $path, $offset) { // NOTE: Popular Windows editors like Notepad++ and GitPad do not support // line offsets, so just ignore the offset feature on Windows. We rarely // use it anyway. $offset_flag = ''; if ($offset && !phutil_is_windows()) { $offset = (int)$offset; if (preg_match('/^mate/', $editor)) { $offset_flag = csprintf('-l %d', $offset); } else { $offset_flag = csprintf('+%d', $offset); } } $cmd = csprintf( '%C %C %s', $editor, $offset_flag, $path); return phutil_passthru('%C', $cmd); } /* -( Configuring Options )------------------------------------------------- */ /** * Set the line offset where the cursor should be positioned when the editor * opens. By default, the cursor will be positioned at the start of the * content. * * @param int Line number where the cursor should be positioned. * @return $this * * @task config */ public function setLineOffset($offset) { $this->offset = (int)$offset; return $this; } /** * Get the current line offset. See setLineOffset(). 
* * @return int Current line offset. * * @task config */ public function getLineOffset() { return $this->offset; } /** * Set the document name. Depending on the editor, this may be exposed to * the user and can give them a sense of what they're editing. * * @param string Document name. * @return $this * * @task config */ public function setName($name) { $name = preg_replace('/[^A-Z0-9._-]+/i', '', $name); $this->name = $name; return $this; } /** * Get the current document name. See setName() for details. * * @return string Current document name. * * @task config */ public function getName() { if (!strlen($this->name)) { return 'untitled'; } return $this->name; } /** * Set the text content to be edited. * * @param string New content. * @return $this * * @task config */ public function setContent($content) { $this->content = $content; return $this; } /** * Retrieve the current content. * * @return string * * @task config */ public function getContent() { return $this->content; } /** * Set the fallback editor program to be used if the env variable $EDITOR * is not available and there is no `editor` binary in PATH. * * @param string Command-line editing program (e.g. 'emacs', 'vi') * @return $this * * @task config */ public function setFallbackEditor($editor) { $this->fallback = $editor; return $this; } /** * Set the preferred editor program. If set, this will override all other * sources of editor configuration, like $EDITOR. * * @param string Command-line editing program (e.g. 'emacs', 'vi') * @return $this * * @task config */ public function setPreferredEditor($editor) { $this->preferred = $editor; return $this; } /** * Get the name of the editor program to use. The value of the environmental * variable $EDITOR will be used if available; otherwise, the `editor` binary * if present; otherwise the best editor will be selected. * * @return string Command-line editing program. 
* * @task config */ public function getEditor() { if ($this->preferred) { return $this->preferred; } $editor = getenv('EDITOR'); if ($editor) { return $editor; } // Look for `editor` in PATH, some systems provide an editor which is // linked to something sensible. if (Filesystem::binaryExists('editor')) { return 'editor'; } if ($this->fallback) { return $this->fallback; } if (Filesystem::binaryExists('nano')) { return 'nano'; } throw new Exception( 'Unable to launch an interactive text editor. Set the EDITOR '. 'environment variable to an appropriate editor.'); } + } diff --git a/src/console/__tests__/PhutilConsoleWrapTestCase.php b/src/console/__tests__/PhutilConsoleWrapTestCase.php index 3ca2ad6..1f57607 100644 --- a/src/console/__tests__/PhutilConsoleWrapTestCase.php +++ b/src/console/__tests__/PhutilConsoleWrapTestCase.php @@ -1,50 +1,46 @@ assertEqual( Filesystem::readFile($dir.$file.'.expect'), phutil_console_wrap(Filesystem::readFile($dir.$file)), $file); } } } public function testConsoleWrap() { $this->assertEqual( phutil_console_format( "** ERROR ** abc abc abc abc abc abc abc abc abc abc ". "abc abc abc abc abc abc abc\nabc abc abc abc abc abc abc abc abc ". "abc abc!"), phutil_console_wrap( phutil_console_format( '** ERROR ** abc abc abc abc abc abc abc abc abc abc '. 'abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc '. 'abc abc!')), 'ANSI escape sequences should not contribute toward wrap width.'); } public function testWrapIndent() { $turtles = <<assertEqual( $turtles, phutil_console_wrap( rtrim(str_repeat('turtle ', 20)), $indent = 20)); } - } diff --git a/src/console/format.php b/src/console/format.php index b96c1d6..1fde8f8 100644 --- a/src/console/format.php +++ b/src/console/format.php @@ -1,229 +1,213 @@ /dev/null; '. 'read -e -p %s; '. 'echo "$REPLY"; '. 'history -s "$REPLY" 2>/dev/null; '. 'history -w %s 2>/dev/null', $history, $prompt, $history)); // execx() doesn't work with input, phutil_passthru() doesn't return output. 
$response = shell_exec($command); } return rtrim($response, "\r\n"); } /** * Soft wrap text for display on a console, respecting UTF8 character boundaries * and ANSI color escape sequences. * * @param string Text to wrap. * @param int Optional indent level. * @return string Wrapped text. - * - * @group console */ function phutil_console_wrap($text, $indent = 0) { $lines = array(); $width = (78 - $indent); $esc = chr(27); $break_pos = null; $len_after_break = 0; $line_len = 0; $line = array(); $lines = array(); $vector = phutil_utf8v($text); $vector_len = count($vector); for ($ii = 0; $ii < $vector_len; $ii++) { $chr = $vector[$ii]; // If this is an ANSI escape sequence for a color code, just consume it // without counting it toward the character limit. This prevents lines // with bold/color on them from wrapping too early. if ($chr == $esc) { for ($ii; $ii < $vector_len; $ii++) { $line[] = $vector[$ii]; if ($vector[$ii] == 'm') { break; } } continue; } $line[] = $chr; ++$line_len; ++$len_after_break; if ($line_len > $width) { if ($break_pos !== null) { $slice = array_slice($line, 0, $break_pos); while (count($slice) && end($slice) == ' ') { array_pop($slice); } $slice[] = "\n"; $lines[] = $slice; $line = array_slice($line, $break_pos); $line_len = $len_after_break; $len_after_break = 0; $break_pos = null; } } if ($chr == ' ') { $break_pos = count($line); $len_after_break = 0; } if ($chr == "\n") { $lines[] = $line; $line = array(); $len_after_break = 0; $line_len = 0; $break_pos = null; } } if ($line) { if ($line) { $lines[] = $line; } } $pre = null; if ($indent) { $pre = str_repeat(' ', $indent); } foreach ($lines as $idx => $line) { $lines[$idx] = $pre.implode('', $line); } return implode('', $lines); } -/** - * @group console - */ function phutil_console_require_tty() { if (function_exists('posix_isatty') && !posix_isatty(STDIN)) { throw new PhutilConsoleStdinNotInteractiveException(); } } /** * Determine the width of the terminal, if possible. 
Returns `null` on failure. * * @return int|null Terminal width in characters, or null on failure. - * @group console */ function phutil_console_get_terminal_width() { if (phutil_is_windows()) { // TODO: Figure out how to get this working in Windows. return null; } $tmp = new TempFile(); // NOTE: We can't just execute this because it won't be connected to a TTY // if we do. $err = phutil_passthru('tput cols > %s', $tmp); if ($err) { return null; } try { $cols = Filesystem::readFile($tmp); } catch (FilesystemException $ex) { return null; } $cols = (int)$cols; if (!$cols) { return null; } return $cols; } diff --git a/src/daemon/PhutilDaemon.php b/src/daemon/PhutilDaemon.php index 446da3b..123dd06 100644 --- a/src/daemon/PhutilDaemon.php +++ b/src/daemon/PhutilDaemon.php @@ -1,135 +1,134 @@ verbose = $verbose; return $this; } final public function getVerbose() { return $this->verbose; } private static $sighandlerInstalled; final public function __construct(array $argv) { declare(ticks = 1); $this->argv = $argv; if (!self::$sighandlerInstalled) { self::$sighandlerInstalled = true; pcntl_signal(SIGINT, __CLASS__.'::exitOnSignal'); pcntl_signal(SIGTERM, __CLASS__.'::exitOnSignal'); } pcntl_signal(SIGUSR2, array($this, 'onNotifySignal')); // Without discard mode, this consumes unbounded amounts of memory. Keep // memory bounded. 
PhutilServiceProfiler::getInstance()->enableDiscardMode(); } final public function stillWorking() { if (!posix_isatty(STDOUT)) { posix_kill(posix_getppid(), SIGUSR1); } if ($this->traceMemory) { $memuse = number_format(memory_get_usage() / 1024, 1); $daemon = get_class($this); fprintf(STDERR, '%s', " {$daemon} Memory Usage: {$memuse} KB\n"); } } final protected function sleep($duration) { $this->notifyReceived = false; $this->willSleep($duration); $this->stillWorking(); while ($duration > 0 && !$this->notifyReceived) { sleep(min($duration, 60)); $duration -= 60; $this->stillWorking(); } } protected function willSleep($duration) { return; } public static function exitOnSignal($signo) { // Normally, PHP doesn't invoke destructors when existing in response to // a signal. This forces it to do so, so we have a fighting chance of // releasing any locks, leases or resources on our way out. exit(128 + $signo); } final protected function getArgv() { return $this->argv; } final public function execute() { $this->willRun(); $this->run(); } abstract protected function run(); final public function setTraceMemory() { $this->traceMemory = true; return $this; } final public function getTraceMemory() { return $this->traceMemory; } final public function setTraceMode() { $this->traceMode = true; PhutilServiceProfiler::installEchoListener(); PhutilConsole::getConsole()->getServer()->setEnableLog(true); $this->didSetTraceMode(); return $this; } final public function getTraceMode() { return $this->traceMode; } public final function onNotifySignal($signo) { $this->notifyReceived = true; $this->onNotify($signo); } protected function onNotify($signo) { // This is a hook for subclasses. } protected function willRun() { // This is a hook for subclasses. } protected function didSetTraceMode() { // This is a hook for subclasses. 
} final protected function log($message) { if ($this->verbose) { $daemon = get_class($this); fprintf(STDERR, '%s', " {$daemon} {$message}\n"); } } } diff --git a/src/daemon/PhutilDaemonOverseer.php b/src/daemon/PhutilDaemonOverseer.php index f75163d..2a36671 100644 --- a/src/daemon/PhutilDaemonOverseer.php +++ b/src/daemon/PhutilDaemonOverseer.php @@ -1,444 +1,442 @@ enableDiscardMode(); $original_argv = $argv; $args = new PhutilArgumentParser($argv); $args->setTagline('daemon overseer'); $args->setSynopsis(<<parseStandardArguments(); $args->parsePartial( array( array( 'name' => 'trace-memory', 'help' => 'Enable debug memory tracing.', ), array( 'name' => 'log', 'param' => 'file', 'help' => 'Send output to __file__.', ), array( 'name' => 'daemonize', 'help' => 'Run in the background.', ), array( 'name' => 'phd', 'param' => 'dir', 'help' => 'Write PID information to __dir__.', ), array( 'name' => 'verbose', 'help' => 'Enable verbose activity logging.', ), array( 'name' => 'load-phutil-library', 'param' => 'library', 'repeat' => true, 'help' => 'Load __library__.', ), )); $argv = array(); $more = $args->getUnconsumedArgumentVector(); $this->daemon = array_shift($more); if (!$this->daemon) { $args->printHelpAndExit(); } if ($args->getArg('trace')) { $this->traceMode = true; $argv[] = '--trace'; } if ($args->getArg('trace-memory')) { $this->traceMode = true; $this->traceMemory = true; $argv[] = '--trace-memory'; } if ($args->getArg('load-phutil-library')) { foreach ($args->getArg('load-phutil-library') as $library) { $argv[] = '--load-phutil-library='.$library; } } $log = $args->getArg('log'); if ($log) { ini_set('error_log', $log); $argv[] = '--log='.$log; } $verbose = $args->getArg('verbose'); if ($verbose) { $this->verbose = true; $argv[] = '--verbose'; } $this->daemonize = $args->getArg('daemonize'); $this->phddir = $args->getArg('phd'); $this->argv = $argv; $this->moreArgs = coalesce($more, array()); error_log("Bringing daemon '{$this->daemon}' online..."); if 
(self::$instance) { throw new Exception( 'You may not instantiate more than one Overseer per process.'); } self::$instance = $this; if ($this->daemonize) { // We need to get rid of these or the daemon will hang when we TERM it // waiting for something to read the buffers. TODO: Learn how unix works. fclose(STDOUT); fclose(STDERR); ob_start(); $pid = pcntl_fork(); if ($pid === -1) { throw new Exception('Unable to fork!'); } else if ($pid) { exit(0); } } if ($this->phddir) { $desc = array( 'name' => $this->daemon, 'argv' => $this->moreArgs, 'pid' => getmypid(), 'start' => time(), ); Filesystem::writeFile( $this->phddir.'/daemon.'.getmypid(), json_encode($desc)); } $this->daemonID = $this->generateDaemonID(); $this->dispatchEvent( self::EVENT_DID_LAUNCH, array( 'argv' => array_slice($original_argv, 1), 'explicitArgv' => $this->moreArgs)); declare(ticks = 1); pcntl_signal(SIGUSR1, array($this, 'didReceiveKeepaliveSignal')); pcntl_signal(SIGUSR2, array($this, 'didReceiveNotifySignal')); pcntl_signal(SIGINT, array($this, 'didReceiveTerminalSignal')); pcntl_signal(SIGTERM, array($this, 'didReceiveTerminalSignal')); } public function run() { if ($this->shouldRunSilently()) { echo "Running daemon '{$this->daemon}' silently. Use '--trace' or ". "'--verbose' to produce debugging output.\n"; } $root = phutil_get_library_root('phutil'); $root = dirname($root); $exec_dir = $root.'/scripts/daemon/exec/'; // NOTE: PHP implements proc_open() by running 'sh -c'. On most systems this // is bash, but on Ubuntu it's dash. When you proc_open() using bash, you // get one new process (the command you ran). When you proc_open() using // dash, you get two new processes: the command you ran and a parent // "dash -c" (or "sh -c") process. This means that the child process's PID // is actually the 'dash' PID, not the command's PID. 
To avoid this, use // 'exec' to replace the shell process with the real process; without this, // the child will call posix_getppid(), be given the pid of the 'sh -c' // process, and send it SIGUSR1 to keepalive which will terminate it // immediately. We also won't be able to do process group management because // the shell process won't properly posix_setsid() so the pgid of the child // won't be meaningful. // Format the exec command, which looks something like: // // exec ./exec_daemon DaemonName --trace -- --no-discovery $argv = array(); $argv[] = csprintf('exec ./exec_daemon.php %s', $this->daemon); foreach ($this->argv as $k => $arg) { $argv[] = csprintf('%s', $arg); } $argv[] = '--'; foreach ($this->moreArgs as $k => $arg) { $argv[] = csprintf('%s', $arg); } $command = implode(' ', $argv); while (true) { $this->logMessage('INIT', 'Starting process.'); $future = new ExecFuture('%C', $command); $future->setCWD($exec_dir); $future->setStdoutSizeLimit($this->captureBufferSize); $future->setStderrSizeLimit($this->captureBufferSize); $this->deadline = time() + $this->deadlineTimeout; $this->heartbeat = time() + self::HEARTBEAT_WAIT; $future->isReady(); $this->childPID = $future->getPID(); do { do { if ($this->traceMemory) { $memuse = number_format(memory_get_usage() / 1024, 1); $this->logMessage('RAMS', 'Overseer Memory Usage: '.$memuse.' KB'); } // We need a shortish timeout here so we can run the tick handler // frequently in order to process signals. 
$result = $future->resolve(1); list($stdout, $stderr) = $future->read(); $stdout = trim($stdout); $stderr = trim($stderr); if (strlen($stdout)) { $this->logMessage('STDO', $stdout); } if (strlen($stderr)) { $this->logMessage('STDE', $stderr); } $future->discardBuffers(); if ($result !== null) { list($err) = $result; if ($err) { $this->logMessage( 'FAIL', 'Process exited with error '.$err.'.', $err); } else { $this->logMessage('DONE', 'Process exited successfully.'); } break 2; } if ($this->heartbeat < time()) { $this->heartbeat = time() + self::HEARTBEAT_WAIT; $this->dispatchEvent(self::EVENT_DID_HEARTBEAT); } } while (time() < $this->deadline); $this->logMessage('HANG', 'Hang detected. Restarting process.'); $this->annihilateProcessGroup(); } while (false); $this->logMessage('WAIT', 'Waiting to restart process.'); sleep(self::RESTART_WAIT); } } public function didReceiveNotifySignal($signo) { $pid = $this->childPID; if ($pid) { posix_kill($pid, $signo); } } public function didReceiveKeepaliveSignal($signo) { $this->deadline = time() + $this->deadlineTimeout; } public function didReceiveTerminalSignal($signo) { if ($this->signaled) { exit(128 + $signo); } $this->signaled = true; $signame = phutil_get_signal_name($signo); if ($signame) { $sigmsg = "Shutting down in response to signal {$signo} ({$signame})."; } else { $sigmsg = "Shutting down in response to signal {$signo}."; } $this->logMessage('EXIT', $sigmsg, $signo); @fflush(STDOUT); @fflush(STDERR); @fclose(STDOUT); @fclose(STDERR); $this->annihilateProcessGroup(); $this->dispatchEvent(self::EVENT_WILL_EXIT); exit(128 + $signo); } private function logMessage($type, $message, $context = null) { if (!$this->shouldRunSilently()) { echo date('Y-m-d g:i:s A').' 
['.$type.'] '.$message."\n"; } $this->dispatchEvent( self::EVENT_DID_LOG, array( 'type' => $type, 'message' => $message, 'context' => $context, )); } private function shouldRunSilently() { if ($this->traceMode || $this->verbose) { return false; } else { return true; } } private function annihilateProcessGroup() { $pid = $this->childPID; $pgid = posix_getpgid($pid); if ($pid && $pgid) { // NOTE: On Ubuntu, 'kill' does not recognize the use of "--" to // explicitly delineate PID/PGIDs from signals. We don't actually need it, // so use the implicit "kill -TERM -pgid" form instead of the explicit // "kill -TERM -- -pgid" form. exec("kill -TERM -{$pgid}"); sleep($this->killDelay); // On OSX, we'll get a permission error on stderr if the SIGTERM was // successful in ending the life of the process group, presumably because // all that's left is the daemon itself as a zombie waiting for us to // reap it. However, we still need to issue this command for process // groups that resist SIGTERM. Rather than trying to figure out if the // process group is still around or not, just SIGKILL unconditionally and // ignore any error which may be raised. exec("kill -KILL -{$pgid} 2>/dev/null"); $this->childPID = null; } } /** * Identify running daemons by examining the process table. This isn't * completely reliable, but can be used as a fallback if the pid files fail * or we end up with stray daemons by other means. * * Example output (array keys are process IDs): * * array( * 12345 => array( * 'type' => 'overseer', * 'command' => 'php launch_daemon.php --daemonize ...', * ), * 12346 => array( * 'type' => 'daemon', * 'command' => 'php exec_daemon.php ...', * ), * ); * * @return dict Map of PIDs to process information, identifying running * daemon processes. 
*/ public static function findRunningDaemons() { $results = array(); list($err, $processes) = exec_manual('ps -o pid,command -a -x -w -w -w'); if ($err) { return $results; } $processes = array_filter(explode("\n", trim($processes))); foreach ($processes as $process) { list($pid, $command) = explode(' ', $process, 2); $matches = null; if (!preg_match('/(launch|exec)_daemon.php/', $command, $matches)) { continue; } $results[(int)$pid] = array( 'type' => ($matches[1] == 'launch') ? 'overseer' : 'daemon', 'command' => $command, ); } return $results; } /** * Generate a unique ID for this daemon. * * @return string A unique daemon ID. */ private function generateDaemonID() { return substr(getmypid().':'.Filesystem::readRandomCharacters(12), 0, 12); } /** * Dispatch an event to event listeners. * * @param string Event type. * @param dict Event parameters. * @return void */ private function dispatchEvent($type, array $params = array()) { $data = array( 'id' => $this->daemonID, 'daemonClass' => $this->daemon, 'childPID' => $this->childPID, ) + $params; $event = new PhutilEvent($type, $data); try { PhutilEventEngine::dispatchEvent($event); } catch (Exception $ex) { phlog($ex); } } } diff --git a/src/daemon/torture/PhutilExcessiveServiceCallsDaemon.php b/src/daemon/torture/PhutilExcessiveServiceCallsDaemon.php index 54344b0..f8e2a19 100644 --- a/src/daemon/torture/PhutilExcessiveServiceCallsDaemon.php +++ b/src/daemon/torture/PhutilExcessiveServiceCallsDaemon.php @@ -1,17 +1,15 @@ stillWorking(); } } } diff --git a/src/daemon/torture/PhutilFatalDaemon.php b/src/daemon/torture/PhutilFatalDaemon.php index ce9b20d..73298e5 100644 --- a/src/daemon/torture/PhutilFatalDaemon.php +++ b/src/daemon/torture/PhutilFatalDaemon.php @@ -1,14 +1,12 @@ log(date('r')); $this->stillWorking(); sleep(1); } } } diff --git a/src/daemon/torture/PhutilProcessGroupDaemon.php b/src/daemon/torture/PhutilProcessGroupDaemon.php index 74b377c..53d519a 100644 --- 
a/src/daemon/torture/PhutilProcessGroupDaemon.php +++ b/src/daemon/torture/PhutilProcessGroupDaemon.php @@ -1,17 +1,15 @@ doSomething(); * $success = true; * break; * } catch (Exception $ex) { * $exceptions[get_class($engine)] = $ex; * } * } * * if (!$success) { * throw new PhutilAggregateException("All engines failed:", $exceptions); * } * * @concrete-extensible - * @group error */ class PhutilAggregateException extends Exception { private $exceptions = array(); public function __construct($message, array $other_exceptions) { // We don't call assert_instances_of($other_exceptions, 'Exception') to not // throw another exception in this exception. $this->exceptions = $other_exceptions; $full_message = array(); $full_message[] = $message; foreach ($other_exceptions as $key => $exception) { $ex_message = (is_string($key) ? $key.': ' : ''). get_class($exception).': '. $exception->getMessage(); $ex_message = ' - '.str_replace("\n", "\n ", $ex_message); $full_message[] = $ex_message; } parent::__construct(implode("\n", $full_message), count($other_exceptions)); } public function getExceptions() { return $this->exceptions; } } diff --git a/src/error/PhutilErrorHandler.php b/src/error/PhutilErrorHandler.php index 7432ca0..706c896 100644 --- a/src/error/PhutilErrorHandler.php +++ b/src/error/PhutilErrorHandler.php @@ -1,444 +1,443 @@ getPrevious(); } if (method_exists($ex, 'getPreviousException')) { return $ex->getPreviousException(); } return null; } /** * Find the most deeply nested exception from a possibly-nested exception. * * @param Exception A possibly-nested exception. * @return Exception Deepest exception in the nest. * @task exutil */ public static function getRootException(Exception $ex) { $root = $ex; while (self::getPreviousException($root)) { $root = self::getPreviousException($root); } return $root; } /* -( Trapping Errors )---------------------------------------------------- */ /** * Adds an error trap. 
Normally you should not invoke this directly; * @{class:PhutilErrorTrap} registers itself on construction. * * @param PhutilErrorTrap Trap to add. * @return void * @task trap */ public static function addErrorTrap(PhutilErrorTrap $trap) { $key = $trap->getTrapKey(); self::$traps[$key] = $trap; } /** * Removes an error trap. Normally you should not invoke this directly; * @{class:PhutilErrorTrap} deregisters itself on destruction. * * @param PhutilErrorTrap Trap to remove. * @return void * @task trap */ public static function removeErrorTrap(PhutilErrorTrap $trap) { $key = $trap->getTrapKey(); unset(self::$traps[$key]); } /* -( Internals )---------------------------------------------------------- */ /** * Determine if PhutilErrorHandler has been initialized. * * @return bool True if initialized. * @task internal */ public static function hasInitialized() { return self::$initialized; } /** * Handles PHP errors and dispatches them forward. This is a callback for * ##set_error_handler()##. You should not call this function directly; use * @{function:phlog} to print debugging messages or ##trigger_error()## to * trigger PHP errors. * * This handler converts E_RECOVERABLE_ERROR messages from violated typehints * into @{class:InvalidArgumentException}s. * * This handler converts other E_RECOVERABLE_ERRORs into * @{class:RuntimeException}s. * * This handler converts E_NOTICE messages from uses of undefined variables * into @{class:RuntimeException}s. * * @param int Error code. * @param string Error message. * @param string File where the error occurred. * @param int Line on which the error occurred. * @param wild Error context information. 
* @return void * @task internal */ public static function handleError($num, $str, $file, $line, $ctx) { foreach (self::$traps as $trap) { $trap->addError($num, $str, $file, $line, $ctx); } if ((error_reporting() & $num) == 0) { // Respect the use of "@" to silence warnings: if this error was // emitted from a context where "@" was in effect, the // value returned by error_reporting() will be 0. This is the // recommended way to check for this, see set_error_handler() docs // on php.net. return false; } // Convert typehint failures into exceptions. if (preg_match('/^Argument (\d+) passed to (\S+) must be/', $str)) { throw new InvalidArgumentException($str); } // Convert other E_RECOVERABLE_ERRORs into generic runtime exceptions. if ($num == E_RECOVERABLE_ERROR) { throw new RuntimeException($str); } // Convert uses of undefined variables into exceptions. if (preg_match('/^Undefined variable: /', $str)) { throw new RuntimeException($str); } // Convert uses of undefined properties into exceptions. if (preg_match('/^Undefined property: /', $str)) { throw new RuntimeException($str); } // Convert undefined constants into exceptions. Usually this means there // is a missing `$` and the program is horribly broken. if (preg_match('/^Use of undefined constant /', $str)) { throw new RuntimeException($str); } $trace = debug_backtrace(); array_shift($trace); self::dispatchErrorMessage( self::ERROR, $str, array( 'file' => $file, 'line' => $line, 'context' => $ctx, 'error_code' => $num, 'trace' => $trace, )); } /** * Handles PHP exceptions and dispatches them forward. This is a callback for * ##set_exception_handler()##. You should not call this function directly; * to print exceptions, pass the exception object to @{function:phlog}. * * @param Exception Uncaught exception object. 
* @return void * @task internal */ public static function handleException(Exception $ex) { self::dispatchErrorMessage( self::EXCEPTION, $ex, array( 'file' => $ex->getFile(), 'line' => $ex->getLine(), 'trace' => self::getRootException($ex)->getTrace(), 'catch_trace' => debug_backtrace(), )); // Normally, PHP exits with code 255 after an uncaught exception is thrown. // However, if we install an exception handler (as we have here), it exits // with code 0 instead. Script execution terminates after this function // exits in either case, so exit explicitly with the correct exit code. exit(255); } /** * Output a stacktrace to the PHP error log. * * @param trace A stacktrace, e.g. from debug_backtrace(); * @return void * @task internal */ public static function outputStacktrace($trace) { $lines = explode("\n", self::formatStacktrace($trace)); foreach ($lines as $line) { error_log($line); } } /** * Format a stacktrace for output. * * @param trace A stacktrace, e.g. from debug_backtrace(); * @return string Human-readable trace. * @task internal */ public static function formatStacktrace($trace) { $result = array(); foreach ($trace as $key => $entry) { $line = ' #'.$key.' '; if (isset($entry['class'])) { $line .= $entry['class'].'::'; } $line .= idx($entry, 'function', ''); if (isset($entry['args'])) { $args = array(); foreach ($entry['args'] as $arg) { // NOTE: Print out object types, not values. Values sometimes contain // sensitive information and are usually not particularly helpful // for debugging. $type = (gettype($arg) == 'object') ? get_class($arg) : gettype($arg); $args[] = $type; } $line .= '('.implode(', ', $args).')'; } if (isset($entry['file'])) { $file = self::adjustFilePath($entry['file']); $line .= ' called at ['.$file.':'.$entry['line'].']'; } $result[] = $line; } return implode("\n", $result); } /** * All different types of error messages come here before they are * dispatched to the listener; this method also prints them to the PHP error * log. 
* * @param const Event type constant. * @param wild Event value. * @param dict Event metadata. * @return void * @task internal */ public static function dispatchErrorMessage($event, $value, $metadata) { $timestamp = strftime('%Y-%m-%d %H:%M:%S'); switch ($event) { case PhutilErrorHandler::ERROR: $default_message = sprintf( '[%s] ERROR %d: %s at [%s:%d]', $timestamp, $metadata['error_code'], $value, $metadata['file'], $metadata['line']); $metadata['default_message'] = $default_message; error_log($default_message); self::outputStacktrace($metadata['trace']); break; case PhutilErrorHandler::EXCEPTION: $messages = array(); $current = $value; do { $messages[] = '('.get_class($current).') '.$current->getMessage(); } while ($current = self::getPreviousException($current)); $messages = implode(' {>} ', $messages); if (strlen($messages) > 4096) { $messages = substr($messages, 0, 4096).'...'; } $default_message = sprintf( '[%s] EXCEPTION: %s at [%s:%d]', $timestamp, $messages, self::adjustFilePath(self::getRootException($value)->getFile()), self::getRootException($value)->getLine()); $metadata['default_message'] = $default_message; error_log($default_message); self::outputStacktrace(self::getRootException($value)->getTrace()); break; case PhutilErrorHandler::PHLOG: $default_message = sprintf( '[%s] PHLOG: %s at [%s:%d]', $timestamp, PhutilReadableSerializer::printShort($value), $metadata['file'], $metadata['line']); $metadata['default_message'] = $default_message; error_log($default_message); break; case PhutilErrorHandler::DEPRECATED: $default_message = sprintf( '[%s] DEPRECATED: %s is deprecated; %s', $timestamp, $value, $metadata['why']); $metadata['default_message'] = $default_message; error_log($default_message); break; default: error_log('Unknown event '.$event); break; } if (self::$errorListener) { static $handling_error; if ($handling_error) { error_log( 'Error handler was reentered, some errors were not passed to the '. 
'listener.'); return; } $handling_error = true; call_user_func(self::$errorListener, $event, $value, $metadata); $handling_error = false; } } public static function adjustFilePath($path) { // Compute known library locations so we can emit relative paths if the // file resides inside a known library. This is a little cleaner to read, // and limits the number of false positives we get about full path // disclosure via HackerOne. $bootloader = PhutilBootloader::getInstance(); $libraries = $bootloader->getAllLibraries(); $roots = array(); foreach ($libraries as $library) { $root = $bootloader->getLibraryRoot($library); // For these libraries, the effective root is one level up. switch ($library) { case 'phutil': case 'arcanist': case 'phabricator': $root = dirname($root); break; } if (!strncmp($root, $path, strlen($root))) { return '<'.$library.'>'.substr($path, strlen($root)); } } return $path; } } diff --git a/src/error/PhutilProxyException.php b/src/error/PhutilProxyException.php index 2729628..8326959 100644 --- a/src/error/PhutilProxyException.php +++ b/src/error/PhutilProxyException.php @@ -1,33 +1,32 @@ previousException = $previous; if (version_compare(PHP_VERSION, '5.3.0', '>=')) { parent::__construct($message, $code, $previous); } else { parent::__construct($message, $code); } } public function getPreviousException() { // NOTE: This can not be named "getPrevious()" because that method is final // after PHP 5.3. Similarly, the property can not be named "previous" // because HPHP declares a property with the same name and "protected" // visibility. return $this->previousException; } } diff --git a/src/error/phlog.php b/src/error/phlog.php index f4669a0..5628262 100644 --- a/src/error/phlog.php +++ b/src/error/phlog.php @@ -1,65 +1,63 @@ $trace[0]['file'], 'line' => $trace[0]['line'], 'trace' => $trace, ); foreach (func_get_args() as $event) { PhutilErrorHandler::dispatchErrorMessage( $event instanceof Exception ? 
PhutilErrorHandler::EXCEPTION : PhutilErrorHandler::PHLOG, $event, $metadata); } return $value; } /** * Example @{class:PhutilErrorHandler} error listener callback. When you call * ##PhutilErrorHandler::setErrorListener()##, you must pass a callback function * with the same signature as this one. * * NOTE: @{class:PhutilErrorHandler} handles writing messages to the error * log, so you only need to provide a listener if you have some other console * (like Phabricator's DarkConsole) which you //also// want to send errors to. * * NOTE: You will receive errors which were silenced with the "@" operator. If * you don't want to display these, test for "@" being in effect by checking if * ##error_reporting() === 0## before displaying the error. * * @param const A PhutilErrorHandler constant, like PhutilErrorHandler::ERROR, * which indicates the event type (e.g. error, exception, * user message). * @param wild The event value, like the Exception object for an exception * event, an error string for an error event, or some user object * for user messages. * @param dict A dictionary of metadata about the event. The keys 'file', * 'line' and 'trace' are always available. Other keys may be * present, depending on the event type. 
* @return void - * @group error */ function phutil_error_listener_example($event, $value, array $metadata) { throw new Exception('This is just an example function!'); } diff --git a/src/events/PhutilEvent.php b/src/events/PhutilEvent.php index 12d98da..286cde0 100644 --- a/src/events/PhutilEvent.php +++ b/src/events/PhutilEvent.php @@ -1,40 +1,39 @@ type = $type; $this->data = $data; } public function getType() { return $this->type; } public function getValue($key, $default = null) { return idx($this->data, $key, $default); } public function setValue($key, $value) { $this->data[$key] = $value; return $this; } public function stop() { $this->stop = true; return $this; } public function isStopped() { return $this->stop; } } diff --git a/src/events/PhutilEventEngine.php b/src/events/PhutilEventEngine.php index 676111b..b72ebb6 100644 --- a/src/events/PhutilEventEngine.php +++ b/src/events/PhutilEventEngine.php @@ -1,78 +1,75 @@ } public static function getInstance() { if (!self::$instance) { self::$instance = new PhutilEventEngine(); } return self::$instance; } public function addListener(PhutilEventListener $listener, $type) { $this->listeners[$type][] = $listener; return $this; } /** * Get all the objects currently listening to any event. */ public function getAllListeners() { $listeners = array_mergev($this->listeners); $listeners = mpull($listeners, null, 'getListenerID'); return $listeners; } public static function dispatchEvent(PhutilEvent $event) { $instance = self::getInstance(); $listeners = idx($instance->listeners, $event->getType(), array()); $global_listeners = idx( $instance->listeners, PhutilEventType::TYPE_ALL, array()); // Merge and deduplicate listeners (we want to send the event to each // listener only once, even if it satisfies multiple criteria for the // event). 
$listeners = array_merge($listeners, $global_listeners); $listeners = mpull($listeners, null, 'getListenerID'); $profiler = PhutilServiceProfiler::getInstance(); $profiler_id = $profiler->beginServiceCall( array( 'type' => 'event', 'kind' => $event->getType(), 'count' => count($listeners), )); $caught = null; try { foreach ($listeners as $listener) { if ($event->isStopped()) { // Do this first so if someone tries to dispatch a stopped event it // doesn't go anywhere. Silly but less surprising. break; } $listener->handleEvent($event); } } catch (Exception $ex) { $profiler->endServiceCall($profiler_id, array()); throw $ex; } $profiler->endServiceCall($profiler_id, array()); } } diff --git a/src/events/PhutilEventListener.php b/src/events/PhutilEventListener.php index 15bbe3f..1392a08 100644 --- a/src/events/PhutilEventListener.php +++ b/src/events/PhutilEventListener.php @@ -1,41 +1,40 @@ } abstract public function register(); abstract public function handleEvent(PhutilEvent $event); final public function listen($type) { $engine = PhutilEventEngine::getInstance(); $engine->addListener($this, $type); } /** * Return a scalar ID unique to this listener. This is used to deduplicate * listeners which match events on multiple rules, so they are invoked only * once. * * @return int A scalar unique to this object instance. 
*/ final public function getListenerID() { if (!$this->listenerID) { $this->listenerID = self::$nextListenerID; self::$nextListenerID++; } return $this->listenerID; } } diff --git a/src/events/constant/PhutilEventConstants.php b/src/events/constant/PhutilEventConstants.php index 64b9726..14ab240 100644 --- a/src/events/constant/PhutilEventConstants.php +++ b/src/events/constant/PhutilEventConstants.php @@ -1,8 +1,3 @@ withType('f') * ->withSuffix('php') * ->find(); * * @task create Creating a File Query * @task config Configuring File Queries * @task exec Executing the File Query * @task internal Internal - * @group filesystem */ final class FileFinder { private $root; private $exclude = array(); private $paths = array(); private $name = array(); private $suffix = array(); private $type; private $generateChecksums = false; private $followSymlinks; private $forceMode; /** * Create a new FileFinder. * * @param string Root directory to find files beneath. * @return this * @task create */ public function __construct($root) { $this->root = rtrim($root, '/'); } /** * @task config */ public function excludePath($path) { $this->exclude[] = $path; return $this; } /** * @task config */ public function withName($name) { $this->name[] = $name; return $this; } /** * @task config */ public function withSuffix($suffix) { $this->suffix[] = '*.'.$suffix; return $this; } /** * @task config */ public function withPath($path) { $this->paths[] = $path; return $this; } /** * @task config */ public function withType($type) { $this->type = $type; return $this; } /** * @task config */ public function withFollowSymlinks($follow) { $this->followSymlinks = $follow; return $this; } /** * @task config */ public function setGenerateChecksums($generate) { $this->generateChecksums = $generate; return $this; } /** * @task config * @param string Either "php", "shell", or the empty string. 
*/ public function setForceMode($mode) { $this->forceMode = $mode; return $this; } /** * @task internal */ public function validateFile($file) { $matches = !count($this->name) && !count($this->suffix); foreach ($this->name as $curr_name) { if (basename($file) === $curr_name) { $matches = true; break; } } foreach ($this->suffix as $curr_suffix) { if (fnmatch($curr_suffix, $file)) { $matches = true; break; } } if (!$matches) { return false; } $matches = (count($this->paths) == 0); foreach ($this->paths as $path) { if (fnmatch($path, $this->root.'/'.$file)) { $matches = true; break; } } $fullpath = $this->root.'/'.ltrim($file, '/'); if (($this->type == 'f' && is_dir($fullpath)) || ($this->type == 'd' && !is_dir($fullpath))) { $matches = false; } return $matches; } /** * @task internal */ private function getFiles($dir) { $found = Filesystem::listDirectory($this->root.'/'.$dir, true); $files = array(); if (strlen($dir) > 0) { $dir = rtrim($dir, '/').'/'; } foreach ($found as $filename) { // Only exclude files whose names match relative to the root. if ($dir == '') { $matches = true; foreach ($this->exclude as $exclude_path) { if (fnmatch(ltrim($exclude_path, './'), $dir.$filename)) { $matches = false; break; } } if (!$matches) { continue; } } if ($this->validateFile($dir.$filename)) { $files[] = $dir.$filename; } if (is_dir($this->root.'/'.$dir.$filename)) { foreach ($this->getFiles($dir.$filename) as $file) { $files[] = $file; } } } return $files; } /** * @task exec */ public function find() { $files = array(); if (!is_dir($this->root) || !is_readable($this->root)) { throw new Exception( "Invalid FileFinder root directory specified ('{$this->root}'). ". "Root directory must be a directory, be readable, and be specified ". 
"with an absolute path."); } if ($this->forceMode == 'shell') { $php_mode = false; } else if ($this->forceMode == 'php') { $php_mode = true; } else { $php_mode = (phutil_is_windows() || !Filesystem::binaryExists('find')); } if ($php_mode) { $files = $this->getFiles(''); } else { $args = array(); $command = array(); $command[] = 'find'; if ($this->followSymlinks) { $command[] = '-L'; } $command[] = '.'; if ($this->exclude) { $command[] = $this->generateList('path', $this->exclude).' -prune'; $command[] = '-o'; } if ($this->type) { $command[] = '-type %s'; $args[] = $this->type; } if ($this->name || $this->suffix) { $command[] = $this->generateList('name', array_merge( $this->name, $this->suffix)); } if ($this->paths) { $command[] = $this->generateList('path', $this->paths); } $command[] = '-print0'; array_unshift($args, implode(' ', $command)); list($stdout) = newv('ExecFuture', $args) ->setCWD($this->root) ->resolvex(); $stdout = trim($stdout); if (!strlen($stdout)) { return array(); } $files = explode("\0", $stdout); // On OSX/BSD, find prepends a './' to each file. for ($i = 0; $i < count($files); $i++) { if (substr($files[$i], 0, 2) == './') { $files[$i] = substr($files[$i], 2); } } } if (!$this->generateChecksums) { return $files; } else { $map = array(); foreach ($files as $line) { $fullpath = $this->root.'/'.ltrim($line, '/'); if (is_dir($fullpath)) { $map[$line] = null; } else { $map[$line] = md5_file($fullpath); } } return $map; } } /** * @task internal */ private function generateList($flag, array $items) { $items = array_map('escapeshellarg', $items); foreach ($items as $key => $item) { $items[$key] = '-'.$flag.' '.$item; } $items = implode(' -o ', $items); return '"(" '.$items.' 
")"'; } } diff --git a/src/filesystem/FileList.php b/src/filesystem/FileList.php index 192b980..ae5e687 100644 --- a/src/filesystem/FileList.php +++ b/src/filesystem/FileList.php @@ -1,93 +1,92 @@ contains($file)) { * do_something_to_this($file); * } * } * * This sort of construction will allow the user to type "src" in order * to indicate 'all relevant files underneath "src/"'. * * @task create Creating a File List * @task test Testing File Lists - * @group filesystem */ final class FileList { private $files = array(); private $dirs = array(); /** * Build a new FileList from an array of paths, e.g. from $argv. * * @param list List of relative or absolute file paths. * @return this * @task create */ public function __construct($paths) { foreach ($paths as $path) { $path = Filesystem::resolvePath($path); if (is_dir($path)) { $path = rtrim($path, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR; $this->dirs[$path] = true; } $this->files[] = $path; } } /** * Determine if a path is one of the paths in the list. Note that an empty * file list is considered to contain every file. * * @param string Relative or absolute system file path. * @param bool If true, consider the path to be contained in the list if * the list contains a parent directory. If false, require * that the path be part of the list explicitly. * @return bool If true, the file is in the list. * @task test */ public function contains($path, $allow_parent_directory = true) { if ($this->isEmpty()) { return true; } $path = Filesystem::resolvePath($path); if (is_dir($path)) { $path .= DIRECTORY_SEPARATOR; } foreach ($this->files as $file) { if ($file == $path) { return true; } if ($allow_parent_directory) { $len = strlen($file); if (isset($this->dirs[$file]) && !strncmp($file, $path, $len)) { return true; } } } return false; } /** * Check if the file list is empty -- that is, it contains no files. * * @return bool If true, the list is empty. 
* @task test */ public function isEmpty() { return !$this->files; } } diff --git a/src/filesystem/Filesystem.php b/src/filesystem/Filesystem.php index 8832a1b..b11ccd3 100644 --- a/src/filesystem/Filesystem.php +++ b/src/filesystem/Filesystem.php @@ -1,1066 +1,1063 @@ > 3]; } return $result; } /** * Identify the MIME type of a file. This returns only the MIME type (like * text/plain), not the encoding (like charset=utf-8). * * @param string Path to the file to examine. * @param string Optional default mime type to return if the file's mime * type can not be identified. * @return string File mime type. * * @task file * * @phutil-external-symbol function mime_content_type * @phutil-external-symbol function finfo_open * @phutil-external-symbol function finfo_file */ public static function getMimeType( $path, $default = 'application/octet-stream') { $path = self::resolvePath($path); self::assertExists($path); self::assertIsFile($path); self::assertReadable($path); $mime_type = null; // Fileinfo is the best approach since it doesn't rely on `file`, but // it isn't builtin for older versions of PHP. if (function_exists('finfo_open')) { $finfo = finfo_open(FILEINFO_MIME); if ($finfo) { $result = finfo_file($finfo, $path); if ($result !== false) { $mime_type = $result; } } } // If we failed Fileinfo, try `file`. This works well but not all systems // have the binary. if ($mime_type === null) { list($err, $stdout) = exec_manual( 'file --brief --mime %s', $path); if (!$err) { $mime_type = trim($stdout); } } // If we didn't get anywhere, try the deprecated mime_content_type() // function. if ($mime_type === null) { if (function_exists('mime_content_type')) { $result = mime_content_type($path); if ($result !== false) { $mime_type = $result; } } } // If we come back with an encoding, strip it off. 
if (strpos($mime_type, ';') !== false) { list($type, $encoding) = explode(';', $mime_type, 2); $mime_type = $type; } if ($mime_type === null) { $mime_type = $default; } return $mime_type; } /* -( Directories )-------------------------------------------------------- */ /** * Create a directory in a manner similar to mkdir(), but throw detailed * exceptions on failure. * * @param string Path to directory. The parent directory must exist and * be writable. * @param int Permission umask. Note that umask is in octal, so you * should specify it as, e.g., `0777', not `777'. * @param boolean Recursively create directories. Default to false. * @return string Path to the created directory. * * @task directory */ public static function createDirectory($path, $umask = 0755, $recursive = false) { $path = self::resolvePath($path); if (is_dir($path)) { if ($umask) { Filesystem::changePermissions($path, $umask); } return $path; } $dir = dirname($path); if ($recursive && !file_exists($dir)) { // Note: We could do this with the recursive third parameter of mkdir(), // but then we loose the helpful FilesystemExceptions we normally get. self::createDirectory($dir, $umask, true); } self::assertIsDirectory($dir); self::assertExists($dir); self::assertWritable($dir); self::assertNotExists($path); if (!mkdir($path, $umask)) { throw new FilesystemException( $path, "Failed to create directory `{$path}'."); } // Need to change premissions explicitly because mkdir does something // slightly different. mkdir(2) man page: // 'The parameter mode specifies the permissions to use. It is modified by // the process's umask in the usual way: the permissions of the created // directory are (mode & ~umask & 0777)."' if ($umask) { Filesystem::changePermissions($path, $umask); } return $path; } /** * Create a temporary directory and return the path to it. You are * responsible for removing it (e.g., with Filesystem::remove()) * when you are done with it. * * @param string Optional directory prefix. 
* @param int Permissions to create the directory with. By default, * these permissions are very restrictive (0700). * @return string Path to newly created temporary directory. * * @task directory */ public static function createTemporaryDirectory($prefix = '', $umask = 0700) { $prefix = preg_replace('/[^A-Z0-9._-]+/i', '', $prefix); $tmp = sys_get_temp_dir(); if (!$tmp) { throw new FilesystemException( $tmp, 'Unable to determine system temporary directory.'); } $base = $tmp.DIRECTORY_SEPARATOR.$prefix; $tries = 3; do { $dir = $base.substr(base_convert(md5(mt_rand()), 16, 36), 0, 16); try { self::createDirectory($dir, $umask); break; } catch (FilesystemException $ex) { // Ignore. } } while (--$tries); if (!$tries) { $df = disk_free_space($tmp); if ($df !== false && $df < 1024 * 1024) { throw new FilesystemException( $dir, pht('Failed to create a temporary directory: the disk is full.')); } throw new FilesystemException( $dir, pht("Failed to create a temporary directory in '%s'.", $tmp)); } return $dir; } /** * List files in a directory. * * @param string Path, absolute or relative to PWD. * @param bool If false, exclude files beginning with a ".". * * @return array List of files and directories in the specified * directory, excluding `.' and `..'. * * @task directory */ public static function listDirectory($path, $include_hidden = true) { $path = self::resolvePath($path); self::assertExists($path); self::assertIsDirectory($path); self::assertReadable($path); $list = @scandir($path); if ($list === false) { throw new FilesystemException( $path, "Unable to list contents of directory `{$path}'."); } foreach ($list as $k => $v) { if ($v == '.' || $v == '..' || (!$include_hidden && $v[0] == '.')) { unset($list[$k]); } } return array_values($list); } /** * Return all directories between a path and "/". Iterating over them walks * from the path to the root. * * @param string Path, absolute or relative to PWD. * @return list List of parent paths, including the provided path. 
* @task directory */ public static function walkToRoot($path) { $path = self::resolvePath($path); if (is_link($path)) { $path = realpath($path); } $walk = array(); $parts = explode(DIRECTORY_SEPARATOR, $path); foreach ($parts as $k => $part) { if (!strlen($part)) { unset($parts[$k]); } } do { if (phutil_is_windows()) { $walk[] = implode(DIRECTORY_SEPARATOR, $parts); } else { $walk[] = DIRECTORY_SEPARATOR.implode(DIRECTORY_SEPARATOR, $parts); } if (empty($parts)) { break; } array_pop($parts); } while (true); return $walk; } /* -( Paths )-------------------------------------------------------------- */ /** * Canonicalize a path by resolving it relative to some directory (by * default PWD), following parent symlinks and removing artifacts. If the * path is itself a symlink it is left unresolved. * * @param string Path, absolute or relative to PWD. * @return string Canonical, absolute path. * * @task path */ public static function resolvePath($path, $relative_to = null) { if (phutil_is_windows()) { $is_absolute = preg_match('/^[A-Za-z]+:/', $path); } else { $is_absolute = !strncmp($path, DIRECTORY_SEPARATOR, 1); } if (!$is_absolute) { if (!$relative_to) { $relative_to = getcwd(); } $path = $relative_to.DIRECTORY_SEPARATOR.$path; } if (is_link($path)) { $parent_realpath = realpath(dirname($path)); if ($parent_realpath !== false) { return $parent_realpath.DIRECTORY_SEPARATOR.basename($path); } } $realpath = realpath($path); if ($realpath !== false) { return $realpath; } // This won't work if the file doesn't exist or is on an unreadable mount // or something crazy like that. Try to resolve a parent so we at least // cover the nonexistent file case. 
$parts = explode(DIRECTORY_SEPARATOR, trim($path, DIRECTORY_SEPARATOR)); while (end($parts) !== false) { array_pop($parts); if (phutil_is_windows()) { $attempt = implode(DIRECTORY_SEPARATOR, $parts); } else { $attempt = DIRECTORY_SEPARATOR.implode(DIRECTORY_SEPARATOR, $parts); } $realpath = realpath($attempt); if ($realpath !== false) { $path = $realpath.substr($path, strlen($attempt)); break; } } return $path; } /** * Test whether a path is descendant from some root path after resolving all * symlinks and removing artifacts. Both paths must exists for the relation * to obtain. A path is always a descendant of itself as long as it exists. * * @param string Child path, absolute or relative to PWD. * @param string Root path, absolute or relative to PWD. * @return bool True if resolved child path is in fact a descendant of * resolved root path and both exist. * @task path */ public static function isDescendant($path, $root) { try { self::assertExists($path); self::assertExists($root); } catch (FilesystemException $e) { return false; } $fs = new FileList(array($root)); return $fs->contains($path); } /** * Convert a canonical path to its most human-readable format. It is * guaranteed that you can use resolvePath() to restore a path to its * canonical format. * * @param string Path, absolute or relative to PWD. * @param string Optionally, working directory to make files readable * relative to. * @return string Human-readable path. * * @task path */ public static function readablePath($path, $pwd = null) { if ($pwd === null) { $pwd = getcwd(); } foreach (array($pwd, self::resolvePath($pwd)) as $parent) { $parent = rtrim($parent, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR; $len = strlen($parent); if (!strncmp($parent, $path, $len)) { $path = substr($path, $len); return $path; } } return $path; } /** * Determine whether or not a path exists in the filesystem. This differs from * file_exists() in that it returns true for symlinks. 
This method does not * attempt to resolve paths before testing them. * * @param string Test for the existence of this path. * @return bool True if the path exists in the filesystem. * @task path */ public static function pathExists($path) { return file_exists($path) || is_link($path); } /** * Determine if an executable binary (like `git` or `svn`) exists within * the configured `$PATH`. * * @param string Binary name, like `'git'` or `'svn'`. * @return bool True if the binary exists and is executable. * @task exec */ public static function binaryExists($binary) { return self::resolveBinary($binary) !== null; } /** * Locates the full path that an executable binary (like `git` or `svn`) is at * the configured `$PATH`. * * @param string Binary name, like `'git'` or `'svn'`. * @return string The full binary path if it is present, or null. * @task exec */ public static function resolveBinary($binary) { if (phutil_is_windows()) { list($err, $stdout) = exec_manual('where %s', $binary); $stdout = phutil_split_lines($stdout); // If `where %s` could not find anything, check for relative binary if ($err) { $path = Filesystem::resolvePath($binary); if (Filesystem::pathExists($path)) { return $path; } return null; } $stdout = head($stdout); } else { list($err, $stdout) = exec_manual('which %s', $binary); } return $err === 0 ? trim($stdout) : null; } /** * Determine if two paths are equivalent by resolving symlinks. This is * different from resolving both paths and comparing them because * resolvePath() only resolves symlinks in parent directories, not the * path itself. * * @param string First path to test for equivalence. * @param string Second path to test for equivalence. * @return bool True if both paths are equivalent, i.e. reference the same * entity in the filesystem. 
* @task path */ public static function pathsAreEquivalent($u, $v) { $u = Filesystem::resolvePath($u); $v = Filesystem::resolvePath($v); $real_u = realpath($u); $real_v = realpath($v); if ($real_u) { $u = $real_u; } if ($real_v) { $v = $real_v; } return ($u == $v); } /* -( Assert )------------------------------------------------------------- */ /** * Assert that something (e.g., a file, directory, or symlink) exists at a * specified location. * * @param string Assert that this path exists. * @return void * * @task assert */ public static function assertExists($path) { if (!self::pathExists($path)) { throw new FilesystemException( $path, "Filesystem entity `{$path}' does not exist."); } } /** * Assert that nothing exists at a specified location. * * @param string Assert that this path does not exist. * @return void * * @task assert */ public static function assertNotExists($path) { if (file_exists($path) || is_link($path)) { throw new FilesystemException( $path, "Path `{$path}' already exists!"); } } /** * Assert that a path represents a file, strictly (i.e., not a directory). * * @param string Assert that this path is a file. * @return void * * @task assert */ public static function assertIsFile($path) { if (!is_file($path)) { throw new FilesystemException( $path, "Requested path `{$path}' is not a file."); } } /** * Assert that a path represents a directory, strictly (i.e., not a file). * * @param string Assert that this path is a directory. * @return void * * @task assert */ public static function assertIsDirectory($path) { if (!is_dir($path)) { throw new FilesystemException( $path, "Requested path `{$path}' is not a directory."); } } /** * Assert that a file or directory exists and is writable. * * @param string Assert that this path is writable. 
* @return void * * @task assert */ public static function assertWritable($path) { if (!is_writable($path)) { throw new FilesystemException( $path, "Requested path `{$path}' is not writable."); } } /** * Assert that a file or directory exists and is readable. * * @param string Assert that this path is readable. * @return void * * @task assert */ public static function assertReadable($path) { if (!is_readable($path)) { throw new FilesystemException( $path, "Path `{$path}' is not readable."); } } } diff --git a/src/filesystem/FilesystemException.php b/src/filesystem/FilesystemException.php index 5f417ca..cc464ff 100644 --- a/src/filesystem/FilesystemException.php +++ b/src/filesystem/FilesystemException.php @@ -1,36 +1,34 @@ path = $path; parent::__construct($message); } /** * Retrieve the path associated with the exception. Generally, this is * something like a path that couldn't be read or written, or a path that * was expected to exist but didn't. * * @return string Path associated with the exception. */ public function getPath() { return $this->path; } } diff --git a/src/filesystem/PhutilDeferredLog.php b/src/filesystem/PhutilDeferredLog.php index fec8d5a..f54141a 100644 --- a/src/filesystem/PhutilDeferredLog.php +++ b/src/filesystem/PhutilDeferredLog.php @@ -1,236 +1,235 @@ setData( * array( * 'T' => date('c'), * 'u' => $username, * )); * * The log will be appended when the object's destructor is called, or when you * invoke @{method:write}. Note that programs can exit without invoking object * destructors (e.g., in the case of an unhandled exception, memory exhaustion, * or SIGKILL) so writes are not guaranteed. You can call @{method:write} to * force an explicit write to disk before the destructor is called. * * Log variables will be written with bytes 0x00-0x1F, 0x7F-0xFF, and backslash * escaped using C-style escaping. Since this range includes tab, you can use * tabs as field separators to ensure the file format is easily parsable. 
In * PHP, you can decode this encoding with `stripcslashes`. * * If a variable is included in the log format but a value is never provided * with @{method:setData}, it will be written as "-". * * @task log Logging * @task write Writing the Log * @task internal Internals - * @group filesystem */ final class PhutilDeferredLog { private $file; private $format; private $data; private $didWrite; private $failQuietly; /* -( Logging )------------------------------------------------------------ */ /** * Create a new log entry, which will be written later. The format string * should use "%x"-style placeholders to represent data which will be added * later: * * $log = new PhutilDeferredLog('/some/file.log', '[%T] %u'); * * @param string|null The file the entry should be written to, or null to * create a log object which does not write anywhere. * @param string The log entry format. * @task log */ public function __construct($file, $format) { $this->file = $file; $this->format = $format; $this->data = array(); $this->didWrite = false; } /** * Add data to the log. Provide a map of variables to replace in the format * string. For example, if you use a format string like: * * "[%T]\t%u" * * ...you might add data like this: * * $log->setData( * array( * 'T' => date('c'), * 'u' => $username, * )); * * When the log is written, the "%T" and "%u" variables will be replaced with * the values you provide. * * @param dict Map of variables to values. * @return this * @task log */ public function setData(array $map) { $this->data = $map + $this->data; return $this; } /** * Get existing log data. * * @param string Log data key. * @param wild Default to return if data does not exist. * @return wild Data, or default if data does not exist. * @task log */ public function getData($key, $default = null) { return idx($this->data, $key, $default); } /** * Set the path where the log will be written. You can pass `null` to prevent * the log from writing. 
* * NOTE: You can not change the file after the log writes. * * @param string|null File where the entry should be written to, or null to * prevent writes. * @return this * @task log */ public function setFile($file) { if ($this->didWrite) { throw new Exception( 'You can not change the logfile after a write has occurred!'); } $this->file = $file; return $this; } public function getFile() { return $this->file; } /** * Set quiet (logged) failure, instead of the default loud (exception) * failure. Throwing exceptions from destructors which exit at the end of a * request can result in difficult-to-debug behavior. */ public function setFailQuietly($fail_quietly) { $this->failQuietly = $fail_quietly; return $this; } /* -( Writing the Log )---------------------------------------------------- */ /** * When the log object is destroyed, it writes if it hasn't written yet. * @task write */ public function __destruct() { $this->write(); } /** * Write the log explicitly, if it hasn't been written yet. Normally you do * not need to call this method; it will be called when the log object is * destroyed. However, you can explicitly force the write earlier by calling * this method. * * A log object will never write more than once, so it is safe to call this * method even if the object's destructor later runs. * * @return this * @task write */ public function write() { if ($this->didWrite) { return $this; } // Even if we aren't going to write, format the line to catch any errors // and invoke possible __toString() calls. 
$line = $this->format(); if ($this->file !== null) { $ok = @file_put_contents( $this->file, $line, FILE_APPEND | LOCK_EX); if ($ok === false) { $message = "Unable to write to logfile '{$this->file}'!"; if ($this->failQuietly) { phlog($message); } else { throw new Exception($message); } } } $this->didWrite = true; return $this; } /* -( Internals )---------------------------------------------------------- */ /** * Format the log string, replacing "%x" variables with values. * * @return string Finalized, log string for writing to disk. * @task internals */ private function format() { // Always convert '%%' to literal '%'. $map = array('%' => '%') + $this->data; $result = ''; $saw_percent = false; foreach (phutil_utf8v($this->format) as $c) { if ($saw_percent) { $saw_percent = false; if (array_key_exists($c, $map)) { $result .= addcslashes($map[$c], "\0..\37\\\177..\377"); } else { $result .= '-'; } } else if ($c == '%') { $saw_percent = true; } else { $result .= $c; } } return rtrim($result)."\n"; } } diff --git a/src/filesystem/PhutilDirectoryFixture.php b/src/filesystem/PhutilDirectoryFixture.php index 0767af0..e1e36a6 100644 --- a/src/filesystem/PhutilDirectoryFixture.php +++ b/src/filesystem/PhutilDirectoryFixture.php @@ -1,53 +1,50 @@ getPath(), Filesystem::resolvePath($archive)); return $obj; } public static function newEmptyFixture() { $obj = new PhutilDirectoryFixture(); $obj->path = Filesystem::createTemporaryDirectory(); return $obj; } private function __construct() { // } public function __destruct() { Filesystem::remove($this->path); } public function getPath($to_file = null) { return $this->path.'/'.ltrim($to_file, '/'); } public function saveToArchive($path) { $tmp = new TempFile(); execx( 'tar -C %s -czvvf %s .', $this->getPath(), $tmp); $ok = rename($tmp, Filesystem::resolvePath($path)); if (!$ok) { throw new FilesystemException($path, 'Failed to overwrite file.'); } return $this; } } diff --git a/src/filesystem/PhutilFileLock.php 
b/src/filesystem/PhutilFileLock.php index 812372c..13733bb 100644 --- a/src/filesystem/PhutilFileLock.php +++ b/src/filesystem/PhutilFileLock.php @@ -1,120 +1,119 @@ lock(); * * do_contentious_things(); * * $lock->unlock(); * * For more information on locks, see @{class:PhutilLock}. * * @task construct Constructing Locks * @task impl Implementation - * - * @group filesystem */ final class PhutilFileLock extends PhutilLock { private $lockfile; private $handle; /* -( Constructing Locks )------------------------------------------------- */ /** * Create a new lock on a lockfile. The file need not exist yet. * * @param string The lockfile to use. * @return PhutilFileLock New lock object. * * @task construct */ public static function newForPath($lockfile) { $lockfile = Filesystem::resolvePath($lockfile); $name = 'file:'.$lockfile; $lock = self::getLock($name); if (!$lock) { $lock = new PhutilFileLock($name); $lock->lockfile = $lockfile; self::registerLock($lock); } return $lock; } /* -( Locking )------------------------------------------------------------ */ /** * Acquire the lock. If lock acquisition fails because the lock is held by * another process, throws @{class:PhutilLockException}. Other exceptions * indicate that lock acquisition has failed for reasons unrelated to locking. * * If the lock is already held, this method throws. You can test the lock * status with @{method:isLocked}. * * @param float Seconds to block waiting for the lock. 
* @return void * * @task lock */ protected function doLock($wait) { $path = $this->lockfile; $handle = @fopen($path, 'a+'); if (!$handle) { throw new FilesystemException( $path, "Unable to open lock '{$path}' for writing!"); } $start_time = microtime(true); do { $would_block = null; $ok = flock($handle, LOCK_EX | LOCK_NB, $would_block); if ($ok) { break; } else { usleep(10000); } } while ($wait && $wait > (microtime(true) - $start_time)); if (!$ok) { fclose($handle); throw new PhutilLockException($this->getName()); } $this->handle = $handle; } /** * Release the lock. Throws an exception on failure, e.g. if the lock is not * currently held. * * @return void * * @task lock */ protected function doUnlock() { $ok = flock($this->handle, LOCK_UN | LOCK_NB); if (!$ok) { throw new Exception('Unable to unlock file!'); } $ok = fclose($this->handle); if (!$ok) { throw new Exception('Unable to close file!'); } $this->handle = null; } + } diff --git a/src/filesystem/PhutilLock.php b/src/filesystem/PhutilLock.php index cf6e6db..3b0095d 100644 --- a/src/filesystem/PhutilLock.php +++ b/src/filesystem/PhutilLock.php @@ -1,236 +1,234 @@ lock(); * do_contentious_things(); * $lock->unlock(); * * If the lock can't be acquired because it is already held, * @{class:PhutilLockException} is thrown. Other exceptions indicate * permanent failure unrelated to locking. * * When extending this class, you should call @{method:getLock} to look up * an existing lock object, and @{method:registerLock} when objects are * constructed to register for automatic unlock on shutdown. 
* * @task impl Lock Implementation * @task registry Lock Registry * @task construct Constructing Locks * @task status Determining Lock Status * @task lock Locking * @task internal Internals - * - * @group filesystem */ abstract class PhutilLock { private static $registeredShutdownFunction = false; private static $locks = array(); private $locked = false; private $profilerID; private $name; /* -( Constructing Locks )------------------------------------------------- */ /** * Build a new lock, given a lock name. The name should be globally unique * across all locks. * * @param string Globally unique lock name. * @task construct */ protected function __construct($name) { $this->name = $name; } /* -( Lock Implementation )------------------------------------------------ */ /** * Acquires the lock, or throws @{class:PhutilLockException} if it fails. * * @param float Seconds to block waiting for the lock. * @return void * @task impl */ abstract protected function doLock($wait); /** * Releases the lock. * * @return void * @task impl */ abstract protected function doUnlock(); /* -( Lock Registry )------------------------------------------------------ */ /** * Returns a globally unique name for this lock. * * @return string Globally unique lock name, across all locks. * @task registry */ final public function getName() { return $this->name; } /** * Get a named lock, if it has been registered. * * @param string Lock name. * @task registry */ protected static function getLock($name) { return idx(self::$locks, $name); } /** * Register a lock for cleanup when the process exits. * * @param PhutilLock Lock to register. 
* @task registry */ protected static function registerLock(PhutilLock $lock) { if (!self::$registeredShutdownFunction) { register_shutdown_function(array('PhutilLock', 'unlockAll')); self::$registeredShutdownFunction = true; } $name = $lock->getName(); if (self::getLock($name)) { throw new Exception("Lock '{$name}' is already registered!"); } self::$locks[$name] = $lock; } /* -( Determining Lock Status )-------------------------------------------- */ /** * Determine if the lock is currently held. * * @return bool True if the lock is held. * * @task status */ final public function isLocked() { return $this->locked; } /* -( Locking )------------------------------------------------------------ */ /** * Acquire the lock. If lock acquisition fails because the lock is held by * another process, throws @{class:PhutilLockException}. Other exceptions * indicate that lock acquisition has failed for reasons unrelated to locking. * * If the lock is already held by this process, this method throws. You can * test the lock status with @{method:isLocked}. * * @param float Seconds to block waiting for the lock. By default, do not * block. * @return this * * @task lock */ final public function lock($wait = 0) { if ($this->locked) { $name = $this->getName(); throw new Exception( "Lock '{$name}' has already been locked by this process."); } $profiler = PhutilServiceProfiler::getInstance(); $profiler_id = $profiler->beginServiceCall( array( 'type' => 'lock', 'name' => $this->getName(), )); try { $this->doLock((float)$wait); } catch (Exception $ex) { $profiler->endServiceCall( $profiler_id, array( 'lock' => false, )); throw $ex; } $this->profilerID = $profiler_id; $this->locked = true; return $this; } /** * Release the lock. Throws an exception on failure, e.g. if the lock is not * currently held. 
* * @return this * * @task lock */ final public function unlock() { if (!$this->locked) { $name = $this->getName(); throw new Exception( "Lock '{$name} is not locked by this process!"); } $this->doUnlock(); $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall( $this->profilerID, array( 'lock' => true, )); $this->profilerID = null; $this->locked = false; return $this; } /* -( Internals )---------------------------------------------------------- */ /** * On shutdown, we release all the locks. You should not call this method * directly. Use @{method:unlock} to release individual locks. * * @return void * * @task internal */ public static function unlockAll() { foreach (self::$locks as $key => $lock) { if ($lock->locked) { $lock->unlock(); } } } } diff --git a/src/filesystem/TempFile.php b/src/filesystem/TempFile.php index c274a52..536c066 100644 --- a/src/filesystem/TempFile.php +++ b/src/filesystem/TempFile.php @@ -1,99 +1,97 @@ dir = Filesystem::createTemporaryDirectory(); if ($filename === null) { $this->file = tempnam($this->dir, getmypid().'-'); } else { $this->file = $this->dir.'/'.$filename; } Filesystem::writeFile($this, ''); } /* -( Configuration )------------------------------------------------------ */ /** * Normally, the file is deleted when this object passes out of scope. You * can set it to be preserved instead. * * @param bool True to preserve the file after object destruction. * @return this * @task config */ public function setPreserveFile($preserve) { $this->preserve = $preserve; return $this; } /* -( Internals )---------------------------------------------------------- */ /** * Get the path to the temporary file. Normally you can just use the object * in a string context. * * @return string Absolute path to the temporary file. * @task internal */ public function __toString() { return $this->file; } /** * When the object is destroyed, it destroys the temporary file. 
You can * change this behavior with @{method:setPreserveFile}. * * @task internal */ public function __destruct() { if ($this->preserve) { return; } Filesystem::remove($this->dir); // NOTE: tempnam() doesn't guarantee it will return a file inside the // directory you passed to the function, so we make sure to nuke the file // explicitly. Filesystem::remove($this->file); } } diff --git a/src/filesystem/__tests__/FileFinderTestCase.php b/src/filesystem/__tests__/FileFinderTestCase.php index 57171f1..9f440e0 100644 --- a/src/filesystem/__tests__/FileFinderTestCase.php +++ b/src/filesystem/__tests__/FileFinderTestCase.php @@ -1,141 +1,145 @@ excludePath('./exclude') ->excludePath('subdir.txt'); return $finder; } public function testFinderWithChecksums() { foreach (array('php', 'shell') as $mode) { $files = $this->getFinder() ->setGenerateChecksums(true) ->withType('f') ->withPath('*') ->withSuffix('txt') ->setForceMode($mode) ->find(); // Test whether correct files were found. $this->assertTrue(array_key_exists('test.txt', $files)); $this->assertTrue(array_key_exists('file.txt', $files)); $this->assertTrue( - array_key_exists('include_dir.txt/subdir.txt/alsoinclude.txt', - $files)); + array_key_exists( + 'include_dir.txt/subdir.txt/alsoinclude.txt', + $files)); $this->assertFalse(array_key_exists('test', $files)); $this->assertTrue(array_key_exists('.hidden.txt', $files)); $this->assertFalse(array_key_exists('exclude/file.txt', $files)); $this->assertFalse(array_key_exists('include_dir.txt', $files)); foreach ($files as $file => $checksum) { $this->assertFalse(is_dir($file)); } // Test checksums. 
- $this->assertEqual($files['test.txt'], - 'aea46212fa8b8d0e0e6aa34a15c9e2f5'); - $this->assertEqual($files['file.txt'], - '725130ba6441eadb4e5d807898e0beae'); - $this->assertEqual($files['.hidden.txt'], - 'b6cfc9ce9afe12b258ee1c19c235aa27'); - $this->assertEqual($files['include_dir.txt/subdir.txt/alsoinclude.txt'], - '91e5c1ad76ff229c6456ac92e74e1d9f'); + $this->assertEqual( + $files['test.txt'], + 'aea46212fa8b8d0e0e6aa34a15c9e2f5'); + $this->assertEqual( + $files['file.txt'], + '725130ba6441eadb4e5d807898e0beae'); + $this->assertEqual( + $files['.hidden.txt'], + 'b6cfc9ce9afe12b258ee1c19c235aa27'); + $this->assertEqual( + $files['include_dir.txt/subdir.txt/alsoinclude.txt'], + '91e5c1ad76ff229c6456ac92e74e1d9f'); } } public function testFinderWithoutChecksums() { foreach (array('php', 'shell') as $mode) { $files = $this->getFinder() ->withType('f') ->withPath('*') ->withSuffix('txt') ->setForceMode($mode) ->find(); // Test whether correct files were found. $this->assertTrue(in_array('test.txt', $files)); $this->assertTrue(in_array('file.txt', $files)); $this->assertTrue(in_array('.hidden.txt', $files)); $this->assertTrue( in_array('include_dir.txt/subdir.txt/alsoinclude.txt', $files)); $this->assertFalse(in_array('test', $files)); $this->assertFalse(in_array('exclude/file.txt', $files)); $this->assertFalse(in_array('include_dir.txt', $files)); foreach ($files as $file => $checksum) { $this->assertFalse(is_dir($file)); } } } public function testFinderWithDirectories() { foreach (array('php', 'shell') as $mode) { $files = $this->getFinder() ->setGenerateChecksums(true) ->withPath('*') ->withSuffix('txt') ->setForceMode($mode) ->find(); // Test whether the correct files were found. 
$this->assertTrue(array_key_exists('test.txt', $files)); $this->assertTrue(array_key_exists('file.txt', $files)); $this->assertTrue( - array_key_exists('include_dir.txt/subdir.txt/alsoinclude.txt', - $files)); + array_key_exists( + 'include_dir.txt/subdir.txt/alsoinclude.txt', + $files)); $this->assertFalse(array_key_exists('test', $files)); $this->assertTrue(array_key_exists('.hidden.txt', $files)); $this->assertFalse(array_key_exists('exclude/file.txt', $files)); $this->assertTrue(array_key_exists('include_dir.txt', $files)); // Test checksums. $this->assertEqual($files['test.txt'], 'aea46212fa8b8d0e0e6aa34a15c9e2f5'); $this->assertEqual($files['include_dir.txt'], null); } } public function testFinderWithPath() { foreach (array('php', 'shell') as $mode) { $files = $this->getFinder() ->setGenerateChecksums(true) ->withType('f') ->withPath('*/include_dir.txt/subdir.txt/alsoinclude.txt') ->withSuffix('txt') ->setForceMode($mode) ->find(); // Test whether the correct files were found. $this->assertTrue( - array_key_exists('include_dir.txt/subdir.txt/alsoinclude.txt', - $files)); + array_key_exists( + 'include_dir.txt/subdir.txt/alsoinclude.txt', + $files)); // Ensure that only the one file was found. $this->assertEqual(1, count($files)); } } public function testFinderWithNames() { foreach (array('php', 'shell') as $mode) { $files = $this->getFinder() ->withType('f') ->withPath('*') ->withName('test') ->setForceMode($mode) ->find(); // Test whether the correct files were found. 
$this->assertTrue(in_array('test', $files)); $this->assertFalse(in_array('exclude/test', $files)); $this->assertTrue(in_array('include_dir.txt/test', $files)); $this->assertTrue(in_array('include_dir.txt/subdir.txt/test', $files)); $this->assertEqual(3, count($files)); } } } diff --git a/src/filesystem/__tests__/FilesystemTestCase.php b/src/filesystem/__tests__/FilesystemTestCase.php index bd0b73f..2abcffd 100644 --- a/src/filesystem/__tests__/FilesystemTestCase.php +++ b/src/filesystem/__tests__/FilesystemTestCase.php @@ -1,82 +1,76 @@ assertEqual( true, Filesystem::binaryExists($exists)); // We don't expect to find this binary on any system. $this->assertEqual( false, Filesystem::binaryExists('halting-problem-decider')); } public function testResolveBinary() { - // Test to make sure resolveBinary() returns the full path to the `which` // and `where` binaries. if (phutil_is_windows()) { $binary = 'where'; } else { $binary = 'which'; } $path = Filesystem::resolveBinary($binary); $this->assertFalse(null === $path); $this->assertTrue(file_exists($path)); $this->assertFalse(is_dir($path)); $this->assertEqual(null, Filesystem::resolveBinary('halting-problem-decider')); } public function testWriteUniqueFile() { $tmp = new TempFile(); $dir = dirname($tmp); // Writing an empty file should work. $f = Filesystem::writeUniqueFile($dir, ''); $this->assertEqual('', Filesystem::readFile($f)); // File name should be unique. 
$g = Filesystem::writeUniqueFile($dir, 'quack'); $this->assertTrue($f != $g); } public function testReadRandomBytes() { $number_of_bytes = 1024; $data = Filesystem::readRandomBytes($number_of_bytes); $this->assertTrue(strlen($data) == $number_of_bytes); $data1 = Filesystem::readRandomBytes(128); $data2 = Filesystem::readRandomBytes(128); $this->assertFalse($data1 == $data2); $caught = null; try { Filesystem::readRandomBytes(0); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); - } } diff --git a/src/filesystem/__tests__/PhutilDeferredLogTestCase.php b/src/filesystem/__tests__/PhutilDeferredLogTestCase.php index dab0575..481a2a4 100644 --- a/src/filesystem/__tests__/PhutilDeferredLogTestCase.php +++ b/src/filesystem/__tests__/PhutilDeferredLogTestCase.php @@ -1,170 +1,166 @@ checkLog( "derp\n", 'derp', array()); $this->checkLog( "[20 Aug 1984] alincoln\n", '[%T] %u', array( 'T' => '20 Aug 1984', 'u' => 'alincoln', )); $this->checkLog( "%%%%%\n", '%%%%%%%%%%', array( '%' => '%', )); $this->checkLog( "\\000\\001\\002\n", '%a%b%c', array( 'a' => chr(0), 'b' => chr(1), 'c' => chr(2), )); $this->checkLog( "Download: 100%\n", 'Download: %C', array( 'C' => '100%', )); $this->checkLog( "- bee -\n", '%a %b %c', array( 'b' => 'bee', )); $this->checkLog( "\\\\\n", '%b', array( 'b' => '\\', )); $this->checkLog( "a\t\\t\n", "%a\t%b", array( 'a' => 'a', 'b' => "\t", )); $this->checkLog( "\1ab\n", "\1a%a", array( 'a' => 'b', )); $this->checkLog( "a % xb\n", '%a %% x%b', array( 'a' => 'a', 'b' => 'b', )); - } public function testLogWriteFailure() { $caught = null; try { if (phutil_is_hiphop_runtime()) { // In HipHop exceptions thrown in destructors are not normally // catchable, so call __destruct() explicitly. 
$log = new PhutilDeferredLog('/derp/derp/derp/derp/derp', 'derp'); $log->__destruct(); } else { new PhutilDeferredLog('/derp/derp/derp/derp/derp', 'derp'); } } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testManyWriters() { $root = phutil_get_library_root('phutil').'/../'; $bin = $root.'scripts/test/deferred_log.php'; $n_writers = 3; $n_lines = 8; $tmp = new TempFile(); $futures = array(); for ($ii = 0; $ii < $n_writers; $ii++) { $futures[] = new ExecFuture('%s %d %s', $bin, $n_lines, (string)$tmp); } Futures($futures)->resolveAll(); $this->assertEqual( str_repeat("abcdefghijklmnopqrstuvwxyz\n", $n_writers * $n_lines), Filesystem::readFile($tmp)); } public function testNoWrite() { $tmp = new TempFile(); $log = new PhutilDeferredLog($tmp, 'xyz'); $log->setFile(null); unset($log); $this->assertEqual('', Filesystem::readFile($tmp), 'No Write'); } public function testDoubleWrite() { $tmp = new TempFile(); $log = new PhutilDeferredLog($tmp, 'xyz'); $log->write(); $log->write(); unset($log); $this->assertEqual("xyz\n", Filesystem::readFile($tmp), 'Double Write'); } public function testSetAfterWrite() { $tmp1 = new TempFile(); $tmp2 = new TempFile(); $log = new PhutilDeferredLog($tmp1, 'xyz'); $log->write(); $caught = null; try { $log->setFile($tmp2); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception, 'Set After Write'); } private function checkLog($expect, $format, $data) { $tmp = new TempFile(); $log = new PhutilDeferredLog($tmp, $format); $log->setData($data); unset($log); $this->assertEqual($expect, Filesystem::readFile($tmp), $format); } } diff --git a/src/filesystem/__tests__/PhutilFileLockTestCase.php b/src/filesystem/__tests__/PhutilFileLockTestCase.php index ab53a17..b3d9ab6 100644 --- a/src/filesystem/__tests__/PhutilFileLockTestCase.php +++ b/src/filesystem/__tests__/PhutilFileLockTestCase.php @@ -1,194 +1,184 @@ assertTrue($this->lockTest($file)); 
$this->assertTrue($this->lockTest($file)); } public function testLockHolding() { - // When a process is holding a lock, other processes should be unable // to acquire it. $file = new TempFile(); $hold = $this->holdLock($file); $this->assertFalse($this->lockTest($file)); $hold->resolveKill(); $this->assertTrue($this->lockTest($file)); } public function testInProcessLocking() { - // Other processes should be unable to lock a file if we hold the lock. $file = new TempFile(); $lock = PhutilFileLock::newForPath($file); $lock->lock(); $this->assertFalse($this->lockTest($file)); $lock->unlock(); $this->assertTrue($this->lockTest($file)); } public function testInProcessHolding() { // We should be unable to lock a file if another process is holding the // lock. $file = new TempFile(); $lock = PhutilFileLock::newForPath($file); $hold = $this->holdLock($file); $caught = null; try { $lock->lock(); } catch (PhutilLockException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof PhutilLockException); $hold->resolveKill(); $this->assertTrue($this->lockTest($file)); $lock->lock(); $lock->unlock(); } public function testRelock() { - // Trying to lock a file twice should throw an exception. $file = new TempFile(); $lock = PhutilFileLock::newForPath($file); $lock->lock(); $caught = null; try { $lock->lock(); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testExcessiveUnlock() { - // Trying to unlock a file twice should throw an exception. $file = new TempFile(); $lock = PhutilFileLock::newForPath($file); $lock->lock(); $lock->unlock(); $caught = null; try { $lock->unlock(); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testUnlockAll() { - // unlockAll() should release all locks. 
$file = new TempFile(); $lock = PhutilFileLock::newForPath($file); $lock->lock(); $this->assertFalse($this->lockTest($file)); PhutilFileLock::unlockAll(); $this->assertTrue($this->lockTest($file)); // Calling this again shouldn't do anything bad. PhutilFileLock::unlockAll(); $this->assertTrue($this->lockTest($file)); $lock->lock(); $lock->unlock(); } public function testIsLocked() { - // isLocked() should report lock status accurately. $file = new TempFile(); $lock = PhutilFileLock::newForPath($file); $this->assertFalse($lock->isLocked()); $lock->lock(); $this->assertTrue($lock->isLocked()); $lock->unlock(); $this->assertFalse($lock->isLocked()); } private function lockTest($file) { list($err) = $this->buildLockFuture('--test', $file)->resolve(); return ($err == 0); } private function holdLock($file) { $future = $this->buildLockFuture('--hold', $file); // We can't return until we're sure the subprocess has had time to acquire // the lock. Since actually testing for the lock would be kind of silly // and guarantee that we loop forever if the locking primitive broke, // watch stdout for a *claim* that it has acquired the lock instead. // Make sure we don't loop forever, no matter how bad things get. $future->setTimeout(30); $buf = ''; while (!$future->isReady()) { list($stdout) = $future->read(); $buf .= $stdout; if (strpos($buf, 'LOCK ACQUIRED') !== false) { return $future; } } throw new Exception('Unable to hold lock in external process!'); } private function buildLockFuture($flags, $file) { $root = dirname(phutil_get_library_root('phutil')); $bin = $root.'/scripts/utils/lock.php'; // NOTE: Use `exec` so this passes on Ubuntu, where the default `dash` shell // will eat any kills we send during the tests. 
$future = new ExecFuture('exec php %s %C %s', $bin, $flags, $file); $future->start(); return $future; } } diff --git a/src/filesystem/linesofalarge/LinesOfALarge.php b/src/filesystem/linesofalarge/LinesOfALarge.php index 8402912..630b852 100644 --- a/src/filesystem/linesofalarge/LinesOfALarge.php +++ b/src/filesystem/linesofalarge/LinesOfALarge.php @@ -1,211 +1,209 @@ delimiter = $character; return $this; } /* -( Internals )---------------------------------------------------------- */ /** * Hook, called before @{method:rewind()}. Allows a concrete implementation * to open resources or reset state. * * @return void * @task internals */ abstract protected function willRewind(); /** * Called when the iterator needs more data. The subclass should return more * data, or empty string to indicate end-of-stream. * * @return string Data, or empty string for end-of-stream. * @task internals */ abstract protected function readMore(); /* -( Iterator Interface )------------------------------------------------- */ /** * @task iterator */ final public function rewind() { $this->willRewind(); $this->buf = ''; $this->pos = 0; $this->num = 0; $this->eof = false; $this->valid = true; $this->next(); } /** * @task iterator */ final public function key() { return $this->num; } /** * @task iterator */ final public function current() { return $this->line; } /** * @task iterator */ final public function valid() { return $this->valid; } /** * @task iterator */ final public function next() { - // Consume the stream a chunk at a time into an internal buffer, then // read lines out of that buffer. This gives us flexibility (stream sources // only need to be able to read blocks of bytes) and performance (we can // read in reasonably-sized chunks of many lines), at the cost of some // complexity in buffer management. // We do this in a loop to avoid recursion when consuming more bytes, in // case the size of a line is very large compared to the chunk size we // read. 
while (true) { if (strlen($this->buf)) { // If we already have some data buffered, try to get the next line from // the buffer. Search through the buffer for a delimiter. This should be // the common case. $endl = strpos($this->buf, $this->delimiter, $this->pos); if ($endl !== false) { // We found a delimiter, so return the line it delimits. We leave // the buffer as-is so we don't need to reallocate it, in case it is // large relative to the size of a line. Instead, we move our cursor // within the buffer forward. $this->num++; $this->line = substr($this->buf, $this->pos, ($endl - $this->pos)); $this->pos = $endl + 1; return; } // We only have part of a line left in the buffer (no delimiter in the // remaining piece), so throw away the part we've already emitted and // continue below. $this->buf = substr($this->buf, $this->pos); $this->pos = 0; } // We weren't able to produce the next line from the bytes we already had // buffered, so read more bytes from the input stream. if ($this->eof) { // NOTE: We keep track of EOF (an empty read) so we don't make any more // reads afterward. Normally, we'll return from the first EOF read, // emit the line, and then next() will be called again. Without tracking // EOF, we'll attempt another read. A well-behaved implementation should // still return empty string, but we can protect against any issues // here by keeping a flag. $more = ''; } else { $more = $this->readMore(); } if (strlen($more)) { // We got some bytes, so add them to the buffer and then try again. $this->buf .= $more; continue; } else { // No more bytes. If we have a buffer, return its contents. We // potentially return part of a line here if the last line had no // delimiter, but that currently seems reasonable as a default // behaivor. If we don't have a buffer, we're done. 
$this->eof = true; if (strlen($this->buf)) { $this->num++; $this->line = $this->buf; $this->buf = null; } else { $this->valid = false; } break; } } } } diff --git a/src/filesystem/linesofalarge/LinesOfALargeExecFuture.php b/src/filesystem/linesofalarge/LinesOfALargeExecFuture.php index 994a5f3..5d941d6 100644 --- a/src/filesystem/linesofalarge/LinesOfALargeExecFuture.php +++ b/src/filesystem/linesofalarge/LinesOfALargeExecFuture.php @@ -1,118 +1,117 @@ future = $future; } /* -( Internals )---------------------------------------------------------- */ /** * On destruction, we terminate the subprocess if it hasn't exited already. * * @return void * @task internals */ public function __destruct() { if (!$this->future->isReady()) { $this->future->resolveKill(); } } /** - * The PHP foreach() construct calls rewind() once, so we allow the first - * rewind(), without effect. Subsequent rewinds mean misuse. + * The PHP `foreach()` construct calls rewind() once, so we allow the first + * `rewind()`, without effect. Subsequent rewinds mean misuse. * * @return void * @task internals */ protected function willRewind() { if ($this->didRewind) { throw new Exception( "You can not reiterate over a LinesOfALargeExecFuture object. The ". "entire goal of the construct is to avoid keeping output in memory. ". "What you are attempting to do is silly and doesn't make any sense."); } $this->didRewind = true; } /** * Read more data from the subprocess. * * @return string Bytes read from stdout. * @task internals */ protected function readMore() { $future = $this->future; while (true) { // Read is nonblocking, so we need to sit in this loop waiting for input // or we'll incorrectly signal EOF to the parent. $stdout = $future->readStdout(); $future->discardStdoutBuffer(); if (strlen($stdout)) { return $stdout; } // If we didn't read anything, we can exit the loop if the subprocess // has exited. if ($future->isReady()) { - // Throw if the process exits with a nozero status code. 
This makes + // Throw if the process exits with a nonzero status code. This makes // error handling simpler, and prevents us from returning part of a line // if the process terminates mid-output. $future->resolvex(); // Read and return anything that's left. $stdout = $future->readStdout(); $future->discardStdoutBuffer(); return $stdout; } } } } diff --git a/src/filesystem/linesofalarge/LinesOfALargeFile.php b/src/filesystem/linesofalarge/LinesOfALargeFile.php index d78373f..e0b72c0 100644 --- a/src/filesystem/linesofalarge/LinesOfALargeFile.php +++ b/src/filesystem/linesofalarge/LinesOfALargeFile.php @@ -1,108 +1,106 @@ fileName = Filesystem::resolvePath((string)$file_name); } /* -( Internals )---------------------------------------------------------- */ /** * Closes the file handle. * * @return void * @task internals */ public function __destruct() { $this->closeHandle(); } /** * Close the file handle, if it is open. * * @return $this * @task internals */ private function closeHandle() { if ($this->handle) { fclose($this->handle); $this->handle = null; } return $this; } /** * Closes the file handle if it is open, and reopens it. * * @return void * @task internals */ protected function willRewind() { $this->closeHandle(); $this->handle = @fopen($this->fileName, 'r'); if (!$this->handle) { throw new FilesystemException( $this->fileName, 'Failed to open file!'); } } /** * Read the file chunk-by-chunk. * * @return string Next chunk of the file. * @task internals */ public function readMore() { - // NOTE: At least on OSX in reasonably normal test cases, increasing the // size of this read has no impact on performance. 
$more = @fread($this->handle, 2048); if ($more === false) { throw new FilesystemException( $this->fileName, 'Failed to read file!'); } return $more; } } diff --git a/src/filesystem/linesofalarge/__tests__/LinesOfALargeExecFutureTestCase.php b/src/filesystem/linesofalarge/__tests__/LinesOfALargeExecFutureTestCase.php index c3f7701..67861e0 100644 --- a/src/filesystem/linesofalarge/__tests__/LinesOfALargeExecFutureTestCase.php +++ b/src/filesystem/linesofalarge/__tests__/LinesOfALargeExecFutureTestCase.php @@ -1,63 +1,60 @@ writeAndRead( "cat\ndog\nbird\n", array( 'cat', 'dog', 'bird', )); } public function testExecLargeFile() { $line = 'The quick brown fox jumps over the lazy dog.'; $n = 100; $this->writeAndRead( str_repeat($line."\n", $n), array_fill(0, $n, $line)); } public function testExecLongLine() { $line = str_repeat('x', 64 * 1024); $this->writeAndRead($line, array($line)); } public function testExecException() { $caught = null; try { $future = new ExecFuture('does-not-exist.exe.sh'); foreach (new LinesOfALargeExecFuture($future) as $line) { // ignore } } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof CommandException); } private function writeAndRead($write, $read) { $future = new ExecFuture('cat'); $future->write($write); $lines = array(); foreach (new LinesOfALargeExecFuture($future) as $line) { $lines[] = $line; } $this->assertEqual( $read, $lines, 'Write: '.phutil_utf8_shorten($write, 32)); } } diff --git a/src/filesystem/linesofalarge/__tests__/LinesOfALargeFileTestCase.php b/src/filesystem/linesofalarge/__tests__/LinesOfALargeFileTestCase.php index e82265b..0a9b1ac 100644 --- a/src/filesystem/linesofalarge/__tests__/LinesOfALargeFileTestCase.php +++ b/src/filesystem/linesofalarge/__tests__/LinesOfALargeFileTestCase.php @@ -1,131 +1,128 @@ writeAndRead( 'abcd', array( 'abcd', )); } public function testTerminalDelimiterPresent() { $this->writeAndRead( "bat\ncat\ndog\n", array( 'bat', 'cat', 'dog', )); } public function 
testTerminalDelimiterAbsent() { $this->writeAndRead( "bat\ncat\ndog", array( 'bat', 'cat', 'dog', )); } public function testChangeDelimiter() { $this->writeAndRead( "bat\1cat\1dog\1", array( 'bat', 'cat', 'dog', ), "\1"); } public function testEmptyLines() { $this->writeAndRead( "\n\nbat\n", array( '', '', 'bat', )); } public function testLargeFile() { $line = 'The quick brown fox jumps over the lazy dog.'; $n = 100; $this->writeAndRead( str_repeat($line."\n", $n), array_fill(0, $n, $line)); } public function testLongLine() { $line = str_repeat('x', 64 * 1024); $this->writeAndRead($line, array($line)); } public function testReadFailure() { $caught = null; try { $f = new LinesOfALargeFile('/does/not/exist.void'); $f->rewind(); } catch (FilesystemException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof $ex); } public function testLineFilter() { $write = "bat\ncat\ndog\nBat\nCat\nDog\n"; $read = array( 1 => 'cat', 4 => 'Cat', ); $tmp = new TempFile(); Filesystem::writeFile($tmp, $write); $lines = array(); $iterator = new PhutilCallbackFilterIterator( new LinesOfALargeFile($tmp), array($this, 'allowCatsOnly')); foreach ($iterator as $n => $line) { $lines[$n - 1] = $line; } $this->assertEqual( $read, $lines, 'Write: '.phutil_utf8_shorten($write, 32)); } public function allowCatsOnly($line) { $line = strtoupper($line); if ($line != 'CAT') { return null; } return $line; } private function writeAndRead($write, $read, $delimiter = "\n") { $tmp = new TempFile(); Filesystem::writeFile($tmp, $write); $lines = array(); $iterator = id(new LinesOfALargeFile($tmp))->setDelimiter($delimiter); foreach ($iterator as $n => $line) { $lines[$n - 1] = $line; } $this->assertEqual( $read, $lines, 'Write: '.phutil_utf8_shorten($write, 32)); } } diff --git a/src/future/Future.php b/src/future/Future.php index 1e497ca..fc40a77 100644 --- a/src/future/Future.php +++ b/src/future/Future.php @@ -1,191 +1,191 @@ getDefaultWait(); do { $this->checkException(); if ($this->isReady()) { 
break; } $read = $this->getReadSockets(); $write = $this->getWriteSockets(); if ($timeout !== null) { $elapsed = microtime(true) - $start; if ($elapsed > $timeout) { $this->checkException(); return null; } else { $wait = $timeout - $elapsed; } } if ($read || $write) { self::waitForSockets($read, $write, $wait); } } while (true); $this->checkException(); return $this->getResult(); } public function setException(Exception $ex) { $this->exception = $ex; return $this; } public function getException() { return $this->exception; } /** * If an exception was set by setException(), throw it. */ private function checkException() { if ($this->exception) { throw $this->exception; } } /** * Retrieve a list of sockets which we can wait to become readable while - * a future is resolving. If your future has sockets which can be select()ed, - * return them here (or in getWriteSockets()) to make the resolve loop do a - * select(). If you do not return sockets in either case, you'll get a busy - * wait. + * a future is resolving. If your future has sockets which can be + * `select()`ed, return them here (or in @{method:getWriteSockets}) to make + * the resolve loop do a `select()`. If you do not return sockets in either + * case, you'll get a busy wait. * * @return list A list of sockets which we expect to become readable. */ public function getReadSockets() { return array(); } /** * Retrieve a list of sockets which we can wait to become writable while a - * future is resolving. See getReadSockets(). + * future is resolving. See @{method:getReadSockets}. * * @return list A list of sockets which we expect to become writable. */ public function getWriteSockets() { return array(); } /** * Wait for activity on one of several sockets. * * @param list List of sockets expected to become readable. * @param list List of sockets expected to become writable. * @param float Timeout, in seconds. 
* @return void */ public static function waitForSockets( array $read_list, array $write_list, $timeout = 1) { if (!self::$handlerInstalled) { // If we're spawning child processes, we need to install a signal handler // here to catch cases like execing '(sleep 60 &) &' where the child // exits but a socket is kept open. But we don't actually need to do // anything because the SIGCHLD will interrupt the stream_select(), as // long as we have a handler registered. if (function_exists('pcntl_signal')) { if (!pcntl_signal(SIGCHLD, array('Future', 'handleSIGCHLD'))) { throw new Exception('Failed to install signal handler!'); } } self::$handlerInstalled = true; } $timeout_sec = (int)$timeout; $timeout_usec = (int)(1000000 * ($timeout - $timeout_sec)); $exceptfds = array(); $ok = @stream_select( $read_list, $write_list, $exceptfds, $timeout_sec, $timeout_usec); if ($ok === false) { // Hopefully, means we received a SIGCHLD. In the worst case, we degrade // to a busy wait. } } public static function handleSIGCHLD($signo) { // This function is a dummy, we just need to have some handler registered // so that PHP will get interrupted during stream_select(). If we don't // register a handler, stream_select() won't fail. } /** * Retrieve the final result of the future. This method will be called after - * the future is ready (as per isReady()) but before results are passed back - * to the caller. The major use of this function is that you can override it - * in subclasses to do postprocessing or error checking, which is + * the future is ready (as per @{method:isReady}) but before results are + * passed back to the caller. The major use of this function is that you can + * override it in subclasses to do postprocessing or error checking, which is * particularly useful if building application-specific futures on top of - * primitive transport futures (like CurlFuture and ExecFuture) which can - * make it tricky to hook this logic into the main pipeline. 
+ * primitive transport futures (like @{class:CurlFuture} and + * @{class:ExecFuture}) which can make it tricky to hook this logic into the + * main pipeline. * * @return mixed Final resolution of this future. */ protected function getResult() { return $this->result; } /** * Default amount of time to wait on stream select for this future. Normally * 1 second is fine, but if the future has a timeout sooner than that it * should return the amount of time left before the timeout. */ public function getDefaultWait() { return 1; } public function start() { $this->isReady(); return $this; } } diff --git a/src/future/FutureIterator.php b/src/future/FutureIterator.php index d314104..10165d8 100644 --- a/src/future/FutureIterator.php +++ b/src/future/FutureIterator.php @@ -1,327 +1,325 @@ new ExecFuture('wc -c a.txt'), * 'b.txt' => new ExecFuture('wc -c b.txt'), * 'c.txt' => new ExecFuture('wc -c c.txt'), * ); * * foreach (Futures($futures) as $key => $future) { * // IMPORTANT: keys are preserved but the order of elements is not. This * // construct iterates over the futures in the order they resolve, so the * // fastest future is the one you'll get first. This allows you to start * // doing followup processing as soon as possible. * * list($err, $stdout) = $future->resolve(); * do_some_processing($stdout); * } * * For a general overview of futures, see @{article:Using Futures}. * * @task basics Basics * @task config Configuring Iteration * @task iterator Iterator Interface * @task internal Internals - * - * @group futures */ final class FutureIterator implements Iterator { protected $wait = array(); protected $work = array(); protected $futures = array(); protected $key; protected $limit; protected $timeout; protected $isTimeout = false; /* -( Basics )------------------------------------------------------------- */ /** * Create a new iterator over a list of futures. 
By convention, use the * convenience function @{function:Futures} instead of instantiating this * class directly. * * @param list List of @{class:Future}s to resolve. * @task basics */ public function __construct(array $futures) { assert_instances_of($futures, 'Future'); $this->futures = $futures; } /** * Block until all futures resolve. * * @return void * @task basics */ public function resolveAll() { foreach ($this as $future) { $future->resolve(); } } /** * Add another future to the set of futures. This is useful if you have a * set of futures to run mostly in parallel, but some futures depend on * others. * * @param Future @{class:Future} to add to iterator * @task basics */ public function addFuture(Future $future, $key = null) { if ($key === null) { $this->futures[] = $future; $this->wait[] = last_key($this->futures); } else if (!isset($this->futures[$key])) { $this->futures[$key] = $future; $this->wait[] = $key; } else { throw new Exception("Invalid key {$key}"); } // Start running the future if we don't have $this->limit futures running // already. updateWorkingSet() won't start running the future if there's no // limit, so we'll manually poke it here in that case. $this->updateWorkingSet(); if (!$this->limit) { $future->isReady(); } return $this; } /* -( Configuring Iteration )---------------------------------------------- */ /** * Set a maximum amount of time you want to wait before the iterator will * yield a result. If no future has resolved yet, the iterator will yield * null for key and value. Among other potential uses, you can use this to * show some busy indicator: * * foreach (Futures($futures)->setUpdateInterval(1) as $future) { * if ($future === null) { * echo "Still working...\n"; * } else { * // ... * } * } * * This will echo "Still working..." once per second as long as futures are * resolving. By default, FutureIterator never yields null. * * @param float Maximum number of seconds to block waiting on futures before * yielding null. 
* @return this * * @task config */ public function setUpdateInterval($interval) { $this->timeout = $interval; return $this; } /** * Limit the number of simultaneously executing futures. * * foreach (Futures($futures)->limit(4) as $future) { * // Run no more than 4 futures simultaneously. * } * * @param int Maximum number of simultaneous jobs allowed. * @return this * * @task config */ public function limit($max) { $this->limit = $max; return $this; } /* -( Iterator Interface )------------------------------------------------- */ /** * @task iterator */ public function rewind() { $this->wait = array_keys($this->futures); $this->work = null; $this->updateWorkingSet(); $this->next(); } /** * @task iterator */ public function next() { $this->key = null; if (!count($this->wait)) { return; } $read_sockets = array(); $write_sockets = array(); $start = microtime(true); $timeout = $this->timeout; $this->isTimeout = false; $check = $this->getWorkingSet(); $resolve = null; do { $read_sockets = array(); $write_sockets = array(); $can_use_sockets = true; $wait_time = 1; foreach ($check as $wait => $key) { $future = $this->futures[$key]; try { if ($future->getException()) { $resolve = $wait; continue; } if ($future->isReady()) { if ($resolve === null) { $resolve = $wait; } continue; } $got_sockets = false; $socks = $future->getReadSockets(); if ($socks) { $got_sockets = true; foreach ($socks as $socket) { $read_sockets[] = $socket; } } $socks = $future->getWriteSockets(); if ($socks) { $got_sockets = true; foreach ($socks as $socket) { $write_sockets[] = $socket; } } // If any currently active future had neither read nor write sockets, // we can't wait for the current batch of items using sockets. if (!$got_sockets) { $can_use_sockets = false; } else { $wait_time = min($wait_time, $future->getDefaultWait()); } } catch (Exception $ex) { $this->futures[$key]->setException($ex); $resolve = $wait; break; } } if ($resolve === null) { // Check for a setUpdateInterval() timeout. 
if ($timeout !== null) { $elapsed = microtime(true) - $start; if ($elapsed > $timeout) { $this->isTimeout = true; return; } else { $wait_time = $timeout - $elapsed; } } if ($can_use_sockets) { Future::waitForSockets($read_sockets, $write_sockets, $wait_time); } else { usleep(1000); } } } while ($resolve === null); $this->key = $this->wait[$resolve]; unset($this->wait[$resolve]); $this->updateWorkingSet(); } /** * @task iterator */ public function current() { if ($this->isTimeout) { return null; } return $this->futures[$this->key]; } /** * @task iterator */ public function key() { if ($this->isTimeout) { return null; } return $this->key; } /** * @task iterator */ public function valid() { if ($this->isTimeout) { return true; } return ($this->key !== null); } /* -( Internals )---------------------------------------------------------- */ /** * @task internal */ protected function getWorkingSet() { if ($this->work === null) { return $this->wait; } return $this->work; } /** * @task internal */ protected function updateWorkingSet() { if (!$this->limit) { return; } $old = $this->work; $this->work = array_slice($this->wait, 0, $this->limit, true); // If we're using a limit, our futures are sleeping and need to be polled // to begin execution, so poll any futures which weren't in our working set // before. 
foreach ($this->work as $work => $key) { if (!isset($old[$work])) { $this->futures[$key]->isReady(); } } } } diff --git a/src/future/FutureProxy.php b/src/future/FutureProxy.php index 6ef9250..c3bb02c 100644 --- a/src/future/FutureProxy.php +++ b/src/future/FutureProxy.php @@ -1,74 +1,73 @@ setProxiedFuture($proxied); } } public function setProxiedFuture(Future $proxied) { $this->proxied = $proxied; return $this; } protected function getProxiedFuture() { if (!$this->proxied) { throw new Exception('The proxied future has not been provided yet.'); } return $this->proxied; } public function isReady() { return $this->getProxiedFuture()->isReady(); } public function resolve($timeout = null) { $this->getProxiedFuture()->resolve($timeout); return $this->getResult(); } public function setException(Exception $ex) { $this->getProxiedFuture()->setException($ex); return $this; } public function getException() { return $this->getProxiedFuture()->getException(); } public function getReadSockets() { return $this->getProxiedFuture()->getReadSockets(); } public function getWriteSockets() { return $this->getProxiedFuture()->getWriteSockets(); } protected function getResult() { if ($this->result === null) { $result = $this->getProxiedFuture()->resolve(); $result = $this->didReceiveResult($result); $this->result = $result; } return $this->result; } public function start() { $this->getProxiedFuture()->start(); return $this; } abstract protected function didReceiveResult($result); } diff --git a/src/future/ImmediateFuture.php b/src/future/ImmediateFuture.php index bc1a85f..a5fe0a3 100644 --- a/src/future/ImmediateFuture.php +++ b/src/future/ImmediateFuture.php @@ -1,19 +1,17 @@ result = $result; } public function isReady() { return true; } } diff --git a/src/future/__tests__/FutureIteratorTestCase.php b/src/future/__tests__/FutureIteratorTestCase.php index 18d8e17..a310ee2 100644 --- a/src/future/__tests__/FutureIteratorTestCase.php +++ b/src/future/__tests__/FutureIteratorTestCase.php 
@@ -1,26 +1,23 @@ limit(2); $results = array(); foreach ($iterator as $future) { if ($future === $future1) { $iterator->addFuture($future2); } $results[] = $future->resolve(); } $this->assertEqual(2, count($results)); } } diff --git a/src/future/asana/PhutilAsanaFuture.php b/src/future/asana/PhutilAsanaFuture.php index 8842386..24538dd 100644 --- a/src/future/asana/PhutilAsanaFuture.php +++ b/src/future/asana/PhutilAsanaFuture.php @@ -1,80 +1,77 @@ accessToken = $token; return $this; } public function setRawAsanaQuery($action, array $params = array()) { $this->action = $action; $this->params = $params; return $this; } public function setMethod($method) { $this->method = $method; return $this; } protected function getProxiedFuture() { if (!$this->future) { $params = $this->params; if (!$this->action) { throw new Exception('You must setRawAsanaQuery()!'); } if (!$this->accessToken) { throw new Exception('You must setAccessToken()!'); } $uri = new PhutilURI('https://app.asana.com/'); $uri->setPath('/api/1.0/'.ltrim($this->action, '/')); $future = new HTTPSFuture($uri); $future->setData($this->params); $future->addHeader('Authorization', 'Bearer '.$this->accessToken); $future->setMethod($this->method); $this->future = $future; } return $this->future; } protected function didReceiveResult($result) { list($status, $body, $headers) = $result; if ($status->isError()) { throw $status; } $data = json_decode($body, true); if (!is_array($data)) { throw new Exception("Expected JSON response from Asana, got: {$body}"); } if (idx($data, 'errors')) { $errors = print_r($data['errors'], true); throw new Exception("Received errors from Asana: {$errors}"); } return $data['data']; } } diff --git a/src/future/aws/PhutilAWSEC2Future.php b/src/future/aws/PhutilAWSEC2Future.php index b3eeef0..2ad81d7 100644 --- a/src/future/aws/PhutilAWSEC2Future.php +++ b/src/future/aws/PhutilAWSEC2Future.php @@ -1,12 +1,9 @@ httpStatus = $http_status; $this->requestID = idx($params, 'RequestID'); 
$this->params = $params; $desc = array(); $desc[] = 'AWS Request Failed'; $desc[] = 'HTTP Status Code: '.$http_status; if ($this->requestID) { $desc[] = 'AWS Request ID: '.$this->requestID; $errors = idx($params, 'Errors'); if ($errors) { $desc[] = 'AWS Errors:'; foreach ($errors as $error) { list($code, $message) = $error; $desc[] = " - {$code}: {$message}\n"; } } } else { $desc[] = 'Response Body: '.idx($params, 'body'); } $desc = implode("\n", $desc); parent::__construct($desc); } public function getRequestID() { return $this->requestID; } public function getHTTPStatus() { return $this->httpStatus; } } diff --git a/src/future/aws/PhutilAWSFuture.php b/src/future/aws/PhutilAWSFuture.php index 9df8d1c..915b904 100644 --- a/src/future/aws/PhutilAWSFuture.php +++ b/src/future/aws/PhutilAWSFuture.php @@ -1,144 +1,141 @@ awsAccessKey = $access; $this->awsPrivateKey = $private; return $this; } public function getAWSAccessKey() { return $this->awsAccessKey; } public function getAWSPrivateKey() { return $this->awsPrivateKey; } public function getAWSRegion() { return $this->awsRegion; } public function setAWSRegion($region) { $this->awsRegion = $region; return $this; } public function getHost() { $host = $this->getServiceName().'.'.$this->awsRegion.'.amazonaws.com'; return $host; } public function setRawAWSQuery($action, array $params = array()) { $this->params = $params; $this->params['Action'] = $action; return $this; } protected function getProxiedFuture() { if (!$this->future) { $params = $this->params; if (!$this->params) { throw new Exception('You must setRawAWSQuery()!'); } if (!$this->getAWSAccessKey()) { throw new Exception('You must setAWSKeys()!'); } $params['AWSAccessKeyId'] = $this->getAWSAccessKey(); $params['Version'] = '2011-12-15'; $params['Timestamp'] = date('c'); $params = $this->sign($params); $uri = new PhutilURI('http://'.$this->getHost().'/'); $uri->setQueryParams($params); $this->future = new HTTPFuture($uri); } return $this->future; } protected 
function didReceiveResult($result) { list($status, $body, $headers) = $result; try { $xml = @(new SimpleXMLElement($body)); } catch (Exception $ex) { $xml = null; } if ($status->isError() || !$xml) { if (!($status instanceof HTTPFutureResponseStatusHTTP)) { throw $status; } $params = array( 'body' => $body, ); if ($xml) { $params['RequestID'] = $xml->RequestID[0]; foreach ($xml->Errors[0] as $error) { $params['Errors'][] = array($error->Code, $error->Message); } } throw new PhutilAWSException($status->getStatusCode(), $params); } return $xml; } /** * http://bit.ly/wU0JFh */ private function sign(array $params) { $params['SignatureMethod'] = 'HmacSHA256'; $params['SignatureVersion'] = '2'; ksort($params); $pstr = array(); foreach ($params as $key => $value) { $pstr[] = rawurlencode($key).'='.rawurlencode($value); } $pstr = implode('&', $pstr); $sign = "GET"."\n". strtolower($this->getHost())."\n". "/"."\n". $pstr; $hash = hash_hmac( 'sha256', $sign, $this->getAWSPrivateKey(), $raw_ouput = true); $params['Signature'] = base64_encode($hash); return $params; } } diff --git a/src/future/aws/PhutilAWSS3Future.php b/src/future/aws/PhutilAWSS3Future.php index f480465..b3e7c3e 100644 --- a/src/future/aws/PhutilAWSS3Future.php +++ b/src/future/aws/PhutilAWSS3Future.php @@ -1,12 +1,9 @@ command = $command; $this->error = $error; $this->stdout = $stdout; $this->stderr = $stderr; $summary = array(); $summary[] = $this->summarize($message); $summary[] = 'COMMAND'; $summary[] = $this->summarize($command); $summary[] = null; $summary[] = 'STDOUT'; $summary[] = $this->summarize($stdout); $summary[] = null; $summary[] = 'STDERR'; $summary[] = $this->summarize($stderr); $summary = implode("\n", $summary); parent::__construct($summary); } public function getCommand() { return $this->command; } public function getError() { return $this->error; } public function getStdout() { return $this->stdout; } public function getStderr() { return $this->stderr; } private function 
summarize($string) { if (!strlen($string)) { return '(empty)'; } $limit = 1000; $len = strlen($string); if ($len > $limit) { $cut = $len - $limit; $suffix = '... ('.number_format($cut).' more bytes) ...'; if ($cut > strlen($suffix)) { $string = substr($string, 0, $limit).$suffix; } } // Strip out any credentials for the purpose of building a human readable // summary of the exception, since these are rarely-if-ever useful when // debugging, but can expose otherwise sensitive information. $string = phutil_censor_credentials($string); return $string; } } diff --git a/src/future/exec/ExecFuture.php b/src/future/exec/ExecFuture.php index 9ec75ce..0168b9d 100644 --- a/src/future/exec/ExecFuture.php +++ b/src/future/exec/ExecFuture.php @@ -1,875 +1,873 @@ array('pipe', 'r'), // stdin 1 => array('pipe', 'w'), // stdout 2 => array('pipe', 'w'), // stderr ); /* -( Creating ExecFutures )----------------------------------------------- */ /** * Create a new ExecFuture. * * $future = new ExecFuture('wc -l %s', $file_path); * - * @param string ##sprintf()##-style command string which will be passed + * @param string `sprintf()`-style command string which will be passed * through @{function:csprintf} with the rest of the arguments. * @param ... Zero or more additional arguments for @{function:csprintf}. * @return ExecFuture ExecFuture for running the specified command. * @task create */ public function __construct($command) { $argv = func_get_args(); $this->command = call_user_func_array('csprintf', $argv); $this->stdin = new PhutilRope(); } /* -( Command Information )------------------------------------------------ */ /** * Retrieve the raw command to be executed. * * @return string Raw command. * @task info */ public function getCommand() { return $this->command; } /** * Retrieve the byte limit for the stderr buffer. * * @return int Maximum buffer size, in bytes. 
* @task info */ public function getStderrSizeLimit() { return $this->stderrSizeLimit; } /** * Retrieve the byte limit for the stdout buffer. * * @return int Maximum buffer size, in bytes. * @task info */ public function getStdoutSizeLimit() { return $this->stdoutSizeLimit; } /** * Get the process's pid. This only works after execution is initiated, e.g. * by a call to start(). * * @return int Process ID of the executing process. * @task info */ public function getPID() { $status = $this->procGetStatus(); return $status['pid']; } /* -( Configuring Execution )---------------------------------------------- */ /** * Set a maximum size for the stdout read buffer. To limit stderr, see * @{method:setStderrSizeLimit}. The major use of these methods is to use less * memory if you are running a command which sometimes produces huge volumes * of output that you don't really care about. * * NOTE: Setting this to 0 means "no buffer", not "unlimited buffer". * * @param int Maximum size of the stdout read buffer. * @return this * @task config */ public function setStdoutSizeLimit($limit) { $this->stdoutSizeLimit = $limit; return $this; } /** * Set a maximum size for the stderr read buffer. * See @{method:setStdoutSizeLimit} for discussion. * * @param int Maximum size of the stderr read buffer. * @return this * @task config */ public function setStderrSizeLimit($limit) { $this->stderrSizeLimit = $limit; return $this; } /** * Set the maximum internal read buffer size this future. The future will * block reads once the internal stdout or stderr buffer exceeds this size. * * NOTE: If you @{method:resolve} a future with a read buffer limit, you may * block forever! * - * TODO: We should probably release the read buffer limit during `resolve()`, - * or otherwise detect this. For now, be careful. + * TODO: We should probably release the read buffer limit during + * @{method:resolve}, or otherwise detect this. For now, be careful. 
* * @param int|null Maximum buffer size, or `null` for unlimited. * @return this */ public function setReadBufferSize($read_buffer_size) { $this->readBufferSize = $read_buffer_size; return $this; } /** * Set the current working directory to use when executing the command. * * @param string Directory to set as CWD before executing the command. * @return this * @task config */ public function setCWD($cwd) { $this->cwd = $cwd; return $this; } /** * Set the environment variables to use when executing the command. * * @param array Environment variables to use when executing the command. * @return this * @task config */ public function setEnv($env, $wipe_process_env = false) { if ($wipe_process_env) { $this->env = $env; } else { $this->env = $env + $_ENV; } return $this; } /** * Set the value of a specific environmental variable for this command. * * @param string Environmental variable name. * @param string|null New value, or null to remove this variable. * @return this * @task config */ public function updateEnv($key, $value) { if (!is_array($this->env)) { $this->env = $_ENV; } if ($value === null) { unset($this->env[$key]); } else { $this->env[$key] = $value; } return $this; } /* -( Interacting With Commands )------------------------------------------ */ /** * Read and return output from stdout and stderr, if any is available. This * method keeps a read cursor on each stream, but the entire streams are * still returned when the future resolves. You can call read() again after * resolving the future to retrieve only the parts of the streams you did not * previously read: * * $future = new ExecFuture('...'); * // ... * list($stdout) = $future->read(); // Returns output so far * list($stdout) = $future->read(); // Returns new output since first call * // ... 
* list($stdout) = $future->resolvex(); // Returns ALL output * list($stdout) = $future->read(); // Returns unread output * * NOTE: If you set a limit with @{method:setStdoutSizeLimit} or * @{method:setStderrSizeLimit}, this method will not be able to read data * past the limit. * * NOTE: If you call @{method:discardBuffers}, all the stdout/stderr data * will be thrown away and the cursors will be reset. * * @return pair <$stdout, $stderr> pair with new output since the last call * to this method. * @task interact */ public function read() { $stdout = $this->readStdout(); $result = array( $stdout, (string)substr($this->stderr, $this->stderrPos), ); $this->stderrPos = strlen($this->stderr); return $result; } public function readStdout() { if ($this->start) { $this->isReady(); // Sync } $result = (string)substr($this->stdout, $this->stdoutPos); $this->stdoutPos = strlen($this->stdout); return $result; } /** * Write data to stdin of the command. * * @param string Data to write. * @param bool If true, keep the pipe open for writing. By default, the pipe * will be closed as soon as possible so that commands which * listen for EOF will execute. If you want to keep the pipe open * past the start of command execution, do an empty write with * `$keep_pipe = true` first. * @return this * @task interact */ public function write($data, $keep_pipe = false) { if (strlen($data)) { if (!$this->stdin) { throw new Exception(pht('Writing to a closed pipe!')); } $this->stdin->append($data); } $this->closePipe = !$keep_pipe; return $this; } /** * Permanently discard the stdout and stderr buffers and reset the read * cursors. This is basically useful only if you are streaming a large amount * of data from some process: * * $future = new ExecFuture('zcat huge_file.gz'); * do { * $done = $future->resolve(0.1); // Every 100ms, * list($stdout) = $future->read(); // read output... * echo $stdout; // send it somewhere... * $future->discardBuffers(); // and then free the buffers. 
* } while ($done === null); * * Conceivably you might also need to do this if you're writing a client using - * ExecFuture and ##netcat##, but you probably should not do that. + * @{class:ExecFuture} and `netcat`, but you probably should not do that. * * NOTE: This completely discards the data. It won't be available when the * future resolves. This is almost certainly only useful if you need the * buffer memory for some reason. * * @return this * @task interact */ public function discardBuffers() { $this->discardStdoutBuffer(); $this->stderr = ''; $this->stderrPos = 0; return $this; } public function discardStdoutBuffer() { $this->stdout = ''; $this->stdoutPos = 0; return $this; } /** * Returns true if this future was killed by a timeout configured with * @{method:setTimeout}. * * @return bool True if the future was killed for exceeding its time limit. */ public function getWasKilledByTimeout() { return $this->killedByTimeout; } /* -( Configuring Execution )---------------------------------------------- */ /** * Set a hard limit on execution time. If the command runs longer, it will * be killed and the future will resolve with an error code. You can test * if a future was killed by a timeout with @{method:getWasKilledByTimeout}. * * @param int Maximum number of seconds this command may execute for. * @return this * @task config */ public function setTimeout($seconds) { $this->timeout = $seconds; return $this; } /* -( Resolving Execution )------------------------------------------------ */ /** * Resolve a command you expect to exit with return code 0. Works like * @{method:resolve}, but throws if $err is nonempty. Returns only * $stdout and $stderr. See also @{function:execx}. * * list($stdout, $stderr) = $future->resolvex(); * * @param float Optional timeout after which resolution will pause and * execution will return to the caller. * @return pair <$stdout, $stderr> pair. 
* @task resolve */ public function resolvex($timeout = null) { list($err, $stdout, $stderr) = $this->resolve($timeout); if ($err) { $cmd = $this->command; throw new CommandException( "Command failed with error #{$err}!", $cmd, $err, $stdout, $stderr); } return array($stdout, $stderr); } /** * Resolve a command you expect to return valid JSON. Works like * @{method:resolvex}, but also throws if stderr is nonempty, or stdout is not * valid JSON. Returns a PHP array, decoded from the JSON command output. * * @param float Optional timeout after which resolution will pause and * execution will return to the caller. * @return array PHP array, decoded from JSON command output. * @task resolve */ public function resolveJSON($timeout = null) { list($stdout, $stderr) = $this->resolvex($timeout); if (strlen($stderr)) { $cmd = $this->command; throw new CommandException( "JSON command '{$cmd}' emitted text to stderr when none was expected: ". $stderr, $cmd, 0, $stdout, $stderr); } $object = json_decode($stdout, true); if (!is_array($object)) { $cmd = $this->command; throw new CommandException( "JSON command '{$cmd}' did not produce a valid JSON object on stdout: ". $stdout, $cmd, 0, $stdout, $stderr); } return $object; } /** * Resolve the process by abruptly terminating it. * * @return list List of results. * @task resolve */ public function resolveKill() { if (!$this->result) { if (defined('SIGKILL')) { $signal = SIGKILL; } else { $signal = 9; } proc_terminate($this->proc, $signal); $this->result = array( 128 + $signal, $this->stdout, $this->stderr); $this->closeProcess(); } return $this->result; } /* -( Internals )---------------------------------------------------------- */ /** * Provides read sockets to the future core. * * @return list List of read sockets. 
* @task internal */ public function getReadSockets() { list($stdin, $stdout, $stderr) = $this->pipes; $sockets = array(); if (isset($stdout) && !feof($stdout)) { $sockets[] = $stdout; } if (isset($stderr) && !feof($stderr)) { $sockets[] = $stderr; } return $sockets; } /** * Provides write sockets to the future core. * * @return list List of write sockets. * @task internal */ public function getWriteSockets() { list($stdin, $stdout, $stderr) = $this->pipes; $sockets = array(); if (isset($stdin) && $this->stdin->getByteLength() && !feof($stdin)) { $sockets[] = $stdin; } return $sockets; } /** * Determine if the read buffer is empty. * * @return bool True if the read buffer is empty. * @task internal */ public function isReadBufferEmpty() { return !strlen($this->stdout); } /** * Determine if the write buffer is empty. * * @return bool True if the write buffer is empty. * @task internal */ public function isWriteBufferEmpty() { return !$this->getWriteBufferSize(); } /** * Determine the number of bytes in the write buffer. * * @return int Number of bytes in the write buffer. * @task internal */ public function getWriteBufferSize() { if (!$this->stdin) { return 0; } return $this->stdin->getByteLength(); } /** * Reads some bytes from a stream, discarding output once a certain amount * has been accumulated. * * @param resource Stream to read from. * @param int Maximum number of bytes to return from $stream. If * additional bytes are available, they will be read and * discarded. * @param string Human-readable description of stream, for exception * message. * @param int Maximum number of bytes to read. * @return string The data read from the stream. 
* @task internal */ private function readAndDiscard($stream, $limit, $description, $length) { $output = ''; if ($length <= 0) { return ''; } do { $data = fread($stream, min($length, 64 * 1024)); if (false === $data) { throw new Exception('Failed to read from '.$description); } $read_bytes = strlen($data); if ($read_bytes > 0 && $limit > 0) { if ($read_bytes > $limit) { $data = substr($data, 0, $limit); } $output .= $data; $limit -= strlen($data); } if (strlen($output) >= $length) { break; } } while ($read_bytes > 0); return $output; } /** * Begin or continue command execution. * * @return bool True if future has resolved. * @task internal */ public function isReady() { // NOTE: We have soft dependencies on PhutilServiceProfiler and // PhutilErrorTrap here. These depencies are soft to avoid the need to // build them into the Phage agent. Under normal circumstances, these // classes are always available. if (!$this->pipes) { // NOTE: See note above about Phage. if (class_exists('PhutilServiceProfiler')) { $profiler = PhutilServiceProfiler::getInstance(); $this->profilerCallID = $profiler->beginServiceCall( array( 'type' => 'exec', 'command' => (string)$this->command, )); } if (!$this->start) { // We might already have started the timer via initating resolution. $this->start = microtime(true); } $unmasked_command = $this->command; if ($unmasked_command instanceof PhutilCommandString) { $unmasked_command = $unmasked_command->getUnmaskedString(); } $pipes = array(); if (phutil_is_windows()) { // See T4395. proc_open under Windows uses "cmd /C [cmd]", which will // strip the first and last quote when there aren't exactly two quotes // (and some other conditions as well). This results in a command that // looks like `command" "path to my file" "something someting` which is // clearly wrong. By surrounding the command string with quotes we can // be sure this process is harmless. 
if (strpos($unmasked_command, '"') !== false) { $unmasked_command = '"'.$unmasked_command.'"'; } } // NOTE: See note above about Phage. if (class_exists('PhutilErrorTrap')) { $trap = new PhutilErrorTrap(); } else { $trap = null; } $proc = @proc_open( $unmasked_command, self::$descriptorSpec, $pipes, $this->cwd, $this->env); if ($trap) { $err = $trap->getErrorsAsString(); $trap->destroy(); } else { $err = error_get_last(); } if (!is_resource($proc)) { throw new Exception("Failed to proc_open(): {$err}"); } $this->pipes = $pipes; $this->proc = $proc; list($stdin, $stdout, $stderr) = $pipes; if (!phutil_is_windows()) { // On Windows, there's no such thing as nonblocking interprocess I/O. // Just leave the sockets blocking and hope for the best. Some features // will not work. if ((!stream_set_blocking($stdout, false)) || (!stream_set_blocking($stderr, false)) || (!stream_set_blocking($stdin, false))) { $this->__destruct(); throw new Exception('Failed to set streams nonblocking.'); } } $this->tryToCloseStdin(); return false; } if (!$this->proc) { return true; } list($stdin, $stdout, $stderr) = $this->pipes; while (isset($this->stdin) && $this->stdin->getByteLength()) { $write_segment = $this->stdin->getAnyPrefix(); $bytes = fwrite($stdin, $write_segment); if ($bytes === false) { throw new Exception('Unable to write to stdin!'); } else if ($bytes) { $this->stdin->removeBytesFromHead($bytes); } else { // Writes are blocked for now. break; } } $this->tryToCloseStdin(); // Read status before reading pipes so that we can never miss data that // arrives between our last read and the process exiting. 
$status = $this->procGetStatus(); $read_buffer_size = $this->readBufferSize; $max_stdout_read_bytes = PHP_INT_MAX; $max_stderr_read_bytes = PHP_INT_MAX; if ($read_buffer_size !== null) { $max_stdout_read_bytes = $read_buffer_size - strlen($this->stdout); $max_stderr_read_bytes = $read_buffer_size - strlen($this->stderr); } if ($max_stdout_read_bytes > 0) { $this->stdout .= $this->readAndDiscard( $stdout, $this->getStdoutSizeLimit() - strlen($this->stdout), 'stdout', $max_stdout_read_bytes); } if ($max_stderr_read_bytes > 0) { $this->stderr .= $this->readAndDiscard( $stderr, $this->getStderrSizeLimit() - strlen($this->stderr), 'stderr', $max_stderr_read_bytes); } if (!$status['running']) { $this->result = array( $status['exitcode'], $this->stdout, $this->stderr, ); $this->closeProcess(); return true; } $elapsed = (microtime(true) - $this->start); if ($this->timeout && ($elapsed >= $this->timeout)) { $this->killedByTimeout = true; $this->resolveKill(); return true; } } /** * @return void * @task internal */ public function __destruct() { if (!$this->proc) { return; } // NOTE: If we try to proc_close() an open process, we hang indefinitely. To // avoid this, kill the process explicitly if it's still running. $status = $this->procGetStatus(); if ($status['running']) { $this->resolveKill(); } else { $this->closeProcess(); } } /** * Close and free resources if necessary. * * @return void * @task internal */ private function closeProcess() { foreach ($this->pipes as $pipe) { if (isset($pipe)) { @fclose($pipe); } } $this->pipes = array(null, null, null); if ($this->proc) { @proc_close($this->proc); $this->proc = null; } $this->stdin = null; if ($this->profilerCallID !== null) { $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall( $this->profilerCallID, array( 'err' => $this->result ? idx($this->result, 0) : null, )); $this->profilerCallID = null; } } /** - * Execute proc_get_status(), but avoid pitfalls. 
+ * Execute `proc_get_status()`, but avoid pitfalls. * * @return dict Process status. * @task internal */ private function procGetStatus() { // After the process exits, we only get one chance to read proc_get_status() // before it starts returning garbage. Make sure we don't throw away the // last good read. if ($this->procStatus) { if (!$this->procStatus['running']) { return $this->procStatus; } } $this->procStatus = proc_get_status($this->proc); return $this->procStatus; } /** * Try to close stdin, if we're done using it. This keeps us from hanging if * the process on the other end of the pipe is waiting for EOF. * * @return void * @task internal */ private function tryToCloseStdin() { if (!$this->closePipe) { // We've been told to keep the pipe open by a call to write(..., true). return; } if ($this->stdin->getByteLength()) { // We still have bytes to write. return; } list($stdin) = $this->pipes; if (!$stdin) { // We've already closed stdin. return; } // There's nothing stopping us from closing stdin, so close it. @fclose($stdin); $this->pipes[0] = null; } public function getDefaultWait() { $wait = parent::getDefaultWait(); if ($this->timeout) { if (!$this->start) { $this->start = microtime(true); } $elapsed = (microtime(true) - $this->start); $wait = max(0, min($this->timeout - $elapsed, $wait)); } return $wait; } - } diff --git a/src/future/exec/PhutilExecPassthru.php b/src/future/exec/PhutilExecPassthru.php index 0aac4a3..da63465 100644 --- a/src/future/exec/PhutilExecPassthru.php +++ b/src/future/exec/PhutilExecPassthru.php @@ -1,162 +1,160 @@ execute(); * * You can set the current working directory for the command with * @{method:setCWD}, and set the environment with @{method:setEnv}. 
* * @task command Executing Passthru Commands * @task config Configuring Passthru Commands - * - * @group exec */ final class PhutilExecPassthru extends Phobject { private $command; private $env; private $cwd; /* -( Executing Passthru Commands )---------------------------------------- */ /** * Build a new passthru command. * * $exec = new PhutilExecPassthru('ls %s', $dir); * * @param string Command pattern. See @{function:csprintf}. * @param ... Pattern arguments. * * @task command */ public function __construct($pattern /* , ... */) { $args = func_get_args(); $this->command = call_user_func_array('csprintf', $args); } /** * Execute this command. * * @return int Error code returned by the subprocess. * * @task command */ public function execute() { $command = $this->command; $profiler = PhutilServiceProfiler::getInstance(); $call_id = $profiler->beginServiceCall( array( 'type' => 'exec', 'subtype' => 'passthru', 'command' => $command, )); $spec = array(STDIN, STDOUT, STDERR); $pipes = array(); if ($command instanceof PhutilCommandString) { $unmasked_command = $command->getUnmaskedString(); } else { $unmasked_command = $command; } $env = $this->env; $cwd = $this->cwd; $options = array(); if (phutil_is_windows()) { // Without 'bypass_shell', things like launching vim don't work properly, // and we can't execute commands with spaces in them, and all commands // invoked from git bash fail horridly, and everything is a mess in // general. $options['bypass_shell'] = true; } $trap = new PhutilErrorTrap(); $proc = @proc_open( $unmasked_command, $spec, $pipes, $cwd, $env, $options); $errors = $trap->getErrorsAsString(); $trap->destroy(); if (!is_resource($proc)) { throw new Exception("Failed to passthru proc_open(): {$errors}"); } $err = proc_close($proc); $profiler->endServiceCall( $call_id, array( 'err' => $err, )); return $err; } /* -( Configuring Passthru Commands )-------------------------------------- */ /** * Set environmental variables for the subprocess. 
* * By default, variables are added to the environment of this process. You * can optionally wipe the environment and pass only the specified values. * * // Env will have "X" and current env ("PATH", etc.) * $exec->setEnv(array('X' => 'y')); * * // Env will have ONLY "X". * $exec->setEnv(array('X' => 'y'), $wipe_process_env = true); * * @param dict Dictionary of environmental variables. * @param bool Optionally, pass true to wipe the existing environment clean. * @return this * * @task config */ public function setEnv(array $env, $wipe_process_env = false) { if ($wipe_process_env) { $this->env = $env; } else { $this->env = $env + $_ENV; } return $this->env; } /** * Set the current working directory for the subprocess (that is, set where * the subprocess will execute). If not set, the default value is the parent's * current working directory. * * @param string Directory to execute the subprocess in. * @return this * * @task config */ public function setCWD($cwd) { $this->cwd = $cwd; return $this; } } diff --git a/src/future/exec/__tests__/ExecFutureTestCase.php b/src/future/exec/__tests__/ExecFutureTestCase.php index 563a25b..cff5902 100644 --- a/src/future/exec/__tests__/ExecFutureTestCase.php +++ b/src/future/exec/__tests__/ExecFutureTestCase.php @@ -1,164 +1,156 @@ write('')->resolvex(); $this->assertEqual('', $stdout); } public function testKeepPipe() { - // NOTE: This is mosty testing the semantics of $keep_pipe in write(). list($stdout) = id(new ExecFuture('cat')) ->write('', true) ->start() ->write('x', true) ->write('y', true) ->write('z', false) ->resolvex(); $this->assertEqual('xyz', $stdout); } public function testLargeBuffer() { - // NOTE: This is mostly a coverage test to hit branches where we're still // flushing a buffer. 
$data = str_repeat('x', 1024 * 1024 * 4); list($stdout) = id(new ExecFuture('cat'))->write($data)->resolvex(); $this->assertEqual($data, $stdout); } public function testBufferLimit() { $data = str_repeat('x', 1024 * 1024); list($stdout) = id(new ExecFuture('cat')) ->setStdoutSizeLimit(1024) ->write($data) ->resolvex(); $this->assertEqual(substr($data, 0, 1024), $stdout); } public function testResolveTimeoutTestShouldRunLessThan1Sec() { - // NOTE: This tests interactions between the resolve() timeout and the // ExecFuture timeout, which are similar but not identical. $future = id(new ExecFuture('sleep 32000'))->start(); $future->setTimeout(32000); // We expect this to return in 0.01s. $result = $future->resolve(0.01); $this->assertEqual($result, null); // We expect this to now force the time out / kill immediately. If we don't // do this, we'll hang when exiting until our subprocess exits (32000 // seconds!) $future->setTimeout(0.01); $future->resolve(); } public function testTimeoutTestShouldRunLessThan1Sec() { - // NOTE: This is partly testing that we choose appropriate select wait // times; this test should run for significantly less than 1 second. $future = new ExecFuture('sleep 32000'); list($err) = $future->setTimeout(0.01)->resolve(); $this->assertTrue($err > 0); $this->assertTrue($future->getWasKilledByTimeout()); } public function testMultipleTimeoutsTestShouldRunLessThan1Sec() { $futures = array(); for ($ii = 0; $ii < 4; $ii++) { $futures[] = id(new ExecFuture('sleep 32000'))->setTimeout(0.01); } foreach (Futures($futures) as $future) { list ($err) = $future->resolve(); $this->assertTrue($err > 0); $this->assertTrue($future->getWasKilledByTimeout()); } } public function testNoHangOnExecFutureDestructionWithRunningChild() { $start = microtime(true); $future = new ExecFuture('sleep 30'); $future->start(); unset($future); $end = microtime(true); // If ExecFuture::__destruct() hangs until the child closes, we won't make // it here in time. 
$this->assertTrue(($end - $start) < 5); } public function testMultipleResolves() { // It should be safe to call resolve(), resolvex(), resolveKill(), etc., // as many times as you want on the same process. $future = new ExecFuture('echo quack'); $future->resolve(); $future->resolvex(); list($err) = $future->resolveKill(); $this->assertEqual(0, $err); } public function testReadBuffering() { $str_len_8 = 'abcdefgh'; $str_len_4 = 'abcd'; // This is a write/read with no read buffer. $future = new ExecFuture('cat'); $future->write($str_len_8); do { $future->isReady(); list($read) = $future->read(); if (strlen($read)) { break; } } while (true); // We expect to get the entire string back in the read. $this->assertEqual($str_len_8, $read); $future->resolve(); // This is a write/read with a read buffer. $future = new ExecFuture('cat'); $future->write($str_len_8); // Set the read buffer size. $future->setReadBufferSize(4); do { $future->isReady(); list($read) = $future->read(); if (strlen($read)) { break; } } while (true); // We expect to get the entire string back in the read. $this->assertEqual($str_len_4, $read); $future->resolve(); } } diff --git a/src/future/exec/execx.php b/src/future/exec/execx.php index 79bdbd7..bde37d2 100644 --- a/src/future/exec/execx.php +++ b/src/future/exec/execx.php @@ -1,111 +1,107 @@ resolvex(); } /** * Execute a command and capture stdout, stderr, and the return value. * * list ($err, $stdout, $stderr) = exec_manual('ls %s', $file); * - * When invoking this function, you must manually handle the error - * condition. Error flows can often be simplified by using @{function:execx} - * instead, which throws an exception when it encounters an error. + * When invoking this function, you must manually handle the error condition. + * Error flows can often be simplified by using @{function:execx} instead, + * which throws an exception when it encounters an error. * * @param string sprintf()-style command pattern to execute. * @param ... 
Arguments to sprintf pattern. * @return array List of return code, stdout, and stderr. - * @group exec */ function exec_manual($cmd /* , ... */) { $args = func_get_args(); $ef = newv('ExecFuture', $args); return $ef->resolve(); } /** * Wrapper for @{class:PhutilExecPassthru}. * * @param string sprintf()-style command pattern to execute. * @param ... Arguments to sprintf pattern. * @return int Return code. - * @group exec */ function phutil_passthru($cmd /* , ... */) { $args = func_get_args(); return newv('PhutilExecPassthru', $args)->execute(); } /** * Return a human-readable signal name (like "SIGINT" or "SIGKILL") for a given * signal number. * * @param int Signal number. * @return string Human-readable signal name. */ function phutil_get_signal_name($signo) { - // These aren't always defined; try our best to look up the signal name. $constant_names = array( 'SIGHUP', 'SIGINT', 'SIGQUIT', 'SIGILL', 'SIGTRAP', 'SIGABRT', 'SIGIOT', 'SIGBUS', 'SIGFPE', 'SIGUSR1', 'SIGSEGV', 'SIGUSR2', 'SIGPIPE', 'SIGALRM', 'SIGTERM', 'SIGSTKFLT', 'SIGCLD', 'SIGCHLD', 'SIGCONT', 'SIGTSTP', 'SIGTTIN', 'SIGTTOU', 'SIGURG', 'SIGXCPU', 'SIGXFSZ', 'SIGVTALRM', 'SIGPROF', 'SIGWINCH', 'SIGPOLL', 'SIGIO', 'SIGPWR', 'SIGSYS', 'SIGBABY', ); $signal_names = array(); foreach ($constant_names as $constant) { if (defined($constant)) { $signal_names[constant($constant)] = $constant; } } return idx($signal_names, $signo); } diff --git a/src/future/functions.php b/src/future/functions.php index adbb00b..990fcd8 100644 --- a/src/future/functions.php +++ b/src/future/functions.php @@ -1,12 +1,11 @@ resolve(); * * This is an abstract base class which defines the API that HTTP futures * conform to. Concrete implementations are available in @{class:HTTPFuture} * and @{class:HTTPSFuture}. 
All futures return a tuple * when resolved; status is an object of class @{class:HTTPFutureResponseStatus} * and may represent any of a wide variety of errors in the transport layer, * a support library, or the actual HTTP exchange. * * @task create Creating a New Request * @task config Configuring the Request * @task resolve Resolving the Request * @task internal Internals - * @group futures */ abstract class BaseHTTPFuture extends Future { private $method = 'GET'; private $timeout = 300.0; private $headers = array(); private $uri; private $data; private $expect; /* -( Creating a New Request )--------------------------------------------- */ /** * Build a new future which will make an HTTP request to a given URI, with * some optional data payload. Since this class is abstract you can't actually * instantiate it; instead, build a new @{class:HTTPFuture} or * @{class:HTTPSFuture}. * * @param string Fully-qualified URI to send a request to. * @param mixed String or array to include in the request. Strings will be * transmitted raw; arrays will be encoded and sent as * 'application/x-www-form-urlencoded'. * @task create */ final public function __construct($uri, $data = array()) { $this->setURI((string)$uri); $this->setData($data); } /* -( Configuring the Request )-------------------------------------------- */ /** * Set a timeout for the service call. If the request hasn't resolved yet, * the future will resolve with a status that indicates the request timed * out. You can determine if a status is a timeout status by calling * isTimeout() on the status object. * * @param float Maximum timeout, in seconds. * @return this * @task config */ public function setTimeout($timeout) { $this->timeout = $timeout; return $this; } /** * Get the currently configured timeout. * * @return float Maximum number of seconds the request will execute for. 
* @task config */ public function getTimeout() { return $this->timeout; } /** * Select the HTTP method (e.g., "GET", "POST", "PUT") to use for the request. * By default, requests use "GET". * * @param string HTTP method name. * @return this * @task config */ final public function setMethod($method) { static $supported_methods = array( 'GET' => true, 'POST' => true, 'PUT' => true, 'DELETE' => true, ); if (empty($supported_methods[$method])) { $method_list = implode(', ', array_keys($supported_methods)); throw new Exception( "The HTTP method '{$method}' is not supported. Supported HTTP methods ". "are: {$method_list}."); } $this->method = $method; return $this; } /** * Get the HTTP method the request will use. * * @return string HTTP method name, like "GET". * @task config */ final public function getMethod() { return $this->method; } /** * Set the URI to send the request to. Note that this is also a constructor * parameter. * * @param string URI to send the request to. * @return this * @task config */ public function setURI($uri) { $this->uri = (string)$uri; return $this; } /** * Get the fully-qualified URI the request will be made to. * * @return string URI the request will be sent to. * @task config */ public function getURI() { return $this->uri; } /** * Provide data to send along with the request. Note that this is also a * constructor parameter; it may be more convenient to provide it there. Data * must be a string (in which case it will be sent raw) or an array (in which * case it will be encoded and sent as 'application/x-www-form-urlencoded'). * * @param mixed Data to send with the request. * @return this * @task config */ public function setData($data) { if (!is_string($data) && !is_array($data)) { throw new Exception('Data parameter must be an array or string.'); } $this->data = $data; return $this; } /** * Get the data which will be sent with the request. * * @return mixed Data which will be sent. 
* @task config */ public function getData() { return $this->data; } /** * Add an HTTP header to the request. The same header name can be specified * more than once, which will cause multiple headers to be sent. * * @param string Header name, like "Accept-Language". * @param string Header value, like "en-us". * @return this * @task config */ public function addHeader($name, $value) { $this->headers[] = array($name, $value); return $this; } /** * Get headers which will be sent with the request. Optionally, you can * provide a filter, which will return only headers with that name. For * example: * * $all_headers = $future->getHeaders(); * $just_user_agent = $future->getHeaders('User-Agent'); * * In either case, an array with all (or all matching) headers is returned. * * @param string|null Optional filter, which selects only headers with that * name if provided. * @return array List of all (or all matching) headers. * @task config */ public function getHeaders($filter = null) { $filter = strtolower($filter); $result = array(); foreach ($this->headers as $header) { list($name, $value) = $header; if (!$filter || ($filter == strtolower($name))) { $result[] = $header; } } return $result; } /** * Set the status codes that are expected in the response. * If set, isError on the status object will return true for status codes - * that are not in the input array. Otherise, isError will be true for any + * that are not in the input array. Otherwise, isError will be true for any * HTTP status code outside the 2xx range (notwithstanding other errors such * as connection or transport issues). * * @param array|null List of expected HTTP status codes. * * @return this * @task config */ public function setExpectStatus($status_codes) { $this->expect = $status_codes; return $this; } /** * Return list of expected status codes, or null if not set. * * @return array|null List of expected status codes. 
*/ public function getExpectStatus() { return $this->expect; } /** * Add a HTTP basic authentication header to the request. * * @param string Username to authenticate with. * @param PhutilOpaqueEnvelope Password to authenticate with. * @return this * @task config */ public function setHTTPBasicAuthCredentials( $username, PhutilOpaqueEnvelope $password) { $password_plaintext = $password->openEnvelope(); $credentials = base64_encode($username.':'.$password_plaintext); return $this->addHeader('Authorization', 'Basic '.$credentials); } /* -( Resolving the Request )---------------------------------------------- */ /** - * Exception-oriented resolve(). Throws if the status indicates an error - * occurred. + * Exception-oriented @{method:resolve}. Throws if the status indicates an + * error occurred. * * @return tuple HTTP request result tuple. * @task resolve */ final public function resolvex() { $result = $this->resolve(); list($status, $body, $headers) = $result; if ($status->isError()) { throw $status; } return array($body, $headers); } /* -( Internals )---------------------------------------------------------- */ /** * Parse a raw HTTP response into a tuple. * * @param string Raw HTTP response. * @return tuple Valid resolution tuple. * @task internal */ protected function parseRawHTTPResponse($raw_response) { $rex_base = "@^(?P.*?)\r?\n\r?\n(?P.*)$@s"; $rex_head = "@^HTTP/\S+ (?P\d+) (?P.*?)". "(?:\r?\n(?P.*))?$@s"; // We need to parse one or more header blocks in case we got any // "HTTP/1.X 100 Continue" nonsense back as part of the response. This // happens with HTTPS requests, at the least. 
$response = $raw_response; while (true) { $matches = null; if (!preg_match($rex_base, $response, $matches)) { return $this->buildMalformedResult($raw_response); } $head = $matches['head']; $body = $matches['body']; if (!preg_match($rex_head, $head, $matches)) { return $this->buildMalformedResult($raw_response); } $response_code = (int)$matches['code']; $response_status = strtolower($matches['status']); if ($response_code == 100) { // This is HTTP/1.X 100 Continue, so this whole chunk is moot. $response = $body; } else if (($response_code == 200) && ($response_status == 'connection established')) { // When tunneling through an HTTPS proxy, we get an initial header // block like "HTTP/1.X 200 Connection established", then newlines, // then the normal response. Drop this chunk. $response = $body; } else { $headers = $this->parseHeaders(idx($matches, 'headers')); break; } } $status = new HTTPFutureResponseStatusHTTP( $response_code, $body, $headers, $this->expect); return array($status, $body, $headers); } /** * Parse an HTTP header block. * * @param string Raw HTTP headers. * @return list List of HTTP header tuples. * @task internal */ protected function parseHeaders($head_raw) { $rex_header = '@^(?P.*?):\s*(?P.*)$@'; $headers = array(); if (!$head_raw) { return $headers; } $headers_raw = preg_split("/\r?\n/", $head_raw); foreach ($headers_raw as $header) { $m = null; if (preg_match($rex_header, $header, $m)) { $headers[] = array($m['name'], $m['value']); } else { $headers[] = array($header, null); } } return $headers; } /** * Find value of the first header with given name. * * @param list List of headers from `resolve()`. * @param string Case insensitive header name. * @return string Value of the header or null if not found. 
* @task resolve */ public static function getHeader(array $headers, $search) { assert_instances_of($headers, 'array'); foreach ($headers as $header) { list($name, $value) = $header; if (strcasecmp($name, $search) == 0) { return $value; } } return null; } /** * Build a result tuple indicating a parse error resulting from a malformed * HTTP response. * * @return tuple Valid resolution tuple. * @task internal */ protected function buildMalformedResult($raw_response) { $body = null; $headers = array(); $status = new HTTPFutureResponseStatusParse( HTTPFutureResponseStatusParse::ERROR_MALFORMED_RESPONSE, $raw_response); return array($status, $body, $headers); } + } diff --git a/src/future/http/HTTPFuture.php b/src/future/http/HTTPFuture.php index 8629fd9..a511077 100644 --- a/src/future/http/HTTPFuture.php +++ b/src/future/http/HTTPFuture.php @@ -1,298 +1,294 @@ resolvex(); * * Or * * $future = new HTTPFuture('http://www.example.com/'); * list($http_response_status_object, * $response_body, * $headers) = $future->resolve(); * - * Prefer resolvex() to resolve() as the former throws + * Prefer @{method:resolvex} to @{method:resolve} as the former throws * @{class:HTTPFutureResponseStatusHTTP} on failures, which includes an * informative exception message. 
- * - * @group futures */ final class HTTPFuture extends BaseHTTPFuture { private $host; private $port = 80; private $fullRequestPath; private $socket; private $writeBuffer; private $response; private $stateConnected = false; private $stateWriteComplete = false; private $stateReady = false; private $stateStartTime; private $profilerCallID; public function setURI($uri) { $parts = parse_url($uri); if (!$parts) { throw new Exception("Could not parse URI '{$uri}'."); } if (empty($parts['scheme']) || $parts['scheme'] !== 'http') { throw new Exception( "URI '{$uri}' must be fully qualified with 'http://' scheme."); } if (!isset($parts['host'])) { throw new Exception( "URI '{$uri}' must be fully qualified and include host name."); } $this->host = $parts['host']; if (!empty($parts['port'])) { $this->port = $parts['port']; } if (isset($parts['user']) || isset($parts['pass'])) { throw new Exception( 'HTTP Basic Auth is not supported by HTTPFuture.'); } if (isset($parts['path'])) { $this->fullRequestPath = $parts['path']; } else { $this->fullRequestPath = '/'; } if (isset($parts['query'])) { $this->fullRequestPath .= '?'.$parts['query']; } return parent::setURI($uri); } public function __destruct() { if ($this->socket) { @fclose($this->socket); $this->socket = null; } } public function getReadSockets() { if ($this->socket) { return array($this->socket); } return array(); } public function getWriteSockets() { if (strlen($this->writeBuffer)) { return array($this->socket); } return array(); } public function isWriteComplete() { return $this->stateWriteComplete; } private function getDefaultUserAgent() { return 'HTTPFuture/1.0'; } public function isReady() { if ($this->stateReady) { return true; } if (!$this->socket) { $this->stateStartTime = microtime(true); $this->socket = $this->buildSocket(); if (!$this->socket) { return $this->stateReady; } $profiler = PhutilServiceProfiler::getInstance(); $this->profilerCallID = $profiler->beginServiceCall( array( 'type' => 'http', 'uri' => 
$this->getURI(), )); } if (!$this->stateConnected) { $read = array(); $write = array($this->socket); $except = array(); $select = stream_select($read, $write, $except, $tv_sec = 0); if ($write) { $this->stateConnected = true; } } if ($this->stateConnected) { if (strlen($this->writeBuffer)) { $bytes = @fwrite($this->socket, $this->writeBuffer); if ($bytes === false) { throw new Exception('Failed to write to buffer.'); } else if ($bytes) { $this->writeBuffer = substr($this->writeBuffer, $bytes); } } if (!strlen($this->writeBuffer)) { $this->stateWriteComplete = true; } while (($data = fread($this->socket, 32768)) || strlen($data)) { $this->response .= $data; } if ($data === false) { throw new Exception('Failed to read socket.'); } } return $this->checkSocket(); } private function buildSocket() { - $errno = null; $errstr = null; $socket = @stream_socket_client( 'tcp://'.$this->host.':'.$this->port, $errno, $errstr, $ignored_connection_timeout = 1.0, STREAM_CLIENT_CONNECT | STREAM_CLIENT_ASYNC_CONNECT); if (!$socket) { $this->stateReady = true; $this->result = $this->buildErrorResult( HTTPFutureResponseStatusTransport::ERROR_CONNECTION_FAILED); return null; } $ok = stream_set_blocking($socket, 0); if (!$ok) { throw new Exception('Failed to set stream nonblocking.'); } $this->writeBuffer = $this->buildHTTPRequest(); return $socket; } private function checkSocket() { - $timeout = false; $now = microtime(true); if (($now - $this->stateStartTime) > $this->getTimeout()) { $timeout = true; } if (!feof($this->socket) && !$timeout) { return false; } $this->stateReady = true; if ($timeout) { $this->result = $this->buildErrorResult( HTTPFutureResponseStatusTransport::ERROR_TIMEOUT); } else if (!$this->stateConnected) { $this->result = $this->buildErrorResult( HTTPFutureResponseStatusTransport::ERROR_CONNECTION_REFUSED); } else if (!$this->stateWriteComplete) { $this->result = $this->buildErrorResult( HTTPFutureResponseStatusTransport::ERROR_CONNECTION_FAILED); } else { 
$this->result = $this->parseRawHTTPResponse($this->response); } $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall($this->profilerCallID, array()); return true; } private function buildErrorResult($error) { return array( $status = new HTTPFutureResponseStatusTransport($error, $this->getURI()), $body = null, $headers = array()); } private function buildHTTPRequest() { $data = $this->getData(); $method = $this->getMethod(); $uri = $this->fullRequestPath; $add_headers = array(); if ($this->getMethod() == 'GET') { if (is_array($data)) { $data = http_build_query($data, '', '&'); if (strpos($uri, '?') !== false) { $uri .= '&'.$data; } else { $uri .= '?'.$data; } $data = ''; } } else { if (is_array($data)) { $data = http_build_query($data, '', '&')."\r\n"; $add_headers[] = array( 'Content-Type', 'application/x-www-form-urlencoded'); } } $length = strlen($data); $add_headers[] = array( 'Content-Length', $length); if (!$this->getHeaders('User-Agent')) { $add_headers[] = array( 'User-Agent', $this->getDefaultUserAgent()); } if (!$this->getHeaders('Host')) { $add_headers[] = array( 'Host', $this->host); } $headers = array_merge($this->getHeaders(), $add_headers); foreach ($headers as $key => $header) { list($name, $value) = $header; if (strlen($value)) { $value = ': '.$value; } $headers[$key] = $name.$value."\r\n"; } return "{$method} {$uri} HTTP/1.0\r\n". implode('', $headers). "\r\n". $data; } } diff --git a/src/future/http/HTTPSFuture.php b/src/future/http/HTTPSFuture.php index 98cd154..5bb5769 100644 --- a/src/future/http/HTTPSFuture.php +++ b/src/future/http/HTTPSFuture.php @@ -1,583 +1,581 @@ cabundle = $temp; return $this; } /** * Set the SSL certificate to use for this session, given a path. * * @param string The path to a valid SSL certificate for this session * @return this */ public function setCABundleFromPath($path) { $this->cabundle = $path; return $this; } /** * Get the path to the SSL certificate for this session. 
* * @return string|null */ public function getCABundle() { return $this->cabundle; } /** * Set whether Location headers in the response will be respected. * The default is true. * * @param boolean true to follow any Location header present in the response, * false to return the request directly * @return this */ public function setFollowLocation($follow) { $this->followLocation = $follow; return $this; } /** * Get whether Location headers in the response will be respected. * * @return boolean */ public function getFollowLocation() { return $this->followLocation; } /** * Set the fallback CA certificate if one is not specified * for the session, given a path. * * @param string The path to a valid SSL certificate * @return void */ public static function setGlobalCABundleFromPath($path) { self::$globalCABundle = $path; } /** * Set the fallback CA certificate if one is not specified * for the session, given a string. * * @param string The certificate * @return void */ public static function setGlobalCABundleFromString($certificate) { $temp = new TempFile(); Filesystem::writeFile($temp, $certificate); self::$globalCABundle = $temp; } /** * Get the fallback global CA certificate * * @return string */ public static function getGlobalCABundle() { return self::$globalCABundle; } /** * Set a list of domains to blindly trust. Certificates for these domains * will not be validated. * * @param list List of domain names to trust blindly. * @return void */ public static function setBlindlyTrustDomains(array $domains) { self::$blindTrustDomains = array_fuse($domains); } /** * Load contents of remote URI. Behaves pretty much like - * `@file_get_contents($uri)` but doesn't require `allow_url_fopen`. + * `@file_get_contents($uri)` but doesn't require `allow_url_fopen`. 
* * @param string * @param float * @return string|false */ public static function loadContent($uri, $timeout = null) { $future = new HTTPSFuture($uri); if ($timeout !== null) { $future->setTimeout($timeout); } try { list($body) = $future->resolvex(); return $body; } catch (HTTPFutureResponseStatus $ex) { return false; } } /** * Attach a file to the request. * * @param string HTTP parameter name. * @param string File content. * @param string File name. * @param string File mime type. * @return this */ public function attachFileData($key, $data, $name, $mime_type) { if (isset($this->files[$key])) { throw new Exception( pht( 'HTTPSFuture currently supports only one file attachment for each '. 'parameter name. You are trying to attach two different files with '. 'the same parameter, "%s".', $key)); } $this->files[$key] = array( 'data' => $data, 'name' => $name, 'mime' => $mime_type, ); return $this; } public function isReady() { if (isset($this->result)) { return true; } $uri = $this->getURI(); $domain = id(new PhutilURI($uri))->getDomain(); if (!$this->handle) { $profiler = PhutilServiceProfiler::getInstance(); $this->profilerCallID = $profiler->beginServiceCall( array( 'type' => 'http', 'uri' => $uri, )); if (!self::$multi) { self::$multi = curl_multi_init(); if (!self::$multi) { throw new Exception('curl_multi_init() failed!'); } } if (!empty(self::$pool[$domain])) { $curl = array_pop(self::$pool[$domain]); } else { $curl = curl_init(); if (!$curl) { throw new Exception('curl_init() failed!'); } } $this->handle = $curl; curl_multi_add_handle(self::$multi, $curl); curl_setopt($curl, CURLOPT_URL, $uri); if (defined('CURLOPT_PROTOCOLS')) { // cURL supports a lot of protocols, and by default it will honor // redirects across protocols (for instance, from HTTP to POP3). Beyond // being very silly, this also has security implications: // // http://blog.volema.com/curl-rce.html // // Disable all protocols other than HTTP and HTTPS. 
$allowed_protocols = CURLPROTO_HTTPS | CURLPROTO_HTTP; curl_setopt($curl, CURLOPT_PROTOCOLS, $allowed_protocols); curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, $allowed_protocols); } $data = $this->formatRequestDataForCURL(); curl_setopt($curl, CURLOPT_POSTFIELDS, $data); $headers = $this->getHeaders(); $saw_expect = false; for ($ii = 0; $ii < count($headers); $ii++) { list($name, $value) = $headers[$ii]; $headers[$ii] = $name.': '.$value; if (!strncasecmp($name, 'Expect', strlen('Expect'))) { $saw_expect = true; } } if (!$saw_expect) { // cURL sends an "Expect" header by default for certain requests. While // there is some reasoning behind this, it causes a practical problem // in that lighttpd servers reject these requests with a 417. Both sides // are locked in an eternal struggle (lighttpd has introduced a // 'server.reject-expect-100-with-417' option to deal with this case). // // The ostensibly correct way to suppress this behavior on the cURL side // is to add an empty "Expect:" header. If we haven't seen some other // explicit "Expect:" header, do so. // // See here, for example, although this issue is fairly widespread: // http://curl.haxx.se/mail/archive-2009-07/0008.html $headers[] = 'Expect:'; } curl_setopt($curl, CURLOPT_HTTPHEADER, $headers); // Set the requested HTTP method, e.g. GET / POST / PUT. curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $this->getMethod()); // Make sure we get the headers and data back. curl_setopt($curl, CURLOPT_HEADER, true); curl_setopt($curl, CURLOPT_WRITEFUNCTION, array($this, 'didReceiveDataCallback')); if ($this->followLocation) { curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true); curl_setopt($curl, CURLOPT_MAXREDIRS, 20); } if (defined('CURLOPT_TIMEOUT_MS')) { // If CURLOPT_TIMEOUT_MS is available, use the higher-precision timeout. $timeout = max(1, ceil(1000 * $this->getTimeout())); curl_setopt($curl, CURLOPT_TIMEOUT_MS, $timeout); } else { // Otherwise, fall back to the lower-precision timeout. 
$timeout = max(1, ceil($this->getTimeout())); curl_setopt($curl, CURLOPT_TIMEOUT, $timeout); } // Try some decent fallbacks here: // - First, check if a bundle is set explicit for this request, via // `setCABundle()` or similar. // - Then, check if a global bundle is set explicitly for all requests, // via `setGlobalCABundle()` or similar. // - Then, if a local custom.pem exists, use that, because it probably // means that the user wants to override everything (also because the // user might not have access to change the box's php.ini to add // curl.cainfo). // - Otherwise, try using curl.cainfo. If it's set explicitly, it's // probably reasonable to try using it before we fall back to what // libphutil ships with. // - Lastly, try the default that libphutil ships with. If it doesn't // work, give up and yell at the user. if (!$this->getCABundle()) { $caroot = dirname(phutil_get_library_root('phutil')).'/resources/ssl/'; $ini_val = ini_get('curl.cainfo'); if (self::getGlobalCABundle()) { $this->setCABundleFromPath(self::getGlobalCABundle()); } else if (Filesystem::pathExists($caroot.'custom.pem')) { $this->setCABundleFromPath($caroot.'custom.pem'); } else if ($ini_val) { // TODO: We can probably do a pathExists() here, even. $this->setCABundleFromPath($ini_val); } else { $this->setCABundleFromPath($caroot.'default.pem'); } } curl_setopt($curl, CURLOPT_CAINFO, $this->getCABundle()); $domain = id(new PhutilURI($uri))->getDomain(); if (!empty(self::$blindTrustDomains[$domain])) { // Disable peer verification for domains that we blindly trust. curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); } else { curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true); } curl_setopt($curl, CURLOPT_SSLVERSION, 0); } else { $curl = $this->handle; if (!self::$results) { // NOTE: In curl_multi_select(), PHP calls curl_multi_fdset() but does // not check the return value of &maxfd for -1 until recent versions // of PHP (5.4.8 and newer). 
cURL may return -1 as maxfd in some unusual // situations; if it does, PHP enters select() with nfds=0, which blocks // until the timeout is reached. // // We could try to guess whether this will happen or not by examining // the version identifier, but we can also just sleep for only a short // period of time. curl_multi_select(self::$multi, 0.01); } } do { $active = null; $result = curl_multi_exec(self::$multi, $active); } while ($result == CURLM_CALL_MULTI_PERFORM); while ($info = curl_multi_info_read(self::$multi)) { if ($info['msg'] == CURLMSG_DONE) { self::$results[(int)$info['handle']] = $info; } } if (!array_key_exists((int)$curl, self::$results)) { return false; } // The request is complete, so release any temporary files we wrote // earlier. $this->temporaryFiles = array(); $info = self::$results[(int)$curl]; $result = $this->responseBuffer; $err_code = $info['result']; if ($err_code) { $status = new HTTPFutureResponseStatusCURL($err_code, $uri); $body = null; $headers = array(); $this->result = array($status, $body, $headers); } else { // cURL returns headers of all redirects, we strip all but the final one. $redirects = curl_getinfo($curl, CURLINFO_REDIRECT_COUNT); $result = preg_replace('/^(.*\r\n\r\n){'.$redirects.'}/sU', '', $result); $this->result = $this->parseRawHTTPResponse($result); } curl_multi_remove_handle(self::$multi, $curl); unset(self::$results[(int)$curl]); // NOTE: We want to use keepalive if possible. Return the handle to a // pool for the domain; don't close it. self::$pool[$domain][] = $curl; $profiler = PhutilServiceProfiler::getInstance(); $profiler->endServiceCall($this->profilerCallID, array()); return true; } /** * Callback invoked by cURL as it reads HTTP data from the response. We save * the data to a buffer. */ public function didReceiveDataCallback($handle, $data) { $this->responseBuffer .= $data; return strlen($data); } /** * Read data from the response buffer. 
* * NOTE: Like @{class:ExecFuture}, this method advances a read cursor but * does not discard the data. The data will still be buffered, and it will * all be returned when the future resolves. To discard the data after * reading it, call @{method:discardBuffers}. * * @return string Response data, if available. */ public function read() { $result = substr($this->responseBuffer, $this->responseBufferPos); $this->responseBufferPos = strlen($this->responseBuffer); return $result; } /** * Discard any buffered data. Normally, you call this after reading the * data with @{method:read}. * * @return this */ public function discardBuffers() { $this->responseBuffer = ''; $this->responseBufferPos = 0; return $this; } /** * Produces a value safe to pass to `CURLOPT_POSTFIELDS`. * * @return wild Some value, suitable for use in `CURLOPT_POSTFIELDS`. */ private function formatRequestDataForCURL() { // We're generating a value to hand to cURL as CURLOPT_POSTFIELDS. The way // cURL handles this value has some tricky caveats. // First, we can return either an array or a query string. If we return // an array, we get a "multipart/form-data" request. If we return a // query string, we get an "application/x-www-form-urlencoded" request. // Second, if we return an array we can't duplicate keys. The user might // want to send the same parameter multiple times. // Third, if we return an array and any of the values start with "@", // cURL includes arbitrary files off disk and sends them to an untrusted // remote server. For example, an array like: // // array('name' => '@/usr/local/secret') // // ...will attempt to read that file off disk and transmit its contents with // the request. This behavior is pretty surprising, and it can easily // become a relatively severe security vulnerability which allows an // attacker to read any file the HTTP process has access to. Since this // feature is very dangerous and not particularly useful, we prevent its // use. 
Broadly, this means we must reject some requests because they // contain an "@" in an inconvenient place. // Generally, to avoid the "@" case and because most servers usually // expect "application/x-www-form-urlencoded" data, we try to return a // string unless there are files attached to this request. $data = $this->getData(); $files = $this->files; $any_data = ($data || (is_string($data) && strlen($data))); $any_files = (bool)$this->files; if (!$any_data && !$any_files) { // No files or data, so just bail. return null; } if (!$any_files) { // If we don't have any files, just encode the data as a query string, // make sure it's not including any files, and we're good to go. if (is_array($data)) { $data = http_build_query($data, '', '&'); } $this->checkForDangerousCURLMagic($data, $is_query_string = true); return $data; } // If we've made it this far, we have some files, so we need to return // an array. First, convert the other data into an array if it isn't one // already. if (is_string($data)) { // NOTE: We explicitly don't want fancy array parsing here, so just // do a basic parse and then convert it into a dictionary ourselves. $parser = new PhutilQueryStringParser(); $pairs = $parser->parseQueryStringToPairList($data); $map = array(); foreach ($pairs as $pair) { list($key, $value) = $pair; if (array_key_exists($key, $map)) { throw new Exception( pht( 'Request specifies two values for key "%s", but parameter '. 'names must be unique if you are posting file data due to '. 'limitations with cURL.')); } $map[$key] = $value; } $data = $map; } foreach ($data as $key => $value) { $this->checkForDangerousCURLMagic($value, $is_query_string = false); } foreach ($this->files as $name => $info) { if (array_key_exists($name, $data)) { throw new Exception( pht( 'Request specifies a file with key "%s", but that key is '. 'also defined by normal request data. Due to limitations '. 'with cURL, requests that post file data must use unique '. 
'keys.')); } $tmp = new TempFile($info['name']); Filesystem::writeFile($tmp, $info['data']); $this->temporaryFiles[] = $tmp; // In 5.5.0 and later, we can use CURLFile. Prior to that, we have to // use this "@" stuff. if (class_exists('CURLFile')) { $file_value = new CURLFile((string)$tmp, $info['mime'], $info['name']); } else { $file_value = '@'.(string)$tmp; } $data[$name] = $file_value; } return $data; } /** * Detect strings which will cause cURL to do horrible, insecure things. * * @param string Possibly dangerous string. * @param bool True if this string is being used as part of a query string. * @return void */ private function checkForDangerousCURLMagic($string, $is_query_string) { if (empty($string[0]) || ($string[0] != '@')) { // This isn't an "@..." string, so it's fine. return; } if ($is_query_string) { if (version_compare(phpversion(), '5.2.0', '<')) { throw new Exception( pht( 'Attempting to make an HTTP request, but query string data begins '. 'with "@". Prior to PHP 5.2.0 this reads files off disk, which '. 'creates a wide attack window for security vulnerabilities. '. 'Upgrade PHP or avoid making cURL requests which begin with "@".')); } // This is safe if we're on PHP 5.2.0 or newer. return; } throw new Exception( pht( 'Attempting to make an HTTP request which includes file data, but '. 'the value of a query parameter begins with "@". PHP interprets '. 'these values to mean that it should read arbitrary files off disk '. 'and transmit them to remote servers. Declining to make this '. 
'request.')); } } diff --git a/src/future/http/status/HTTPFutureResponseStatus.php b/src/future/http/status/HTTPFutureResponseStatus.php index dc99279..4c60126 100644 --- a/src/future/http/status/HTTPFutureResponseStatus.php +++ b/src/future/http/status/HTTPFutureResponseStatus.php @@ -1,43 +1,40 @@ statusCode = $status_code; $this->uri = (string)$uri; $type = $this->getErrorCodeType($status_code); $description = $this->getErrorCodeDescription($status_code); $uri_info = ''; if ($this->uri) { $uri_info = ' ('.$this->uri.')'; } $message = rtrim("[{$type}/{$status_code}]{$uri_info} {$description}"); parent::__construct($message); } final public function getStatusCode() { return $this->statusCode; } final public function getURI() { return $this->uri; } abstract public function isError(); abstract public function isTimeout(); abstract protected function getErrorCodeType($code); abstract protected function getErrorCodeDescription($code); } diff --git a/src/future/http/status/HTTPFutureResponseStatusCURL.php b/src/future/http/status/HTTPFutureResponseStatusCURL.php index 0546c80..e0e5af4 100644 --- a/src/future/http/status/HTTPFutureResponseStatusCURL.php +++ b/src/future/http/status/HTTPFutureResponseStatusCURL.php @@ -1,86 +1,83 @@ getStatusCode() == CURLE_OPERATION_TIMEOUTED); } protected function getErrorCodeDescription($code) { $constants = get_defined_constants(); $constant_name = null; foreach ($constants as $constant => $value) { if ($value == $code && preg_match('/^CURLE_/', $constant)) { $constant_name = '<'.$constant.'> '; break; } } $map = array( CURLE_COULDNT_RESOLVE_HOST => 'There was an error resolving the server hostname. Check that you are '. 'connected to the internet and that DNS is correctly configured. (Did '. 'you add the domain to `/etc/hosts` on some other machine, but not '. 'this one?)', CURLE_SSL_CACERT => 'There was an error verifying the SSL Certificate Authority while '. 'negotiating the SSL connection. This usually indicates that you are '. 
'using a self-signed certificate but have not added your CA to the '. 'CA bundle. See instructions in "libphutil/resources/ssl/README".', // Apparently there's no error constant for this? In cURL it's // CURLE_SSL_CACERT_BADFILE but there's no corresponding constant in // PHP. 77 => 'The SSL CA Bundles that we tried to use could not be read or are '. 'not formatted correctly.', CURLE_SSL_CONNECT_ERROR => 'There was an error negotiating the SSL connection. This usually '. 'indicates that the remote host has a bad SSL certificate, or your '. 'local host has some sort of SSL misconfiguration which prevents it '. 'from accepting the CA. If you are using a self-signed certificate, '. 'see instructions in "libphutil/resources/ssl/README".', CURLE_OPERATION_TIMEOUTED => 'The request took too long to complete.', CURLE_SSL_PEER_CERTIFICATE => 'There was an error verifying the SSL connection. This usually '. 'indicates that the remote host has an SSL certificate for a '. 'different domain name than you are connecting with. Make sure the '. 'certificate you have installed is signed for the correct domain.', ); $default_message = "The cURL library raised an error while making a request. You may be ". "able to find more information about this error (error code: {$code}) ". "on the cURL site: http://curl.haxx.se/libcurl/c/libcurl-errors.html#". 
preg_replace('/[^A-Z]/', '', $constant_name); $detailed_message = idx($map, $code, $default_message); return $constant_name.$detailed_message; } } diff --git a/src/future/http/status/HTTPFutureResponseStatusHTTP.php b/src/future/http/status/HTTPFutureResponseStatusHTTP.php index 96468e1..df33bda 100644 --- a/src/future/http/status/HTTPFutureResponseStatusHTTP.php +++ b/src/future/http/status/HTTPFutureResponseStatusHTTP.php @@ -1,65 +1,62 @@ 512) { $excerpt = substr($body, 0, 512).'...'; } else { $excerpt = $body; } $content_type = BaseHTTPFuture::getHeader($headers, 'Content-Type'); $match = null; if (preg_match('/;\s*charset=([^;]+)/', $content_type, $match)) { $encoding = trim($match[1], "\"'"); try { $excerpt = phutil_utf8_convert($excerpt, 'UTF-8', $encoding); } catch (Exception $ex) { } } $this->excerpt = phutil_utf8ize($excerpt); $this->expect = $expect; parent::__construct($status_code); } protected function getErrorCodeType($code) { return 'HTTP'; } public function isError() { if ($this->expect === null) { return ($this->getStatusCode() < 200) || ($this->getStatusCode() > 299); } return !in_array($this->getStatusCode(), $this->expect, true); } public function isTimeout() { return false; } protected function getErrorCodeDescription($code) { static $map = array( 404 => 'Not Found', 500 => 'Internal Server Error', ); return idx($map, $code)."\n".$this->excerpt."\n"; } } diff --git a/src/future/http/status/HTTPFutureResponseStatusParse.php b/src/future/http/status/HTTPFutureResponseStatusParse.php index eb2a786..ec5fc00 100644 --- a/src/future/http/status/HTTPFutureResponseStatusParse.php +++ b/src/future/http/status/HTTPFutureResponseStatusParse.php @@ -1,33 +1,30 @@ rawResponse = $raw_response; parent::__construct($code); } protected function getErrorCodeType($code) { return 'Parse'; } public function isError() { return true; } public function isTimeout() { return false; } protected function getErrorCodeDescription($code) { return 'The remote host returned 
something other than an HTTP response: '. $this->rawResponse; } } diff --git a/src/future/http/status/HTTPFutureResponseStatusTransport.php b/src/future/http/status/HTTPFutureResponseStatusTransport.php index 5fa9513..4718cd1 100644 --- a/src/future/http/status/HTTPFutureResponseStatusTransport.php +++ b/src/future/http/status/HTTPFutureResponseStatusTransport.php @@ -1,46 +1,43 @@ getStatusCode() == self::ERROR_TIMEOUT); } protected function getErrorCodeDescription($code) { $map = array( self::ERROR_TIMEOUT => 'The request took too long to complete.', self::ERROR_CONNECTION_ABORTED => 'The remote host closed the connection before the request completed.', self::ERROR_CONNECTION_REFUSED => 'The remote host refused the connection. This usually means the '. 'host is not running an HTTP server, or the network is blocking '. 'connections from this machine. Verify you can connect to the '. 'remote host from this host.', self::ERROR_CONNECTION_FAILED => 'Connection could not be initiated. This usually indicates a DNS '. 'problem: verify the domain name is correct, that you can '. 'perform a DNS lookup for it from this machine. (Did you add the '. 'domain to `/etc/hosts` on some other machine, but not this one?) '. 
'This might also indicate that you specified the wrong port.', ); return idx($map, $code); } } diff --git a/src/future/paypal/PhutilPayPalAPIFuture.php b/src/future/paypal/PhutilPayPalAPIFuture.php index 22fe314..ffcdbf9 100644 --- a/src/future/paypal/PhutilPayPalAPIFuture.php +++ b/src/future/paypal/PhutilPayPalAPIFuture.php @@ -1,91 +1,88 @@ host = $host; return $this; } public function getHost() { return $this->host; } public function setAPIUsername($api_username) { $this->apiUsername = $api_username; return $this; } public function setAPIPassword($api_password) { $this->apiPassword = $api_password; return $this; } public function setAPISignature($api_signature) { $this->apiSignature = $api_signature; return $this; } public function setRawPayPalQuery($action, array $params = array()) { $this->params = array('METHOD' => $action) + $params + $this->params; return $this; } protected function getProxiedFuture() { if (!$this->future) { $params = $this->params; if (!$this->params) { throw new Exception('You must setRawPayPalQuery()!'); } if (!$this->apiUsername) { throw new Exception('You must set PayPal API credentials!'); } $params['VERSION'] = '98.0'; $params['USER'] = $this->apiUsername; $params['PWD'] = $this->apiPassword; $params['SIGNATURE'] = $this->apiSignature; $this->future = id(new HTTPSFuture($this->getHost(), $params)) ->setMethod('POST'); } return $this->future; } protected function didReceiveResult($result) { list($status, $body, $headers) = $result; if ($status->isError()) { throw $status; } $dict = array(); parse_str($body, $dict); if (idx($dict, 'ACK') !== 'Success') { throw new Exception( 'PayPal API call failed: '.print_r($dict, true)); } return $dict; } } diff --git a/src/future/twitch/PhutilTwitchFuture.php b/src/future/twitch/PhutilTwitchFuture.php index e82bfb8..776cc10 100644 --- a/src/future/twitch/PhutilTwitchFuture.php +++ b/src/future/twitch/PhutilTwitchFuture.php @@ -1,92 +1,89 @@ accessToken = $token; return $this; } public function 
setClientID($client_id) { $this->clientID = $client_id; return $this; } public function setRawTwitchQuery($action, array $params = array()) { $this->action = $action; $this->params = $params; return $this; } public function setMethod($method) { $this->method = $method; return $this; } protected function getProxiedFuture() { if (!$this->future) { $params = $this->params; if (!$this->action) { throw new Exception('You must setRawTwitchQuery()!'); } if (!$this->accessToken) { throw new Exception('You must setAccessToken()!'); } $uri = new PhutilURI('https://api.twitch.tv/'); $uri->setPath('/kraken/'.ltrim($this->action, '/')); $uri->setQueryParam('oauth_token', $this->accessToken); $future = new HTTPSFuture($uri); $future->setData($this->params); $future->setMethod($this->method); // NOTE: This is how the Twitch API is versioned. $future->addHeader('Accept', 'application/vnd.twitchtv.2+json'); // NOTE: This is required to avoid rate limiting. $future->addHeader('Client-ID', $this->clientID); $this->future = $future; } return $this->future; } protected function didReceiveResult($result) { list($status, $body, $headers) = $result; if ($status->isError()) { throw $status; } $data = json_decode($body, true); if (!is_array($data)) { throw new Exception("Expected JSON response from Twitch, got: {$body}"); } if (idx($data, 'error')) { $error = $data['error']; throw new Exception("Received error from Twitch: {$error}"); } return $data; } } diff --git a/src/internationalization/PhutilPerson.php b/src/internationalization/PhutilPerson.php index 557ad15..fac9bcc 100644 --- a/src/internationalization/PhutilPerson.php +++ b/src/internationalization/PhutilPerson.php @@ -1,14 +1,10 @@ language = $language; return $this; } /** * Add translations which will be later used by @{method:translate}. * The parameter is an array of strings (for simple translations) or arrays * (for translastions with variants). The number of items in the array is * language specific. 
It is `array($singular, $plural)` for English. * * array( * 'color' => 'colour', * '%d beer(s)' => array('%d beer', '%d beers'), * ); * * The arrays can be nested for strings with more variant parts: * * array( * '%d char(s) on %d row(s)' => array( * array('%d char on %d row', '%d char on %d rows'), * array('%d chars on %d row', '%d chars on %d rows'), * ), * ); * * The translation should have the same placeholders as originals. Swapping * parameter order is possible: * * array( * '%s owns %s.' => '%2$s is owned by %1$s.', * ); * * @param array Identifier in key, translation in value. * @return PhutilTranslator Provides fluent interface. */ public function addTranslations(array $translations) { $this->translations = array_merge($this->translations, $translations); return $this; } public function translate($text /* , ... */) { $translation = idx($this->translations, $text, $text); $args = func_get_args(); while (is_array($translation)) { $translation = $this->chooseVariant($translation, next($args)); } array_shift($args); foreach ($args as $k => $arg) { if ($arg instanceof PhutilNumber) { $args[$k] = $this->formatNumber($arg->getNumber(), $arg->getDecimals()); } } // Check if any arguments are PhutilSafeHTML. If they are, we will apply // any escaping necessary and output HTML. $is_html = false; foreach ($args as $arg) { if ($arg instanceof PhutilSafeHTML) { $is_html = true; break; } } if ($is_html) { foreach ($args as $k => $arg) { $args[$k] = (string)phutil_escape_html($arg); } } $result = vsprintf($translation, $args); if ($this->language == 'en-ac') { $result = strtoupper($result); } if ($is_html) { $result = phutil_safe_html($result); } return $result; } private function chooseVariant(array $translations, $variant) { if (count($translations) == 1) { // If we only have one variant, we can select it directly. 
return reset($translations); } if ($variant instanceof PhutilNumber) { $variant = $variant->getNumber(); } switch ($this->language) { case 'en': case 'en-ac': list($singular, $plural) = $translations; if ($variant == 1) { return $singular; } return $plural; case 'cs': if ($variant instanceof PhutilPerson) { list($male, $female) = $translations; if ($variant->getSex() == PhutilPerson::SEX_FEMALE) { return $female; } return $male; } list($singular, $paucal, $plural) = $translations; if ($variant == 1) { return $singular; } if ($variant >= 2 && $variant <= 4) { return $paucal; } return $plural; default: throw new Exception("Unknown language '{$this->language}'."); } } /** * Translate date formatted by `$date->format()`. * * @param string Format accepted by `DateTime::format()`. * @param DateTime * @return string Formatted and translated date. */ public function translateDate($format, DateTime $date) { static $format_cache = array(); if (!isset($format_cache[$format])) { $translatable = 'DlSFMaA'; preg_match_all( '/['.$translatable.']|(\\\\.|[^'.$translatable.'])+/', $format, $format_cache[$format], PREG_SET_ORDER); } $parts = array(); foreach ($format_cache[$format] as $match) { $part = $date->format($match[0]); if (!isset($match[1])) { $part = $this->translate($part); } $parts[] = $part; } return implode('', $parts); } /** * Format number with grouped thousands and optional decimal part. Requires * translations of '.' (decimal point) and ',' (thousands separator). Both * these translations must be 1 byte long with PHP < 5.4.0. 
* * @param float * @param int * @return string */ public function formatNumber($number, $decimals = 0) { return number_format( $number, $decimals, $this->translate('.'), $this->translate(',')); } public function validateTranslation($original, $translation) { $pattern = '/<(\S[^>]*>?)?|&(\S[^;]*;?)?/i'; $original_matches = null; $translation_matches = null; preg_match_all($pattern, $original, $original_matches); preg_match_all($pattern, $translation, $translation_matches); sort($original_matches[0]); sort($translation_matches[0]); if ($original_matches[0] !== $translation_matches[0]) { return false; } return true; } } diff --git a/src/internationalization/__tests__/PhutilPHTTestCase.php b/src/internationalization/__tests__/PhutilPHTTestCase.php index 8a8ffea..f039a41 100644 --- a/src/internationalization/__tests__/PhutilPHTTestCase.php +++ b/src/internationalization/__tests__/PhutilPHTTestCase.php @@ -1,100 +1,98 @@ assertEqual('beer', pht('beer')); $this->assertEqual('1 beer(s)', pht('%d beer(s)', 1)); PhutilTranslator::getInstance()->addTranslations( array( '%d beer(s)' => array('%d beer', '%d beers'), )); $this->assertEqual('1 beer', pht('%d beer(s)', 1)); PhutilTranslator::getInstance()->setLanguage('cs'); PhutilTranslator::getInstance()->addTranslations( array( '%d beer(s)' => array('%d pivo', '%d piva', '%d piv'), )); $this->assertEqual('5 piv', pht('%d beer(s)', 5)); } public function getDateTranslations() { // The only purpose of this function is to provide a static list of // translations which can come from PhutilTranslator::translateDate() to // allow translation extractor getting them. 
return array( 'D' => array( pht('Sun'), pht('Mon'), pht('Tue'), pht('Wed'), pht('Thu'), pht('Fri'), pht('Sat'), ), 'l' => array( pht('Sunday'), pht('Monday'), pht('Tuesday'), pht('Wednesday'), pht('Thursday'), pht('Friday'), pht('Saturday'), ), 'S' => array( pht('st'), pht('nd'), pht('rd'), pht('th'), ), 'F' => array( pht('January'), pht('February'), pht('March'), pht('April'), pht('May'), pht('June'), pht('July'), pht('August'), pht('September'), pht('October'), pht('November'), pht('December'), ), 'M' => array( pht('Jan'), pht('Feb'), pht('Mar'), pht('Apr'), pht('May'), pht('Jun'), pht('Jul'), pht('Aug'), pht('Sep'), pht('Oct'), pht('Nov'), pht('Dec'), ), 'a' => array( pht('am'), pht('pm'), ), 'A' => array( pht('AM'), pht('PM'), ), ); } } diff --git a/src/internationalization/__tests__/PhutilPersonTest.php b/src/internationalization/__tests__/PhutilPersonTest.php index 17ec2f2..6fbdb96 100644 --- a/src/internationalization/__tests__/PhutilPersonTest.php +++ b/src/internationalization/__tests__/PhutilPersonTest.php @@ -1,22 +1,20 @@ sex; } public function setSex($value) { $this->sex = $value; return $this; } public function __toString() { return 'Test ('.$this->sex.')'; } } diff --git a/src/internationalization/__tests__/PhutilTranslatorTestCase.php b/src/internationalization/__tests__/PhutilTranslatorTestCase.php index 8a0b43c..15a9c2d 100644 --- a/src/internationalization/__tests__/PhutilTranslatorTestCase.php +++ b/src/internationalization/__tests__/PhutilTranslatorTestCase.php @@ -1,249 +1,246 @@ addTranslations( array( '%d line(s)' => array('%d line', '%d lines'), '%d char(s) on %d row(s)' => array( array('%d char on %d row', '%d char on %d rows'), array('%d chars on %d row', '%d chars on %d rows'), ), )); $this->assertEqual('line', $translator->translate('line')); $this->assertEqual('param', $translator->translate('%s', 'param')); $this->assertEqual('0 lines', $translator->translate('%d line(s)', 0)); $this->assertEqual('1 line', $translator->translate('%d 
line(s)', 1)); $this->assertEqual('2 lines', $translator->translate('%d line(s)', 2)); $this->assertEqual( '1 char on 1 row', $translator->translate('%d char(s) on %d row(s)', 1, 1)); $this->assertEqual( '5 chars on 2 rows', $translator->translate('%d char(s) on %d row(s)', 5, 2)); $this->assertEqual('1 beer(s)', $translator->translate('%d beer(s)', 1)); } public function testSingleVariant() { $translator = new PhutilTranslator(); $translator->setLanguage('en'); // In this translation, we have no alternatives for the first conversion. $translator->addTranslations( array( 'Run the command %s %d time(s).' => array( array( 'Run the command %s once.', 'Run the command %s %d times.', ), ), )); $this->assertEqual( 'Run the command ls 123 times.', (string)$translator->translate( 'Run the command %s %d time(s).', hsprintf('%s', 'ls'), 123)); } public function testCzech() { $translator = new PhutilTranslator(); $translator->setLanguage('cs'); $translator->addTranslations( array( '%d beer(s)' => array('%d pivo', '%d piva', '%d piv'), )); $this->assertEqual('0 piv', $translator->translate('%d beer(s)', 0)); $this->assertEqual('1 pivo', $translator->translate('%d beer(s)', 1)); $this->assertEqual('2 piva', $translator->translate('%d beer(s)', 2)); $this->assertEqual('5 piv', $translator->translate('%d beer(s)', 5)); $this->assertEqual('1 line(s)', $translator->translate('%d line(s)', 1)); } public function testPerson() { $translator = new PhutilTranslator(); $translator->setLanguage('cs'); $translator->addTranslations( array( '%s wrote.' 
=> array('%s napsal.', '%s napsala.'), )); $person = new PhutilPersonTest(); $this->assertEqual( 'Test () napsal.', $translator->translate('%s wrote.', $person)); $person->setSex(PhutilPerson::SEX_MALE); $this->assertEqual( 'Test (m) napsal.', $translator->translate('%s wrote.', $person)); $person->setSex(PhutilPerson::SEX_FEMALE); $this->assertEqual( 'Test (f) napsala.', $translator->translate('%s wrote.', $person)); } public function testTranslateDate() { $date = new DateTime('2012-06-21'); $translator = new PhutilTranslator(); $this->assertEqual('June', $translator->translateDate('F', $date)); $this->assertEqual('June 21', $translator->translateDate('F d', $date)); $this->assertEqual('F', $translator->translateDate('\F', $date)); $translator->addTranslations( array( 'June' => 'correct', '21' => 'wrong', 'F' => 'wrong' )); $this->assertEqual('correct', $translator->translateDate('F', $date)); $this->assertEqual('correct 21', $translator->translateDate('F d', $date)); $this->assertEqual('F', $translator->translateDate('\F', $date)); } public function testSetInstance() { PhutilTranslator::setInstance(new PhutilTranslator()); $original = PhutilTranslator::getInstance(); $this->assertEqual('color', pht('color')); $british = new PhutilTranslator(); $british->addTranslations( array( 'color' => 'colour', )); PhutilTranslator::setInstance($british); $this->assertEqual('colour', pht('color')); PhutilTranslator::setInstance($original); $this->assertEqual('color', pht('color')); } public function testFormatNumber() { $translator = new PhutilTranslator(); $this->assertEqual('1,234', $translator->formatNumber(1234)); $this->assertEqual('1,234.5', $translator->formatNumber(1234.5, 1)); $this->assertEqual('1,234.5678', $translator->formatNumber(1234.5678, 4)); $translator->addTranslations( array( ',' => ' ', '.' 
=> ',' )); $this->assertEqual('1 234', $translator->formatNumber(1234)); $this->assertEqual('1 234,5', $translator->formatNumber(1234.5, 1)); $this->assertEqual('1 234,5678', $translator->formatNumber(1234.5678, 4)); } public function testNumberTranslations() { $translator = new PhutilTranslator(); $translator->addTranslations( array( '%s line(s)' => array('%s line', '%s lines'), )); $this->assertEqual( '1 line', $translator->translate('%s line(s)', new PhutilNumber(1))); $this->assertEqual( '1,000 lines', $translator->translate('%s line(s)', new PhutilNumber(1000))); $this->assertEqual( '8.5 lines', $translator->translate( '%s line(s)', id(new PhutilNumber(8.5))->setDecimals(1))); } public function testValidateTranslation() { $tests = array( 'a < 2' => array( 'a < 2' => true, 'b < 3' => true, '2 > a' => false, 'a<2' => false, ), 'We win' => array( 'We win' => true, 'We win' => true, // false positive 'We win' => false, 'We win' => false, ), 'We win & triumph' => array( 'We triumph & win' => true, 'We win and triumph' => false, ), 'beer' => array( 'pivo' => true, 'b<>r' => false, 'b&&r' => false, ), ); $translator = new PhutilTranslator(); foreach ($tests as $original => $translations) { foreach ($translations as $translation => $expect) { $valid = ($expect ? 'valid' : 'invalid'); $this->assertEqual( $expect, $translator->validateTranslation($original, $translation), "'{$original}' should be {$valid} with '{$translation}'."); } } } public function testHTMLTranslations() { $string = '%s awoke suddenly at %s.'; $when = '<4 AM>'; $translator = new PhutilTranslator(); // When no components are HTML, everything is treated as a string. $who = 'Abraham'; $translation = $translator->translate( $string, $who, $when); $this->assertEqual( 'string', gettype($translation)); $this->assertEqual( 'Abraham awoke suddenly at <4 AM>.', $translation); // When at least one component is HTML, everything is treated as HTML. 
$who = phutil_tag('span', array(), 'Abraham'); $translation = $translator->translate( $string, $who, $when); $this->assertTrue($translation instanceof PhutilSafeHTML); $this->assertEqual( 'Abraham awoke suddenly at <4 AM>.', $translation->getHTMLContent()); $translation = $translator->translate( $string, $who, new PhutilNumber(1383930802)); $this->assertEqual( 'Abraham awoke suddenly at 1,383,930,802.', $translation->getHTMLContent()); } } diff --git a/src/internationalization/pht.php b/src/internationalization/pht.php index 1ec0ddc..1b694fd 100644 --- a/src/internationalization/pht.php +++ b/src/internationalization/pht.php @@ -1,20 +1,18 @@ addTranslations()` and language rules set * by `PhutilTranslator::getInstance()->setLanguage()`. * - * @param string Translation identifier with sprintf() placeholders. + * @param string Translation identifier with `sprintf()` placeholders. * @param mixed Value to select the variant from (e.g. singular or plural). * @param ... Next values referenced from $text. * @return string Translated string with substituted values. - * - * @group internationalization */ function pht($text, $variant = null /* , ... */) { $args = func_get_args(); $translator = PhutilTranslator::getInstance(); return call_user_func_array(array($translator, 'translate'), $args); } diff --git a/src/lexer/PhutilLexer.php b/src/lexer/PhutilLexer.php index 517598f..685786e 100644 --- a/src/lexer/PhutilLexer.php +++ b/src/lexer/PhutilLexer.php @@ -1,327 +1,325 @@ array(...), * 'state1' => array(...), * 'state2' => array(...), * ) * * Lexers start at the state named 'start'. Each state should have a list of * rules which can match in that state. A list of rules looks like this: * * array( * array('\s+', 'space'), * array('\d+', 'digit'), * array('\w+', 'word'), * ) * * The lexer operates by processing each rule in the current state in order. * When one matches, it produces a token. 
For example, the lexer above would * lex this text: * * 3 asdf * * ...to produce these tokens (assuming the rules are for the 'start' state): * * array('digit', '3', null), * array('space', ' ', null), * array('word', 'asdf', null), * * A rule can also cause a state transition: * * array('zebra', 'animal', 'saw_zebra'), * * This would match the text "zebra", emit a token of type "animal", and change * the parser state to "saw_zebra", causing the lexer to start using the rules * from that state. * * To pop the lexer's state, you can use the special state '!pop'. * * Finally, you can provide additional options in the fourth parameter. * Supported options are `case-insensitive` and `context`. * * Possible values for `context` are `push` (push the token value onto the * context stack), `pop` (pop the context stack and use it to provide context * for the token), and `discard` (pop the context stack and throw away the * value). * * For example, to lex text like this: * * Class::CONSTANT * * You can use a rule set like this: * * 'start' => array( * array('\w+(?=::)', 'class', 'saw_class', array('context' => 'push')), * ), * 'saw_class' => array( * array('::', 'operator'), * array('\w+', 'constant, '!pop', array('context' => 'pop')), * ), * * This would parse the above text into this token stream: * * array('class', 'Class', null), * array('operator', '::', null), * array('constant', 'CONSTANT', 'Class'), * * For a concrete implementation, see @{class:PhutilPHPFragmentLexer}. * * @task lexerimpl Lexer Implementation * @task rule Lexer Rules * @task tokens Lexer Tokens - * - * @group lexer */ abstract class PhutilLexer { private $processedRules; private $lastState; /* -( Lexer Rules )-------------------------------------------------------- */ /** * Return a set of rules for this lexer. See description in * @{class:PhutilLexer}. * * @return dict Lexer rules. 
* @task lexerimpl */ abstract protected function getRawRules(); /* -( Lexer Rules )-------------------------------------------------------- */ /** * Process, normalize, and validate the raw lexer rules. * * @task rule */ protected function getRules() { $class = get_class($this); $raw_rules = $this->getRawRules(); if (!is_array($raw_rules)) { $type = gettype($raw_rules); throw new UnexpectedValueException( "Expected {$class}->getRawRules() to return array, got {$type}."); } if (empty($raw_rules['start'])) { throw new UnexpectedValueException( "Expected {$class} rules to define rules for state 'start'."); } $processed_rules = array(); foreach ($raw_rules as $state => $rules) { if (!is_array($rules)) { $type = gettype($rules); throw new UnexpectedValueException( "Expected list of rules for state '{$state}' in {$class}, got ". "{$type}."); } foreach ($rules as $key => $rule) { $n = count($rule); if ($n < 2 || $n > 4) { throw new UnexpectedValueException( "Expected rule '{$key}' in state '{$state}' in {$class} to have ". "2-4 elements (regex, token, [next state], [options]), got {$n}."); } $rule = array_values($rule); if (count($rule) == 2) { $rule[] = null; } if (count($rule) == 3) { $rule[] = array(); } foreach ($rule[3] as $option => $value) { switch ($option) { case 'context': if ($value !== 'push' && $value !== 'pop' && $value !== 'discard' && $value !== null) { throw new UnexpectedValueException( "Rule '{$key}' in state '{$state}' in {$class} has unknown ". "context rule '{$value}', expected 'push', 'pop' or ". "'discard'."); } break; default: throw new UnexpectedValueException( "Rule '{$key}' in state '{$state}' in {$class} has unknown ". "option '{$option}'."); } } $flags = 'sS'; // NOTE: The "\G" assertion is an offset-aware version of "^". $rule[0] = '(\\G'.$rule[0].')'.$flags; if (@preg_match($rule[0], '') === false) { $error = error_get_last(); throw new UnexpectedValueException( "Rule '{$key}' in state '{$state}' in {$class} defines an ". 
"invalid regular expression ('{$rule[0]}'): ". idx($error, 'message')); } $next_state = $rule[2]; if ($next_state !== null && $next_state !== '!pop') { if (empty($raw_rules[$next_state])) { throw new UnexpectedValueException( "Rule '{$key}' in state '{$state}' in {$class} transitions to ". "state '{$next_state}', but there are no rules for that state."); } } $processed_rules[$state][] = $rule; } } return $processed_rules; } /* -( Lexer Tokens )------------------------------------------------------- */ /** * Lex an input string into tokens. * * @param string Input string. * @param string Initial lexer state. * @return list List of lexer tokens. * @task tokens */ public function getTokens($input, $initial_state = 'start') { if (empty($this->processedRules)) { $this->processedRules = $this->getRules(); } $rules = $this->processedRules; $this->lastState = null; $position = 0; $length = strlen($input); $tokens = array(); $states = array(); $states[] = 'start'; if ($initial_state != 'start') { $states[] = $initial_state; } $context = array(); while ($position < $length) { $state_rules = idx($rules, end($states), array()); foreach ($state_rules as $rule) { $matches = null; if (!preg_match($rule[0], $input, $matches, 0, $position)) { continue; } list($regexp, $token_type, $next_state, $options) = $rule; $match_length = strlen($matches[0]); if (!$match_length) { if ($next_state === null) { throw new UnexpectedValueException( "Rule '{$regexp}' matched a zero-length token and causes no ". 
"state transition."); } } else { $position += $match_length; $token = array($token_type, $matches[0]); $copt = idx($options, 'context'); if ($copt == 'push') { $context[] = $matches[0]; $token[] = null; } else if ($copt == 'pop') { if (empty($context)) { throw new UnexpectedValueException( "Rule '{$regexp}' popped empty context!"); } $token[] = array_pop($context); } else if ($copt == 'discard') { if (empty($context)) { throw new UnexpectedValueException( "Rule '{$regexp}' discarded empty context!"); } array_pop($context); $token[] = null; } else { $token[] = null; } $tokens[] = $token; } if ($next_state !== null) { if ($next_state == '!pop') { array_pop($states); if (empty($states)) { throw new UnexpectedValueException( "Rule '{$regexp}' popped off the last state."); } } else { $states[] = $next_state; } } continue 2; } throw new UnexpectedValueException( "No lexer rule matched input at char {$position}."); } $this->lastState = $states; return $tokens; } /** * Merge adjacent tokens of the same type. For example, if a comment is * tokenized as <"//", "comment">, this method will merge the two tokens into * a single combined token. 
*/ public function mergeTokens(array $tokens) { $last = null; $result = array(); foreach ($tokens as $token) { if ($last === null) { $last = $token; continue; } if (($token[0] == $last[0]) && ($token[2] == $last[2])) { $last[1] .= $token[1]; } else { $result[] = $last; $last = $token; } } if ($last !== null) { $result[] = $last; } return $result; } public function getLexerState() { return $this->lastState; } } diff --git a/src/lexer/PhutilPHPFragmentLexer.php b/src/lexer/PhutilPHPFragmentLexer.php index 21f6002..cac0ab2 100644 --- a/src/lexer/PhutilPHPFragmentLexer.php +++ b/src/lexer/PhutilPHPFragmentLexer.php @@ -1,270 +1,268 @@ array( array('<\\?(?i:php)?', 'cp', 'php'), array('[^<]+', null), array('<', null), ), 'php' => array_merge(array( array('\\?>', 'cp', '!pop'), array( '<<<([\'"]?)('.$identifier_pattern.')\\1\\n.*?\\n\\2\\;?\\n', 's'), ), $nonsemantic_rules, array( array('(?i:__halt_compiler)\\b', 'cp', 'halt_compiler'), array('(->|::)', 'o', 'attr'), array('[~!%^&*+=|:.<>/?@-]+', 'o'), array('[\\[\\]{}();,]', 'o'), // After 'new', try to match an unadorned symbol. array('(?i:new|instanceof)\\b', 'k', 'possible_classname'), array('(?i:function)\\b', 'k', 'function_definition'), // After 'extends' or 'implements', match a list of classes/interfaces. array('(?i:extends|implements)\\b', 'k', 'class_list'), array('(?i:catch)\\b', 'k', 'catch'), array('(?i:'.implode('|', $keywords).')\\b', 'k'), array('(?i:'.implode('|', $constants).')\\b', 'kc'), array('\\$+'.$identifier_pattern, 'nv'), // Match "f(" as a function and "C::" as a class. These won't work // if you put a comment between the symbol and the operator, but // that's a bizarre usage. 
array($identifier_ns_pattern.'(?=\s*[\\(])', 'nf'), array($identifier_ns_pattern.'(?=\s*::)', 'nc', 'context_attr', array( 'context' => 'push', ), ), array($identifier_ns_pattern, 'no'), array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?', 'mf'), array('\\d+[eE][+-]?[0-9]+', 'mf'), array('0[0-7]+', 'mo'), array('0[xX][a-fA-F0-9]+', 'mh'), array('0[bB][0-1]+', 'm'), array('\d+', 'mi'), array("'", 's1', 'string1'), array('`', 'sb', 'stringb'), array('"', 's2', 'string2'), array('.', null), )), // We've just matched a class name, with a "::" lookahead. The name of // the class is on the top of the context stack. We want to try to match // the attribute or method (e.g., "X::C" or "X::f()"). 'context_attr' => array_merge($nonsemantic_rules, array( array('::', 'o'), array($identifier_pattern.'(?=\s*[\\(])', 'nf', '!pop', array( 'context' => 'pop', ), ), array($identifier_pattern, 'na', '!pop', array( 'context' => 'pop', ), ), array('', null, '!pop', array( 'context' => 'discard', ), ), )), // After '->' or '::', a symbol is an attribute name. Note that we end // up in 'context_attr' instead of here in some cases. 'attr' => array_merge($nonsemantic_rules, array( array($identifier_pattern, 'na', '!pop'), array('', null, '!pop'), )), // After 'new', a symbol is a class name. 'possible_classname' => array_merge($nonsemantic_rules, array( array($identifier_ns_pattern, 'nc', '!pop'), array('', null, '!pop'), )), 'string1' => array( array('[^\'\\\\]+', 's1'), array("'", 's1', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), 'stringb' => array( array('[^`\\\\]+', 'sb'), array('`', 'sb', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), 'string2' => array( array('[^"\\\\]+', 's2'), array('"', 's2', '!pop'), array('\\\\.', 'k'), array('\\\\$', 'k'), ), // In a function definition (after "function"), we don't link the name // as a "nf" (name.function) since it is its own definition. 
'function_definition' => array_merge($nonsemantic_rules, array( array('&', 'o'), array('\\(', 'o', '!pop'), array($identifier_pattern, 'no', '!pop'), array('', null, '!pop'), )), // For "//" and "#" comments, we need to break out if we see "?" followed // by ">". 'line_comment' => array( array('[^?\\n]+', 'c'), array('\\n', null, '!pop'), array('(?=\\?>)', null, '!pop'), array('\\?', 'c'), ), // We've seen __halt_compiler. Grab the '();' afterward and then eat // the rest of the file as raw data. 'halt_compiler' => array_merge($nonsemantic_rules, array( array('[()]', 'o'), array(';', 'o', 'compiler_halted'), array('\\?>', 'o', 'compiler_halted'), // Just halt on anything else. array('', null, 'compiler_halted'), )), // __halt_compiler has taken effect. 'compiler_halted' => array( array('.+', null), ), 'class_list' => array_merge($nonsemantic_rules, array( array(',', 'o'), array('(?i:implements)', 'k'), array($identifier_ns_pattern, 'nc'), array('', null, '!pop'), )), 'catch' => array_merge($nonsemantic_rules, array( array('\\(', 'o'), array($identifier_ns_pattern, 'nc'), array('', null, '!pop'), )), - ); } + } diff --git a/src/lexer/PhutilPythonFragmentLexer.php b/src/lexer/PhutilPythonFragmentLexer.php index ad3d908..d18b063 100644 --- a/src/lexer/PhutilPythonFragmentLexer.php +++ b/src/lexer/PhutilPythonFragmentLexer.php @@ -1,316 +1,314 @@ array_merge(array( array('\\n', null), // TODO: Docstrings should match only at the start of a line array('""".*?"""', 'sd'), array('\'\'\'.*?\'\'\'', 'sd'), ), $nonsemantic_rules, array( array('[]{}:(),;[]', 'p'), array('\\\\\\n', null), array('\\\\', null), array('(?:in|is|and|or|not)\\b', 'ow'), array('(?:!=|==|<<|>>|[-~+/*%=<>&^|.])', 'o'), array('(?:'.implode('|', $keywords).')\\b', 'k'), array('def(?=\\s)', 'k', 'funcname'), array('class(?=\\s)', 'k', 'classname'), array('from(?=\\s)', 'kn', 'fromimport'), array('import(?=\\s)', 'kn', 'import'), array('(? 
array_merge($nonsemantic_rules, array( array('[a-zA-Z_]\w*', 'nf', '!pop'), array('', null, '!pop'), )), 'classname' => array_merge($nonsemantic_rules, array( array('[a-zA-Z_]\w*', 'nc', '!pop'), array('', null, '!pop'), )), 'fromimport' => array_merge($nonsemantic_rules, array( array('import\b', 'kn', '!pop'), // if None occurs here, it's "raise x from None", since None can // never be a module name array('None\b', 'bp', '!pop'), // sadly, in "raise x from y" y will be highlighted as namespace too array('[a-zA-Z_.][w.]*', 'nn'), array('', null, '!pop'), )), 'import' => array_merge($nonsemantic_rules, array( array('as\b', 'kn'), array(',', 'o'), array('[a-zA-Z_.][w.]*', 'nn'), array('', null, '!pop'), )), 'dqs_raw' => $dqs, 'sqs_raw' => $sqs, 'dqs' => array_merge($stringescape, $dqs), 'sqs' => array_merge($stringescape, $sqs), 'tdqs_raw' => $tdqs, 'tsqs_raw' => $tsqs, 'tdqs' => array_merge($stringescape, $tdqs), 'tsqs' => array_merge($stringescape, $tsqs), - ); } + } diff --git a/src/lexer/PhutilShellLexer.php b/src/lexer/PhutilShellLexer.php index 97df4cf..f7b1570 100644 --- a/src/lexer/PhutilShellLexer.php +++ b/src/lexer/PhutilShellLexer.php @@ -1,87 +1,86 @@ getTokens($string); if (count($this->getLexerState()) > 1) { throw new UnexpectedValueException( 'Unterminated string in argument list!'); } foreach ($tokens as $key => $token) { switch ($token[0]) { case "'": case '"': unset($tokens[$key]); break; case 'esc': $tokens[$key][0] = 'arg'; $tokens[$key][1] = substr($token[1], 1); break; default: break; } } $tokens = $this->mergeTokens(array_values($tokens)); $argv = array(); foreach ($tokens as $token) { if ($token[0] == 'arg') { $argv[] = $token[1]; } } return $argv; } protected function getRawRules() { return array( 'start' => array( array('\s+', ' '), array("'", "'", 'string1'), array('"', '"', 'string2'), array('\\\\.', 'esc'), array('[^\\s\'"\\\\]+', 'arg'), ), 'string1' => array( // NOTE: In a single-quoted string, backslash is not an escape. 
array('[^\']+', 'arg'), array("'", "'", '!pop'), ), 'string2' => array( // NOTE: In a double-quoted string, backslash IS an escape, but only // for some characters: ", $, `, \ and newline. array('[^"\\\\]+', 'arg'), array('"', '"', '!pop'), array('\\\\["$`\\\\\\n]', 'esc'), array('\\\\.', 'arg'), ), ); } + } diff --git a/src/lexer/PhutilSimpleOptionsLexer.php b/src/lexer/PhutilSimpleOptionsLexer.php index 7ed7f27..c83c19d 100644 --- a/src/lexer/PhutilSimpleOptionsLexer.php +++ b/src/lexer/PhutilSimpleOptionsLexer.php @@ -1,91 +1,90 @@ getTokens($input); foreach ($tokens as $key => $token) { list($type, $value) = $token; switch ($type) { case 'esc': $tokens[$key][0] = 'word'; $tokens[$key][1] = substr($value, 1); break; } } $tokens = $this->mergeTokens($tokens); // Find spaces in between two words and turn them into words. This allows // us to parse unescaped spaces in values correctly. for ($ii = 0; $ii < count($tokens); $ii++) { list($type, $value) = $tokens[$ii]; if ($type != ' ') { continue; } $last = idx($tokens, $ii - 1); if (!$last) { continue; } $next = idx($tokens, $ii + 1); if (!$next) { continue; } if ($last[0] == 'word' && $next[0] == 'word') { $tokens[$ii][0] = 'word'; } } // NOTE: Strip these only after merging tokens, so "a b" merges into two // words, "a" and "b", not a single "ab" word. 
foreach ($tokens as $key => $token) { list($type, $value) = $token; switch ($type) { case "'": case '"': case ' ': unset($tokens[$key]); break; } } return array_values($tokens); } protected function getRawRules() { return array( 'start' => array( array('\s+', ' '), array("'", "'", 'string1'), array('"', '"', 'string2'), array(',', ','), array('=', '='), array('[^\\s\'"=,]+', 'word'), ), 'string1' => array( array('[^\'\\\\]+', 'word'), array("'", "'", '!pop'), array('\\\\.', 'esc'), array('\\\\$', '!pop'), ), 'string2' => array( array('[^"\\\\]+', 'word'), array('"', '"', '!pop'), array('\\\\.', 'esc'), array('\\\\$', '!pop'), ), ); } + } diff --git a/src/lexer/PhutilTypeLexer.php b/src/lexer/PhutilTypeLexer.php index b466ed1..fe9a408 100644 --- a/src/lexer/PhutilTypeLexer.php +++ b/src/lexer/PhutilTypeLexer.php @@ -1,33 +1,32 @@ array( array('\s+', ' '), array('\\|', '|'), array('<', '<'), array('>', '>'), array(',', ','), array('\\?', '?'), array('optional', 'opt'), array('map', 'map'), array('list', 'list'), array('int|float|bool|string|null|callable|wild|regex', 'k'), array('[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*', 'k'), array('\\(', '(', 'comment') ), 'comment' => array( array('\\)', ')', '!pop'), array('[^\\)]+', 'cm'), ), ); } + } diff --git a/src/markup/PhutilMarkupEngine.php b/src/markup/PhutilMarkupEngine.php index 865e74e..7b08b43 100644 --- a/src/markup/PhutilMarkupEngine.php +++ b/src/markup/PhutilMarkupEngine.php @@ -1,35 +1,35 @@ assertEqual( (string)phutil_tag('br'), (string)phutil_tag('br', array())); $this->assertEqual( (string)phutil_tag('br', array()), (string)phutil_tag('br', array(), null)); } public function testTagEmpty() { $this->assertEqual( '
', (string)phutil_tag('br', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), null)); $this->assertEqual( '
', (string)phutil_tag('div', array(), '')); } public function testTagBasics() { $this->assertEqual( '
', (string)phutil_tag('br')); $this->assertEqual( '
y
', (string)phutil_tag('div', array(), 'y')); } public function testTagAttributes() { $this->assertEqual( '
y
', (string)phutil_tag('div', array('u' => 'v'), 'y')); $this->assertEqual( '
', (string)phutil_tag('br', array('u' => 'v'))); } public function testTagEscapes() { $this->assertEqual( '
', (string)phutil_tag('br', array('u' => '<'))); $this->assertEqual( '

', (string)phutil_tag('div', array(), phutil_tag('br'))); } public function testTagNullAttribute() { $this->assertEqual( '
', (string)phutil_tag('br', array('y' => null))); } public function testDefaultRelNoreferrer() { $map = array( // These should not have rel="nofollow" inserted implicitly. '/' => false, '/path/to/local.html' => false, '#example' => false, '' => false, // These should get the implicit insertion. 'http://www.example.org/' => true, '///evil.com/' => true, ' http://www.example.org/' => true, 'ftp://filez.com' => true, 'mailto:santa@northpole.com' => true, ); foreach ($map as $input => $expect) { $tag = phutil_tag( 'a', array( 'href' => $input, ), 'link'); $tag = (string)$tag; $this->assertEqual($expect, (bool)preg_match('/noreferrer/', $tag)); } // With an explicit `rel` present, we should not override it. $tag = phutil_tag( 'a', array( 'href' => 'http://www.example.org/', 'rel' => 'nofollow', ), 'link'); $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); // For tags other than `a`, we should not insert `rel`. $tag = phutil_tag( 'link', array( 'href' => 'http://www.example.org/', ), 'link'); $this->assertFalse((bool)preg_match('/noreferrer/', (string)$tag)); } public function testTagJavascriptProtocolRejection() { $hrefs = array( 'javascript:alert(1)' => true, 'JAVASCRIPT:alert(2)' => true, // NOTE: When interpreted as a URI, this is dropped because of leading // whitespace. ' javascript:alert(3)' => array(true, false), '/' => false, '/path/to/stuff/' => false, '' => false, 'http://example.com/' => false, '#' => false, 'javascript://anything' => true, // Chrome 33 and IE11, at a minimum, treat this as Javascript. "javascript\n:alert(4)" => true, // Opera currently accepts a variety of unicode spaces. This test case // has a smattering of them. "\xE2\x80\x89javascript:" => true, "javascript\xE2\x80\x89:" => true, "\xE2\x80\x84javascript:" => true, "javascript\xE2\x80\x84:" => true, // Because we're aggressive, all of unicode should trigger detection // by default. 
"\xE2\x98\x83javascript:" => true, "javascript\xE2\x98\x83:" => true, "\xE2\x98\x83javascript\xE2\x98\x83:" => true, // We're aggressive about this, so we'll intentionally raise false // positives in these cases. 'javascript~:alert(5)' => true, '!!!javascript!!!!:alert(6)' => true, // However, we should raise true negatives in these slightly more // reasonable cases. 'javascript/:docs.html' => false, 'javascripts:x.png' => false, 'COOLjavascript:page' => false, '/javascript:alert(1)' => false, ); foreach (array(true, false) as $use_uri) { foreach ($hrefs as $href => $expect) { if (is_array($expect)) { $expect = ($use_uri ? $expect[1] : $expect[0]); } if ($use_uri) { $href = new PhutilURI($href); } $caught = null; try { phutil_tag('a', array('href' => $href), 'click for candy'); } catch (Exception $ex) { $caught = $ex; } $this->assertEqual( $expect, $caught instanceof Exception, "Rejected href: {$href}"); } } } public function testURIEscape() { $this->assertEqual( '%2B/%20%3F%23%26%3A%21xyz%25', phutil_escape_uri('+/ ?#&:!xyz%')); } public function testURIPathComponentEscape() { $this->assertEqual( 'a%252Fb', phutil_escape_uri_path_component('a/b')); $str = ''; for ($ii = 0; $ii <= 255; $ii++) { $str .= chr($ii); } $this->assertEqual( $str, phutil_unescape_uri_path_component( rawurldecode( // Simulates webserver. phutil_escape_uri_path_component($str)))); } public function testHsprintf() { $this->assertEqual( '
<3
', (string)hsprintf('
%s
', '<3')); } public function testAppendHTML() { $html = phutil_tag('hr'); $html->appendHTML(phutil_tag('br'), ''); $this->assertEqual('

<evil>', $html->getHTMLContent()); } public function testArrayEscaping() { $this->assertEqual( '
<div>
', phutil_escape_html( array( hsprintf('
'), array( array( '<', array( 'd', array( array( hsprintf('i'), ), 'v', ), ), array( array( '>', ), ), ), ), hsprintf('
'), ))); - $this->assertEqual( - '


', - phutil_tag( - 'div', - array( - ), + $this->assertEqual( + '


', + phutil_tag( + 'div', + array(), + array( array( array( + phutil_tag('br'), array( - phutil_tag('br'), - array( - phutil_tag('hr'), - ), - phutil_tag('wbr'), + phutil_tag('hr'), ), + phutil_tag('wbr'), ), - ))->getHTMLContent()); - } + ), + ))->getHTMLContent()); + } } diff --git a/src/markup/__tests__/PhutilSafeHTMLTestCase.php b/src/markup/__tests__/PhutilSafeHTMLTestCase.php index 6575859..3a285c2 100644 --- a/src/markup/__tests__/PhutilSafeHTMLTestCase.php +++ b/src/markup/__tests__/PhutilSafeHTMLTestCase.php @@ -1,22 +1,19 @@ assertSkipped('Operator extension not available.'); } $a = phutil_tag('a'); $ab = $a.phutil_tag('b'); $this->assertEqual('', $ab->getHTMLContent()); $this->assertEqual('', $a->getHTMLContent()); $a .= phutil_tag('a'); $this->assertEqual('', $a->getHTMLContent()); } } diff --git a/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php b/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php index 5a22d6b..0149c79 100644 --- a/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php +++ b/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php @@ -1,116 +1,114 @@ markupText($root.$file); } } private function markupText($markup_file) { $contents = Filesystem::readFile($markup_file); $file = basename($markup_file); $parts = explode("\n~~~~~~~~~~\n", $contents); $this->assertEqual(3, count($parts), $markup_file); list($input_remarkup, $expected_output, $expected_text) = $parts; $engine = $this->buildNewTestEngine(); switch ($file) { case 'raw-escape.txt': // NOTE: Here, we want to test PhutilRemarkupRuleEscapeRemarkup and // PhutilRemarkupBlockStorage, which are triggered by "\1". In the // test, "~" is used as a placeholder for "\1" since it's hard to type // "\1". 
$input_remarkup = str_replace('~', "\1", $input_remarkup); $expected_output = str_replace('~', "\1", $expected_output); $expected_text = str_replace('~', "\1", $expected_text); break; case 'toc.txt': $engine->setConfig('header.generate-toc', true); break; } $actual_output = (string)$engine->markupText($input_remarkup); switch ($file) { case 'toc.txt': $table_of_contents = PhutilRemarkupEngineRemarkupHeaderBlockRule::renderTableOfContents( $engine); $actual_output = $table_of_contents."\n\n".$actual_output; break; } $this->assertEqual( $expected_output, $actual_output, "Failed to markup HTML in file '{$file}'."); $engine->setMode(PhutilRemarkupEngine::MODE_TEXT); $actual_output = (string)$engine->markupText($input_remarkup); $this->assertEqual( $expected_text, $actual_output, "Failed to markup text in file '{$file}'."); } private function buildNewTestEngine() { $engine = new PhutilRemarkupEngine(); $engine->setConfig('uri.prefix', 'http://www.example.com/'); $engine->setConfig( 'uri.allowed-protocols', array( 'http' => true, 'mailto' => true, )); $rules = array(); $rules[] = new PhutilRemarkupRuleEscapeRemarkup(); $rules[] = new PhutilRemarkupRuleMonospace(); $rules[] = new PhutilRemarkupRuleDocumentLink(); $rules[] = new PhutilRemarkupRuleHyperlink(); $rules[] = new PhutilRemarkupRuleBold(); $rules[] = new PhutilRemarkupRuleItalic(); $rules[] = new PhutilRemarkupRuleDel(); $rules[] = new PhutilRemarkupRuleUnderline(); $blocks = array(); $blocks[] = new PhutilRemarkupEngineRemarkupQuotesBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupReplyBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupHeaderBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupCodeBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupLiteralBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupNoteBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupTableBlockRule(); $blocks[] = new 
PhutilRemarkupEngineRemarkupSimpleTableBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupDefaultBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupListBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupInterpreterRule(); foreach ($blocks as $block) { if (!($block instanceof PhutilRemarkupEngineRemarkupCodeBlockRule)) { $block->setMarkupRules($rules); } } $engine->setBlockRules($blocks); return $engine; } } diff --git a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php index 65b8983..55d9541 100644 --- a/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php +++ b/src/markup/engine/remarkup/PhutilRemarkupBlockStorage.php @@ -1,79 +1,77 @@ \11Z
* * Then: * * ... * * If we didn't do this, the italics rule could match the "//" in "http://", * or any other number of processing mistakes could occur, some of which create * security risks. * * This class generates keys, and stores the map of keys to replacement text. - * - * @group markup */ final class PhutilRemarkupBlockStorage { const MAGIC_BYTE = "\1"; private $map = array(); private $index; public function store($text) { $key = self::MAGIC_BYTE.(++$this->index).'Z'; $this->map[$key] = $text; return $key; } public function restore($corpus, $text_mode = false) { if ($this->map) { if ($text_mode) { $corpus = str_replace( array_reverse(array_keys($this->map)), array_reverse($this->map), $corpus); } else { $corpus = phutil_safe_html(str_replace( array_reverse(array_keys($this->map)), array_map('phutil_escape_html', array_reverse($this->map)), phutil_escape_html($corpus))); } } return $corpus; } public function overwrite($key, $new_text) { $this->map[$key] = $new_text; return $this; } public function getMap() { return $this->map; } public function setMap(array $map) { $this->map = $map; return $this; } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php index 2461c80..1127051 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupBlockInterpreter.php @@ -1,40 +1,39 @@ engine = $engine; return $this; } final public function getEngine() { return $this->engine; } /** * @return string */ abstract public function getInterpreterName(); abstract public function markupContent($content, array $argv); protected function markupError($string) { if ($this->getEngine()->isTextMode()) { return '('.$string.')'; } else { return phutil_tag( 'div', array( 'class' => 'remarkup-interpreter-error', ), $string); } } } diff --git 
a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineBlockRule.php index 04e424e..7e82371 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineBlockRule.php @@ -1,146 +1,145 @@ engine = $engine; $this->updateRules(); return $this; } final protected function getEngine() { return $this->engine; } public function setMarkupRules(array $rules) { assert_instances_of($rules, 'PhutilRemarkupRule'); $this->rules = $rules; $this->updateRules(); return $this; } private function updateRules() { $engine = $this->getEngine(); if ($engine) { $this->rules = msort($this->rules, 'getPriority'); foreach ($this->rules as $rule) { $rule->setEngine($engine); } } return $this; } final public function getMarkupRules() { return $this->rules; } final public function postprocess() { $this->didMarkupText(); } final protected function applyRules($text) { foreach ($this->getMarkupRules() as $rule) { $text = $rule->apply($text); } return $text; } public function supportsChildBlocks() { return false; } public function extractChildText($text) { throw new Exception(pht('Not implemented!')); } protected function renderRemarkupTable(array $out_rows) { assert_instances_of($out_rows, 'array'); if ($this->getEngine()->isTextMode()) { $lengths = array(); foreach ($out_rows as $r => $row) { foreach ($row['content'] as $c => $cell) { $text = $this->getEngine()->restoreText($cell['content']); $lengths[$c][$r] = phutil_utf8_strlen($text); } } $max_lengths = array_map('max', $lengths); $out = array(); foreach ($out_rows as $r => $row) { $headings = false; foreach ($row['content'] as $c => $cell) { $length = $max_lengths[$c] - $lengths[$c][$r]; $out[] = '| '.$cell['content'].str_repeat(' ', $length).' 
'; if ($cell['type'] == 'th') { $headings = true; } } $out[] = "|\n"; if ($headings) { foreach ($row['content'] as $c => $cell) { $char = ($cell['type'] == 'th' ? '-' : ' '); $out[] = '| '.str_repeat($char, $max_lengths[$c]).' '; } $out[] = "|\n"; } } return rtrim(implode('', $out), "\n"); } $out = array(); $out[] = "\n"; foreach ($out_rows as $row) { $cells = array(); foreach ($row['content'] as $cell) { $cells[] = phutil_tag($cell['type'], array(), $cell['content']); } $out[] = phutil_tag($row['type'], array(), $cells); $out[] = "\n"; } return phutil_tag('table', array('class' => 'remarkup-table'), $out); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupCodeBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupCodeBlockRule.php index ab17231..b310fed 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupCodeBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupCodeBlockRule.php @@ -1,191 +1,188 @@ false, 'lang' => null, 'name' => null, 'lines' => null, ); $parser = new PhutilSimpleOptions(); $custom = $parser->parse(head($lines)); if ($custom) { $valid = true; foreach ($custom as $key => $value) { if (!array_key_exists($key, $options)) { $valid = false; break; } } if ($valid) { array_shift($lines); $options = $custom + $options; } } // Normalize the text back to a 0-level indent. 
$min_indent = 80; foreach ($lines as $line) { for ($ii = 0; $ii < strlen($line); $ii++) { if ($line[$ii] != ' ') { $min_indent = min($ii, $min_indent); break; } } } $text = implode("\n", $lines); if ($min_indent) { $indent_string = str_repeat(' ', $min_indent); $text = preg_replace('/^'.$indent_string.'/m', '', $text); } if ($this->getEngine()->isTextMode()) { $out = array(); $header = array(); if ($options['counterexample']) { $header[] = 'counterexample'; } if ($options['name'] != '') { $header[] = 'name='.$options['name']; } if ($header) { $out[] = implode(', ', $header); } $text = preg_replace('/^/m', ' ', $text); $out[] = $text; return implode("\n", $out); } if (empty($options['lang'])) { // If the user hasn't specified "lang=..." explicitly, try to guess the // language. If we fail, fall back to configured defaults. $lang = PhutilLanguageGuesser::guessLanguage($text); if (!$lang) { $lang = nonempty( $this->getEngine()->getConfig('phutil.codeblock.language-default'), 'php'); } $options['lang'] = $lang; } $code_body = $this->highlightSource($text, $options); $name_header = null; if ($options['name']) { $name_header = phutil_tag( 'div', array( 'class' => 'remarkup-code-header', ), $options['name']); } return phutil_tag( 'div', array( 'class' => 'remarkup-code-block', 'data-code-lang' => $options['lang'], 'data-sigil' => 'remarkup-code-block', ), array($name_header, $code_body)); } private function highlightSource($text, array $options) { if ($options['counterexample']) { $aux_class = ' remarkup-counterexample'; } else { $aux_class = null; } $aux_style = null; if ($options['lines']) { // Put a minimum size on this because the scrollbar is otherwise // unusable. 
$height = max(6, (int)$options['lines']); $aux_style = 'max-height: '.(2 * $height).'em;'; } $engine = $this->getEngine()->getConfig('syntax-highlighter.engine'); if (!$engine) { $engine = 'PhutilDefaultSyntaxHighlighterEngine'; } $engine = newv($engine, array()); $engine->setConfig( 'pygments.enabled', $this->getEngine()->getConfig('pygments.enabled')); return phutil_tag( 'pre', array( 'class' => 'remarkup-code'.$aux_class, 'style' => $aux_style, ), PhutilSafeHTML::applyFunction( 'rtrim', $engine->highlightSource($options['lang'], $text))); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupDefaultBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupDefaultBlockRule.php index b1f4374..7b4fe34 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupDefaultBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupDefaultBlockRule.php @@ -1,39 +1,36 @@ applyRules($text); if ($this->getEngine()->isTextMode()) { if (!$this->getEngine()->getConfig('preserve-linebreaks')) { $text = preg_replace('/ *\n */', ' ', $text); } return $text; } if ($this->getEngine()->getConfig('preserve-linebreaks')) { $text = phutil_escape_html_newlines($text); } if (!strlen($text)) { return null; } return phutil_tag('p', array(), $text); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHeaderBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHeaderBlockRule.php index fdfc34f..8a0a0b9 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHeaderBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHeaderBlockRule.php @@ -1,166 +1,163 @@ 1) { $level = ($lines[1][0] == '=') ? 
1 : 2; $text = trim($lines[0]); } else { $level = 0; for ($ii = 0; $ii < min(5, strlen($text)); $ii++) { if ($text[$ii] == '=') { ++$level; } else { break; } } $text = trim($text, ' ='); } $engine = $this->getEngine(); if ($engine->isTextMode()) { $char = ($level == 1) ? '=' : '-'; return $text."\n".str_repeat($char, phutil_utf8_strlen($text)); } $use_anchors = $engine->getConfig('header.generate-toc'); $anchor = null; if ($use_anchors) { $anchor = $this->generateAnchor($level, $text); } $text = phutil_tag( 'h'.($level + 1), array( 'class' => 'remarkup-header', ), array($anchor, $this->applyRules($text))); return $text; } private function generateAnchor($level, $text) { $anchor = strtolower($text); $anchor = preg_replace('/[^a-z0-9]/', '-', $anchor); $anchor = preg_replace('/--+/', '-', $anchor); $anchor = trim($anchor, '-'); $anchor = substr($anchor, 0, 24); $anchor = trim($anchor, '-'); $base = $anchor; $key = self::KEY_HEADER_TOC; $engine = $this->getEngine(); $anchors = $engine->getTextMetadata($key, array()); $suffix = 1; while (!strlen($anchor) || isset($anchors[$anchor])) { $anchor = $base.'-'.$suffix; $anchor = trim($anchor, '-'); $suffix++; } // When a document contains a link inside a header, like this: // // = [[ http://www.example.com/ | example ]] = // // ...we want to generate a TOC entry with just "example", but link the // header itself. We push the 'toc' state so all the link rules generate // just names. 
$engine->pushState('toc'); $text = $this->applyRules($text); $text = $engine->restoreText($text); $anchors[$anchor] = array($level, $text); $engine->popState('toc'); $engine->setTextMetadata($key, $anchors); return phutil_tag( 'a', array( 'name' => $anchor, ), ''); } public static function renderTableOfContents(PhutilRemarkupEngine $engine) { $key = self::KEY_HEADER_TOC; $anchors = $engine->getTextMetadata($key, array()); if (count($anchors) < 2) { // Don't generate a TOC if there are no headers, or if there's only // one header (since such a TOC would be silly). return null; } $depth = 0; $toc = array(); foreach ($anchors as $anchor => $info) { list($level, $name) = $info; while ($depth < $level) { $toc[] = hsprintf('
    '); $depth++; } while ($depth > $level) { $toc[] = hsprintf('
'); $depth--; } $toc[] = phutil_tag( 'li', array(), phutil_tag( 'a', array( 'href' => '#'.$anchor, ), $name)); } while ($depth > 0) { $toc[] = hsprintf(''); $depth--; } return phutil_implode_html("\n", $toc); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule.php index 7343339..155d9de 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule.php @@ -1,37 +1,33 @@ 'remarkup-hr')); } - } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInlineBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInlineBlockRule.php index 4f30112..c74b58e 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInlineBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInlineBlockRule.php @@ -1,17 +1,14 @@ applyRules($text); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInterpreterRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInterpreterRule.php index 78d5111..8da9184 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInterpreterRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupInterpreterRule.php @@ -1,94 +1,90 @@ parse($matches[2]); } $interpreters = id(new PhutilSymbolLoader()) ->setAncestorClass('PhutilRemarkupBlockInterpreter') ->loadObjects(); foreach ($interpreters as $interpreter) { $interpreter->setEngine($this->getEngine()); } $lines[$first_key] = preg_replace( self::START_BLOCK_PATTERN, '', $lines[$first_key]); $lines[$last_key] = preg_replace( self::END_BLOCK_PATTERN, '', $lines[$last_key]); if (trim($lines[$first_key]) === '') { unset($lines[$first_key]); } 
if (trim($lines[$last_key]) === '') { unset($lines[$last_key]); } $content = implode("\n", $lines); $interpreters = mpull($interpreters, null, 'getInterpreterName'); if (isset($interpreters[$matches[1]])) { return $interpreters[$matches[1]]->markupContent($content, $argv); } $message = pht('No interpreter found: %s', $matches[1]); if ($this->getEngine()->isTextMode()) { return '('.$message.')'; } return phutil_tag( 'div', array( 'class' => 'remarkup-interpreter-error', ), $message); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php index ad2cc90..2e60c3f 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupListBlockRule.php @@ -1,485 +1,479 @@ $line) { $matches = null; if (preg_match($regex, $line)) { $regex = self::CONT_BLOCK_PATTERN; if (preg_match('/^(\s+)/', $line, $matches)) { $space = strlen($matches[1]); } else { $space = 0; } $min_space = min($min_space, $space); } } $regex = self::START_BLOCK_PATTERN; if ($min_space) { foreach ($lines as $key => $line) { if (preg_match($regex, $line)) { $regex = self::CONT_BLOCK_PATTERN; $lines[$key] = substr($line, $min_space); } } } // The input text may have linewraps in it, like this: // // - derp derp derp derp // derp derp derp derp // - blarp blarp blarp blarp // // Group text lines together into list items, stored in $items. 
So the // result in the above case will be: // // array( // array( // "- derp derp derp derp", // " derp derp derp derp", // ), // array( // "- blarp blarp blarp blarp", // ), // ); $item = array(); $regex = self::START_BLOCK_PATTERN; foreach ($lines as $line) { if (preg_match($regex, $line)) { $regex = self::CONT_BLOCK_PATTERN; if ($item) { $items[] = $item; $item = array(); } } $item[] = $line; } if ($item) { $items[] = $item; } // Process each item to normalize the text, remove line wrapping, and // determine its depth (indentation level) and style (ordered vs unordered). // // Given the above example, the processed array will look like: // // array( // array( // 'text' => 'derp derp derp derp derp derp derp derp', // 'depth' => 0, // 'style' => '-', // ), // array( // 'text' => 'blarp blarp blarp blarp', // 'depth' => 0, // 'style' => '-', // ), // ); $has_marks = false; foreach ($items as $key => $item) { $item = preg_replace('/\s*\n\s*/', ' ', implode("\n", $item)); $item = rtrim($item); if (!strlen($item)) { unset($items[$key]); continue; } $matches = null; if (preg_match('/^\s*([-*#]{2,})/', $item, $matches)) { // Alternate-style indents; use number of list item symbols. $depth = strlen($matches[1]) - 1; } else if (preg_match('/^(\s+)/', $item, $matches)) { // Markdown-style indents; use indent depth. $depth = strlen($matches[1]); } else { $depth = 0; } if (preg_match('/^\s*(?:#|[0-9])/', $item)) { $style = '#'; } else { $style = '-'; } // Strip leading indicators off the item. $text = preg_replace(self::STRIP_BLOCK_PATTERN, '', $item); // Look for "[]", "[ ]", "[*]", "[x]", etc., which we render as a // checkbox. 
$mark = null; $matches = null; if (preg_match('/^\s*\[(.?)\]\s*/', $text, $matches)) { if (strlen(trim($matches[1]))) { $mark = true; } else { $mark = false; } $has_marks = true; $text = substr($text, strlen($matches[0])); } $items[$key] = array( 'text' => $text, 'depth' => $depth, 'style' => $style, 'mark' => $mark, ); } $items = array_values($items); // Users can create a sub-list by indenting any deeper amount than the // previous list, so these are both valid: // // - a // - b // // - a // - b // // In the former case, we'll have depths (0, 2). In the latter case, depths // (0, 4). We don't actually care about how many spaces there are, only // how many list indentation levels (that is, we want to map both of // those cases to (0, 1), indicating "outermost list" and "first sublist"). // // This is made more complicated because lists at two different indentation // levels might be at the same list level: // // - a // - b // - c // - d // // Here, 'b' and 'd' are at the same list level (2) but different indent // levels (2, 4). // // Users can also create "staircases" like this: // // - a // - b // # c // // While this is silly, we'd like to render it as faithfully as possible. // // In order to do this, we convert the list of nodes into a tree, // normalizing indentation levels and inserting dummy nodes as necessary to // make the tree well-formed. See additional notes at buildTree(). // // In the case above, the result is a tree like this: // // - // - // - a // - b // # c $l = 0; $r = count($items); $tree = $this->buildTree($items, $l, $r, $cur_level = 0); // We may need to open a list on a node, but they do not have // list style information yet. We need to propagate list style information // backward through the tree. In the above example, the tree now looks // like this: // // - // - // - a // - b // # c $this->adjustTreeStyleInformation($tree); // Finally, we have enough information to render the tree. 
$out = $this->renderTree($tree, 0, $has_marks); if ($this->getEngine()->isTextMode()) { $out = implode('', $out); $out = rtrim($out, "\n"); $out = preg_replace('/ +$/m', '', $out); return $out; } return phutil_implode_html('', $out); } /** - * See additional notes in markupText(). + * See additional notes in @{method:markupText}. */ private function buildTree(array $items, $l, $r, $cur_level) { if ($l == $r) { return array(); } if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) { // This algorithm is recursive and we don't need you blowing the stack // with your oh-so-clever 50,000-item-deep list. Cap indentation levels // at a reasonable number and just shove everything deeper up to this // level. $nodes = array(); for ($ii = $l; $ii < $r; $ii++) { $nodes[] = array( 'level' => $cur_level, 'items' => array(), ) + $items[$ii]; } return $nodes; } $min = $l; for ($ii = $r - 1; $ii >= $l; $ii--) { if ($items[$ii]['depth'] < $items[$min]['depth']) { $min = $ii; } } $min_depth = $items[$min]['depth']; $nodes = array(); if ($min != $l) { $nodes[] = array( 'text' => null, 'level' => $cur_level, 'style' => null, 'mark' => null, 'items' => $this->buildTree($items, $l, $min, $cur_level + 1), ); } $last = $min; for ($ii = $last + 1; $ii < $r; $ii++) { if ($items[$ii]['depth'] == $min_depth) { $nodes[] = array( 'level' => $cur_level, 'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1), ) + $items[$last]; $last = $ii; } } $nodes[] = array( 'level' => $cur_level, 'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1), ) + $items[$last]; return $nodes; } /** - * See additional notes in markupText(). + * See additional notes in @{method:markupText}. */ private function adjustTreeStyleInformation(array &$tree) { - // The effect here is just to walk backward through the nodes at this level // and apply the first style in the list to any empty nodes we inserted // before it. As we go, also recurse down the tree. 
$style = '-'; for ($ii = count($tree) - 1; $ii >= 0; $ii--) { if ($tree[$ii]['style'] !== null) { // This is the earliest node we've seen with style, so set the // style to its style. $style = $tree[$ii]['style']; } else { // This node has no style, so apply the current style. $tree[$ii]['style'] = $style; } if ($tree[$ii]['items']) { $this->adjustTreeStyleInformation($tree[$ii]['items']); } } } /** - * See additional notes in markupText(). + * See additional notes in @{method:markupText}. */ private function renderTree(array $tree, $level, $has_marks) { $style = idx(head($tree), 'style'); $out = array(); if (!$this->getEngine()->isTextMode()) { switch ($style) { case '#': $tag = 'ol'; break; case '-': $tag = 'ul'; break; } if ($has_marks) { $out[] = hsprintf( '<%s class="remarkup-list remarkup-list-with-checkmarks">', $tag); } else { $out[] = hsprintf( '<%s class="remarkup-list">', $tag); } $out[] = "\n"; } $number = 1; foreach ($tree as $item) { if ($this->getEngine()->isTextMode()) { $out[] = str_repeat(' ', 2 * $level); if ($item['mark'] !== null) { if ($item['mark']) { $out[] = '[X] '; } else { $out[] = '[ ] '; } } else { switch ($style) { case '#': $out[] = $number.'. '; $number++; break; case '-': $out[] = '- '; break; } } $out[] = $this->applyRules($item['text'])."\n"; } else if ($item['text'] === null) { $out[] = hsprintf('
  • '); } else { if ($item['mark'] !== null) { if ($item['mark'] == true) { $out[] = hsprintf( '
  • '); } else { $out[] = hsprintf( '
  • '); } $out[] = phutil_tag( 'input', array( 'type' => 'checkbox', 'checked' => ($item['mark'] ? 'checked' : null), 'disabled' => 'disabled', )); $out[] = ' '; } else { $out[] = hsprintf('
  • '); } $out[] = $this->applyRules($item['text']); } if ($item['items']) { $subitems = $this->renderTree($item['items'], $level + 1, $has_marks); foreach ($subitems as $i) { $out[] = $i; } } if (!$this->getEngine()->isTextMode()) { $out[] = hsprintf("
  • \n"); } } if (!$this->getEngine()->isTextMode()) { switch ($style) { case '#': $out[] = hsprintf(''); break; case '-': $out[] = hsprintf(''); break; } } return $out; } - } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupLiteralBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupLiteralBlockRule.php index e8d8827..08fba58 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupLiteralBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupLiteralBlockRule.php @@ -1,36 +1,34 @@ getEngine()->isTextMode()) { return $text; } $text = phutil_split_lines($text, $retain_endings = true); return phutil_implode_html(phutil_tag('br', array()), $text); } + } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupNoteBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupNoteBlockRule.php index 7248b1e..6089253 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupNoteBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupNoteBlockRule.php @@ -1,96 +1,93 @@ getRegEx(), $lines[$cursor])) { $num_lines++; $cursor++; while (isset($lines[$cursor])) { if (trim($lines[$cursor])) { $num_lines++; $cursor++; continue; } break; } } return $num_lines; } public function markupText($text, $children) { $matches = array(); preg_match($this->getRegEx(), $text, $matches); if (idx($matches, 'showword')) { $word = $matches['showword']; $show = true; } else { $word = $matches['hideword']; $show = false; } $class_suffix = phutil_utf8_strtolower($word); // This is the "(IMPORTANT)" or "NOTE:" part. $word_part = rtrim(substr($text, 0, strlen($matches[0]))); // This is the actual text. $text_part = substr($text, strlen($matches[0])); $text_part = $this->applyRules(rtrim($text_part)); $text_mode = $this->getEngine()->isTextMode(); if ($text_mode) { return $word_part.' 
'.$text_part; } if ($show) { $content = array( phutil_tag( 'span', array( 'class' => 'remarkup-note-word', ), $word_part), ' ', $text_part); } else { $content = $text_part; } return phutil_tag( 'div', array( 'class' => 'remarkup-'.$class_suffix ), $content); } private function getRegEx() { $words = array( 'NOTE', 'IMPORTANT', 'WARNING', ); foreach ($words as $k => $word) { $words[$k] = preg_quote($word, '/'); } $words = implode('|', $words); return '/^(?:'. '(?:\((?P'.$words.')\))'. '|'. '(?:(?P'.$words.'):))\s*'. '/'; } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupSimpleTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupSimpleTableBlockRule.php index fb1c2f9..ce6fd90 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupSimpleTableBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupSimpleTableBlockRule.php @@ -1,77 +1,74 @@ 'td', 'content' => $this->applyRules($cell)); } if (!$headings) { $rows[] = array('type' => 'tr', 'content' => $cells); } else if ($rows) { // Mark previous row with headings. 
foreach ($cells as $i => $cell) { if ($cell['content']) { $rows[last_key($rows)]['content'][$i]['type'] = 'th'; } } } } if (!$rows) { return $this->applyRules($text); } return $this->renderRemarkupTable($rows); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTableBlockRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTableBlockRule.php index 2f759d1..65273b9 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTableBlockRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTableBlockRule.php @@ -1,109 +1,106 @@ /i', $lines[$cursor])) { $num_lines++; $cursor++; while (isset($lines[$cursor])) { $num_lines++; if (preg_match('@$@i', $lines[$cursor])) { break; } $cursor++; } } return $num_lines; } public function markupText($text, $children) { $matches = array(); if (!preg_match('@^(.*)
    $@si', $text, $matches)) { return $this->fail( $text, 'Bad table (expected ...
    )'); } $body = $matches[1]; $row_fragment = '(?:\s*(.*)\s*)'; $cell_fragment = '(?:\s*<(td|th)>(.*)\s*)'; // Test that the body contains only valid rows. if (!preg_match('@^'.$row_fragment.'+$@Usi', $body)) { return $this->fail( $body, 'Bad table syntax (expected rows ...)'); } // Capture the rows. $row_regex = '@'.$row_fragment.'@Usi'; if (!preg_match_all($row_regex, $body, $matches, PREG_SET_ORDER)) { throw new Exception( 'Bug in Remarkup tables, parsing fails for input: '.$text); } $out_rows = array(); $rows = $matches; foreach ($rows as $row) { $content = $row[1]; // Test that the row contains only valid cells. if (!preg_match('@^'.$cell_fragment.'+$@Usi', $content)) { return $this->fail( $content, 'Bad table syntax (expected cells ...)'); } // Capture the cells. $cell_regex = '@'.$cell_fragment.'@Usi'; if (!preg_match_all($cell_regex, $content, $matches, PREG_SET_ORDER)) { throw new Exception( 'Bug in Remarkup tables, parsing fails for input: '.$text); } $out_cells = array(); foreach ($matches as $cell) { $cell_type = $cell[1]; $cell_content = $cell[2]; $out_cells[] = array( 'type' => $cell_type, 'content' => $this->applyRules($cell_content), ); } $out_rows[] = array( 'type' => 'tr', 'content' => $out_cells, ); } return $this->renderRemarkupTable($out_rows); } private function fail($near, $message) { $message = sprintf( '%s near: %s', $message, phutil_utf8_shorten($near, 32000)); if ($this->getEngine()->isTextMode()) { return '('.$message.')'; } return hsprintf('
    %s
    ', $message); } } diff --git a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTestInterpreterRule.php b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTestInterpreterRule.php index 661e3a0..91d7dd1 100644 --- a/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTestInterpreterRule.php +++ b/src/markup/engine/remarkup/blockrule/PhutilRemarkupEngineRemarkupTestInterpreterRule.php @@ -1,20 +1,17 @@ engine = $engine; return $this; } public function getEngine() { return $this->engine; } public function getPriority() { return 500.0; } abstract public function apply($text); public function getPostprocessKey() { return spl_object_hash($this); } public function didMarkupText() { return; } protected function replaceHTML($pattern, $callback, $text) { $this->replaceCallback = $callback; return phutil_safe_html(preg_replace_callback( $pattern, array($this, 'replaceHTMLCallback'), phutil_escape_html($text))); } private function replaceHTMLCallback($match) { return phutil_escape_html(call_user_func( $this->replaceCallback, array_map('phutil_safe_html', $match))); } /** * Safely generate a tag. * * In Remarkup contexts, it's not safe to use arbitrary text in tag * attributes: even though it will be escaped, it may contain replacement * tokens which are then replaced with markup. * * This method acts as @{function:phutil_tag}, but checks attributes before * using them. * * @param string Tag name. * @param dict Tag attributes. * @param wild Tag content. * @return PhutilSafeHTML Tag object. */ protected function newTag($name, array $attrs, $content = null) { foreach ($attrs as $key => $attr) { if ($attr !== null) { $attrs[$key] = $this->assertFlatText($attr); } } return phutil_tag($name, $attrs, $content); } /** * Assert that a text token is flat (it contains no replacement tokens). * * Because tokens can be replaced with markup, it is dangerous to use * arbitrary input text in tag attributes. 
Normally, rule precedence should * prevent this. Asserting that text is flat before using it as an attribute * provides an extra layer of security. * * Normally, you can call @{method:newTag} rather than calling this method * directly. @{method:newTag} will check attributes for you. * * @param wild Ostensibly flat text. * @return string Flat text. */ protected function assertFlatText($text) { $text = (string)hsprintf('%s', phutil_safe_html($text)); $rich = (strpos($text, PhutilRemarkupBlockStorage::MAGIC_BYTE) !== false); if ($rich) { throw new Exception( pht( 'Remarkup rule precedence is dangerous: rendering text with tokens '. 'as flat text!')); } return $text; } /** * Check whether text is flat (contains no replacement tokens) or not. * * @param wild Ostensibly flat text. * @return bool True if the text is flat. */ protected function isFlatText($text) { $text = (string)hsprintf('%s', phutil_safe_html($text)); return (strpos($text, PhutilRemarkupBlockStorage::MAGIC_BYTE) === false); } - } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleBold.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleBold.php index 0e50044..fef4376 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleBold.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleBold.php @@ -1,28 +1,24 @@ getEngine()->isTextMode()) { return $text; } return $this->replaceHTML( '@\\*\\*(.+?)\\*\\*@s', array($this, 'applyCallback'), $text); } protected function applyCallback($matches) { return hsprintf('%s', $matches[1]); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDel.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDel.php index 4111782..25c6311 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDel.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDel.php @@ -1,28 +1,24 @@ getEngine()->isTextMode()) { return $text; } return $this->replaceHTML( '@(?%s', $matches[1]); } } diff 
--git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDocumentLink.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDocumentLink.php index 7ff0f23..cd8684b 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDocumentLink.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleDocumentLink.php @@ -1,127 +1,123 @@ getEngine()->isTextMode()) { $text = $link; if (strncmp($link, '/', 1) == 0 || strncmp($link, '#', 1) == 0) { $base = $this->getEngine()->getConfig('uri.prefix'); if (strncmp($link, '/', 1) == 0) { $base = rtrim($base, '/'); } $text = $base.$text; } // If present, strip off "mailto:". $text = preg_replace('/^mailto:/', '', $text); if ($link == $name) { return $text; } return $name.' <'.$text.'>'; } // By default, we open links in a new window or tab. For anchors on the same // page, just jump normally. $target = '_blank'; if (strncmp($link, '#', 1) == 0) { $target = null; } $name = preg_replace('/^mailto:/', '', $name); if ($this->getEngine()->getState('toc')) { return $name; } else { return phutil_tag( 'a', array( 'href' => $link, 'class' => 'remarkup-link', 'target' => $target, ), $name); } } public function markupAlternateLink($matches) { $uri = trim($matches[2]); // NOTE: We apply some special rules to avoid false positives here. The // major concern is that we do not want to convert `x[0][1](y)` in a // discussion about C source code into a link. To this end, we: // // - Don't match at word boundaries; // - require the URI to contain a "/" character or "@" character; and // - reject URIs which begin with a quote character. 
if ($uri[0] == '"' || $uri[0] == "'" || $uri[0] == '`') { return $matches[0]; } if (strpos($uri, '/') === false && strpos($uri, '@') === false) { return $matches[0]; } return $this->markupDocumentLink( array( $matches[0], $matches[2], $matches[1], )); } public function markupDocumentLink($matches) { $uri = trim($matches[1]); $name = trim(idx($matches, 2, $uri)); // If whatever is being linked to begins with "/" or "#", or has "://", // or is "mailto:", treat it as a URI instead of a wiki page. $is_uri = preg_match('@(^/)|(://)|(^#)|(^mailto:)@', $uri); if ($is_uri && strncmp('/', $uri, 1) && strncmp('#', $uri, 1)) { $protocols = $this->getEngine()->getConfig( 'uri.allowed-protocols', array()); $protocol = id(new PhutilURI($uri))->getProtocol(); if (!idx($protocols, $protocol)) { // Don't treat this as a URI if it's not an allowed protocol. $is_uri = false; } } if (!$is_uri) { return $matches[0]; } return $this->getEngine()->storeText($this->renderHyperlink($uri, $name)); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleEscapeRemarkup.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleEscapeRemarkup.php index 5bf81cd..0b49d41 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleEscapeRemarkup.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleEscapeRemarkup.php @@ -1,23 +1,19 @@ getEngine()->storeText("\1"); return str_replace("\1", $replace, $text); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleHyperlink.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleHyperlink.php index 7eb1d06..e316f86 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleHyperlink.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleHyperlink.php @@ -1,106 +1,102 @@ " around them get linked exactly, without // the "<>". Angle brackets are basically special and mean "this is a URL // with weird characters". 
This is assumed to be reasonable because they // don't appear in normal text or normal URLs. $text = preg_replace_callback( '@<(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+?)>@', array($this, 'markupHyperlink'), $text); // Anything else we match "ungreedily", which means we'll look for // stuff that's probably punctuation or otherwise not part of the URL and // not link it. This lets someone write "Quick! Go to // http://www.example.com/!". We also apply some paren balancing rules. // NOTE: We're explicitly avoiding capturing stored blocks, so text like // `http://www.example.com/[[x | y]]` doesn't get aggressively captured. $text = preg_replace_callback( '@(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+)@', array($this, 'markupHyperlinkUngreedy'), $text); return $text; } protected function markupHyperlink($matches) { - $protocols = $this->getEngine()->getConfig( 'uri.allowed-protocols', array()); $protocol = id(new PhutilURI($matches[1]))->getProtocol(); if (!idx($protocols, $protocol)) { // If this URI doesn't use a whitelisted protocol, don't link it. This // is primarily intended to prevent javascript:// silliness. 
return $this->getEngine()->storeText($matches[1]); } return $this->storeRenderedHyperlink($matches[1]); } protected function storeRenderedHyperlink($link) { return $this->getEngine()->storeText($this->renderHyperlink($link)); } protected function renderHyperlink($link) { if ($this->getEngine()->isTextMode()) { return $link; } if ($this->getEngine()->getState('toc')) { return $link; } else { return phutil_tag( 'a', array( 'href' => $link, 'class' => 'remarkup-link', 'target' => '_blank', ), $link); } } protected function markupHyperlinkUngreedy($matches) { $match = $matches[1]; $tail = null; $trailing = null; if (preg_match('/[;,.:!?]+$/', $match, $trailing)) { $tail = $trailing[0]; $match = substr($match, 0, -strlen($tail)); } // If there's a closing paren at the end but no balancing open paren in // the URL, don't link the close paren. This is an attempt to gracefully // handle the two common paren cases, Wikipedia links and English language // parentheticals, e.g.: // // http://en.wikipedia.org/wiki/Noun_(disambiguation) // (see also http://www.example.com) // // We could apply a craftier heuristic here which tries to actually balance // the parens, but this is probably sufficient. 
if (preg_match('/\\)$/', $match) && !preg_match('/\\(/', $match)) { $tail = ')'.$tail; $match = substr($match, 0, -1); } return hsprintf('%s%s', $this->markupHyperlink(array(null, $match)), $tail); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleItalic.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleItalic.php index 59aaa18..d5f5790 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleItalic.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleItalic.php @@ -1,28 +1,24 @@ getEngine()->isTextMode()) { return $text; } return $this->replaceHTML( '@(?%s', $matches[1]); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleLinebreaks.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleLinebreaks.php index 424be4d..ece540d 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleLinebreaks.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleLinebreaks.php @@ -1,17 +1,13 @@ getEngine()->isTextMode()) { return $text; } return phutil_escape_html_newlines($text); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleMonospace.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleMonospace.php index 43ecfe6..45b7e63 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleMonospace.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupRuleMonospace.php @@ -1,43 +1,39 @@ getEngine()->isTextMode()) { $result = $matches[0]; } else { $match = isset($matches[2]) ? $matches[2] : $matches[1]; $result = phutil_tag( 'tt', array( 'class' => 'remarkup-monospaced', ), $match); } return $this->getEngine()->storeText($result); } } diff --git a/src/markup/render.php b/src/markup/render.php index d6c8d11..d04ad31 100644 --- a/src/markup/render.php +++ b/src/markup/render.php @@ -1,291 +1,270 @@ ` tags, if the `rel` attribute is not specified, it * is interpreted as `rel="noreferrer"`. 
* - When rendering `<a>` tags, the `href` attribute may not begin with * `javascript:`. * * These special cases can not be disabled. * * IMPORTANT: The `$tag` attribute and the keys of the `$attributes` array are * trusted blindly, and not escaped. You should not pass user data in these * parameters. * * @param string The name of the tag, like `a` or `div`. * @param map A map of tag attributes. * @param wild Content to put in the tag. * @return PhutilSafeHTML Tag object. */ function phutil_tag($tag, array $attributes = array(), $content = null) { - // If the `href` attribute is present: // - make sure it is not a "javascript:" URI. We never permit these. // - if the tag is an `<a>` and the link is to some foreign resource, // add `rel="noreferrer"` by default. if (!empty($attributes['href'])) { // This might be a URI object, so cast it to a string. $href = (string)$attributes['href']; if (isset($href[0])) { $is_anchor_href = ($href[0] == '#'); // Is this a link to a resource on the same domain? The second part of // this excludes "///evil.com/" protocol-relative hrefs. $is_domain_href = ($href[0] == '/') && (!isset($href[1]) || $href[1] != '/'); // If the `rel` attribute is not specified, fill in `rel="noreferrer"`. // Effectively, this serves to make the default behavior for offsite // links "do not send a referrer", which is broadly desirable. Specifying // some non-null `rel` will skip this. if (!isset($attributes['rel'])) { if (!$is_anchor_href && !$is_domain_href) { if ($tag == 'a') { $attributes['rel'] = 'noreferrer'; } } } // Block 'javascript:' hrefs at the tag level: no well-designed // application should ever use them, and they are a potent attack vector. // This function is deep in the core and performance sensitive, so we're // doing a cheap version of this test first to avoid calling preg_match() // on URIs which begin with '/' or `#`. These cover essentially all URIs // in Phabricator. 
if (!$is_anchor_href && !$is_domain_href) { // Chrome 33 and IE 11 both interpret "javascript\n:" as a Javascript // URI, and all browsers interpret " javascript:" as a Javascript URI, // so be aggressive about looking for "javascript:" in the initial // section of the string. $normalized_href = preg_replace('([^a-z0-9/:]+)i', '', $href); if (preg_match('/^javascript:/i', $normalized_href)) { throw new Exception( pht( "Attempting to render a tag with an 'href' attribute that ". "begins with 'javascript:'. This is either a serious security ". "concern or a serious architecture concern. Seek urgent ". "remedy.")); } } } } // For tags which can't self-close, treat null as the empty string -- for // example, always render `
    `, never `
    `. static $self_closing_tags = array( 'area' => true, 'base' => true, 'br' => true, 'col' => true, 'command' => true, 'embed' => true, 'frame' => true, 'hr' => true, 'img' => true, 'input' => true, 'keygen' => true, 'link' => true, 'meta' => true, 'param' => true, 'source' => true, 'track' => true, 'wbr' => true, ); $attr_string = ''; foreach ($attributes as $k => $v) { if ($v === null) { continue; } $v = phutil_escape_html($v); $attr_string .= ' '.$k.'="'.$v.'"'; } if ($content === null) { if (isset($self_closing_tags[$tag])) { return new PhutilSafeHTML('<'.$tag.$attr_string.' />'); } else { $content = ''; } } else { $content = phutil_escape_html($content); } return new PhutilSafeHTML('<'.$tag.$attr_string.'>'.$content.''); } -/** - * @group markup - */ function phutil_tag_div($class, $content = null) { return phutil_tag('div', array('class' => $class), $content); } -/** - * @group markup - */ function phutil_escape_html($string) { if ($string instanceof PhutilSafeHTML) { return $string; } else if ($string instanceof PhutilSafeHTMLProducerInterface) { $result = $string->producePhutilSafeHTML(); if ($result instanceof PhutilSafeHTML) { return phutil_escape_html($result); } else if (is_array($result)) { return phutil_escape_html($result); } else if ($result instanceof PhutilSafeHTMLProducerInterface) { return phutil_escape_html($result); } else { try { assert_stringlike($result); return phutil_escape_html((string)$result); } catch (Exception $ex) { $class = get_class($string); throw new Exception( "Object (of class '{$class}') implements ". "PhutilSafeHTMLProducerInterface but did not return anything ". 
"renderable from producePhutilSafeHTML()."); } } } else if (is_array($string)) { $result = ''; foreach ($string as $item) { $result .= phutil_escape_html($item); } return $result; } return htmlspecialchars($string, ENT_QUOTES, 'UTF-8'); } -/** - * @group markup - */ function phutil_escape_html_newlines($string) { return PhutilSafeHTML::applyFunction('nl2br', $string); } /** * Mark string as safe for use in HTML. - * - * @group markup */ function phutil_safe_html($string) { if ($string == '') { return $string; } else if ($string instanceof PhutilSafeHTML) { return $string; } else { return new PhutilSafeHTML($string); } } /** - * HTML safe version of implode(). - * - * @group markup + * HTML safe version of `implode()`. */ function phutil_implode_html($glue, array $pieces) { $glue = phutil_escape_html($glue); foreach ($pieces as $k => $piece) { $pieces[$k] = phutil_escape_html($piece); } return phutil_safe_html(implode($glue, $pieces)); } /** - * Format a HTML code. This function behaves like sprintf(), except that all + * Format a HTML code. This function behaves like `sprintf()`, except that all * the normal conversions (like %s) will be properly escaped. - * - * @group markup */ -function hsprintf($html/* , ... */) { +function hsprintf($html /* , ... */) { $args = func_get_args(); array_shift($args); return new PhutilSafeHTML( vsprintf($html, array_map('phutil_escape_html', $args))); } /** * Escape text for inclusion in a URI or a query parameter. Note that this * method does NOT escape '/', because "%2F" is invalid in paths and Apache * will automatically 404 the page if it's present. 
This will produce correct * (the URIs will work) and desirable (the URIs will be readable) behavior in * these cases: * * '/path/?param='.phutil_escape_uri($string); # OK: Query Parameter * '/path/to/'.phutil_escape_uri($string); # OK: URI Suffix * * It will potentially produce the WRONG behavior in this special case: * * COUNTEREXAMPLE * '/path/to/'.phutil_escape_uri($string).'/thing/'; # BAD: URI Infix * * In this case, any '/' characters in the string will not be escaped, so you * will not be able to distinguish between the string and the suffix (unless * you have more information, like you know the format of the suffix). For infix * URI components, use @{function:phutil_escape_uri_path_component} instead. * * @param string Some string. * @return string URI encoded string, except for '/'. - * - * @group markup */ function phutil_escape_uri($string) { return str_replace('%2F', '/', rawurlencode($string)); } /** * Escape text for inclusion as an infix URI substring. See discussion at * @{function:phutil_escape_uri}. This function covers an unusual special case; * @{function:phutil_escape_uri} is usually the correct function to use. * * This function will escape a string into a format which is safe to put into * a URI path and which does not contain '/' so it can be correctly parsed when * embedded as a URI infix component. * * However, you MUST decode the string with * @{function:phutil_unescape_uri_path_component} before it can be used in the * application. * * @param string Some string. * @return string URI encoded string that is safe for infix composition. - * - * @group markup */ function phutil_escape_uri_path_component($string) { return rawurlencode(rawurlencode($string)); } /** * Unescape text that was escaped by * @{function:phutil_escape_uri_path_component}. See * @{function:phutil_escape_uri} for discussion. * * Note that this function is NOT the inverse of * @{function:phutil_escape_uri_path_component}! 
It undoes additional escaping * which is added to survive the implied unescaping performed by the webserver * when interpreting the request. * * @param string Some string emitted * from @{function:phutil_escape_uri_path_component} and * then accessed via a web server. * @return string Original string. - * @group markup */ function phutil_unescape_uri_path_component($string) { return rawurldecode($string); } diff --git a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php index a5ab266..27765b3 100644 --- a/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php +++ b/src/markup/syntax/engine/PhutilDefaultSyntaxHighlighterEngine.php @@ -1,102 +1,98 @@ config[$key] = $value; return $this; } public function getLanguageFromFilename($filename) { - static $default_map = array( // All files which have file extensions that we haven't already matched // map to their extensions. '@\\.([^./]+)$@' => 1, ); $maps = array(); if (!empty($this->config['filename.map'])) { $maps[] = $this->config['filename.map']; } $maps[] = $default_map; foreach ($maps as $map) { foreach ($map as $regexp => $lang) { $matches = null; if (preg_match($regexp, $filename, $matches)) { if (is_numeric($lang)) { return idx($matches, $lang); } else { return $lang; } } } } return null; } public function getHighlightFuture($language, $source) { - if ($language === null) { $language = PhutilLanguageGuesser::guessLanguage($source); } $have_pygments = !empty($this->config['pygments.enabled']); if ($language == 'php' && xhpast_is_available()) { return id(new PhutilXHPASTSyntaxHighlighter()) ->getHighlightFuture($source); } if ($language == 'console') { return id(new PhutilConsoleSyntaxHighlighter()) ->getHighlightFuture($source); } if ($language == 'diviner' || $language == 'remarkup') { return id(new PhutilDivinerSyntaxHighlighter()) ->getHighlightFuture($source); } if ($language == 'rainbow') { return id(new 
PhutilRainbowSyntaxHighlighter()) ->getHighlightFuture($source); } if ($language == 'php') { return id(new PhutilLexerSyntaxHighlighter()) ->setConfig('lexer', new PhutilPHPFragmentLexer()) ->setConfig('language', 'php') ->getHighlightFuture($source); } if ($language == 'py') { return id(new PhutilLexerSyntaxHighlighter()) ->setConfig('lexer', new PhutilPythonFragmentLexer()) ->setConfig('language', 'py') ->getHighlightFuture($source); } if ($language == 'invisible') { return id(new PhutilInvisibleSyntaxHighlighter()) ->getHighlightFuture($source); } if ($have_pygments) { return id(new PhutilPygmentsSyntaxHighlighter()) ->setConfig('language', $language) ->getHighlightFuture($source); } return id(new PhutilDefaultSyntaxHighlighter()) ->getHighlightFuture($source); } + } diff --git a/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php b/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php index f35445b..7cfa463 100644 --- a/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php +++ b/src/markup/syntax/engine/PhutilSyntaxHighlighterEngine.php @@ -1,21 +1,19 @@ getHighlightFuture($language, $source)->resolve(); } catch (PhutilSyntaxHighlighterException $ex) { return id(new PhutilDefaultSyntaxHighlighter()) ->getHighlightFuture($source) ->resolve(); } } } diff --git a/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php b/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php index abd0287..886c0fc 100644 --- a/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php +++ b/src/markup/syntax/engine/__tests__/PhutilDefaultSyntaxHighlighterEngineTestCase.php @@ -1,27 +1,25 @@ 'php', '/x.php' => 'php', 'x.y.php' => 'php', '/x.y/z.php' => 'php', '/x.php/' => null, ); $engine = new PhutilDefaultSyntaxHighlighterEngine(); foreach ($names as $path => $language) { $detect = $engine->getLanguageFromFilename($path); $this->assertEqual($language, $detect, 'Language detect for 
'.$path); } } } diff --git a/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php index 39180fe..9fe499a 100644 --- a/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php +++ b/src/markup/syntax/highlighter/PhutilConsoleSyntaxHighlighter.php @@ -1,55 +1,52 @@ config[$key] = $value; return $this; } public function getHighlightFuture($source) { - $in_command = false; $lines = explode("\n", $source); foreach ($lines as $key => $line) { $matches = null; // Parse commands like this: // // some/path/ $ ./bin/example # Do things // // ...into path, command, and comment components. $pattern = '@'. ($in_command ? '()(.*?)' : '^(\S+[\\\\/] )?([$] .*?)'). '(#.*|\\\\)?$@'; if (preg_match($pattern, $line, $matches)) { $lines[$key] = hsprintf( '%s%s%s', $matches[1], $matches[2], (!empty($matches[3]) ? hsprintf('%s', $matches[3]) : '')); $in_command = (idx($matches, 3) == '\\'); } else { $lines[$key] = hsprintf('%s', $line); } } $lines = phutil_implode_html("\n", $lines); return new ImmediateFuture($lines); } } diff --git a/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php index ecda7d8..20c2c0f 100644 --- a/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php +++ b/src/markup/syntax/highlighter/PhutilDefaultSyntaxHighlighter.php @@ -1,16 +1,14 @@ config[$key] = $value; return $this; } public function getHighlightFuture($source) { - $source = phutil_escape_html($source); // This highlighter isn't perfect but tries to do an okay job at getting // some of the basics at least. There's lots of room for improvement. $blocks = explode("\n\n", $source); foreach ($blocks as $key => $block) { if (preg_match('/^[^ ](?! 
)/m', $block)) { $blocks[$key] = $this->highlightBlock($block); } } $source = implode("\n\n", $blocks); $source = phutil_safe_html($source); return new ImmediateFuture($source); } private function highlightBlock($source) { // Highlight "@{class:...}" links to other documentation pages. $source = $this->highlightPattern('/@{([\w@]+?):([^}]+?)}/', $source, 'nc'); // Highlight "@title", "@group", etc. $source = $this->highlightPattern('/^@(\w+)/m', $source, 'k'); // Highlight bold, italic and monospace. $source = $this->highlightPattern('@\\*\\*(.+?)\\*\\*@s', $source, 's'); $source = $this->highlightPattern('@(?highlightPattern( '@##([\s\S]+?)##|\B`(.+?)`\B@', $source, 's'); // Highlight stuff that looks like headers. $source = $this->highlightPattern('/^=(.*)$/m', $source, 'nv'); return $source; } private function highlightPattern($regexp, $source, $class) { $this->replaceClass = $class; $source = preg_replace_callback( $regexp, array($this, 'replacePattern'), $source); return $source; } public function replacePattern($matches) { // NOTE: The goal here is to make sure a never crosses a newline. $content = $matches[0]; $content = explode("\n", $content); foreach ($content as $key => $line) { $content[$key] = ''. $line. 
''; } return implode("\n", $content); } } diff --git a/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php index 5d48c05..297d4cc 100644 --- a/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php +++ b/src/markup/syntax/highlighter/PhutilInvisibleSyntaxHighlighter.php @@ -1,45 +1,43 @@ config[$key] = $value; return $this; } public function getHighlightFuture($source) { $keys = array_map('chr', range(0x0, 0x1F)); $vals = array_map( array($this, 'decimalToHtmlEntityDecoded'), range(0x2400, 0x241F)); $invisible = array_combine($keys, $vals); $result = array(); foreach (str_split($source) as $character) { if (isset($invisible[$character])) { $result[] = phutil_tag( 'span', array('class' => 'invisible'), $invisible[$character]); if ($character === "\n") { $result[] = $character; } } else { $result[] = $character; } } $result = phutil_implode_html('', $result); return new ImmediateFuture($result); } private function decimalToHtmlEntityDecoded($dec) { return html_entity_decode("&#{$dec};"); } + } diff --git a/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php index 73be023..12f955a 100644 --- a/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php +++ b/src/markup/syntax/highlighter/PhutilLexerSyntaxHighlighter.php @@ -1,75 +1,72 @@ config[$key] = $value; return $this; } public function getHighlightFuture($source) { $strip = false; $state = 'start'; $lang = idx($this->config, 'language'); if ($lang == 'php') { if (strpos($source, 'config, 'lexer'); $tokens = $lexer->getTokens($source, $state); $tokens = $lexer->mergeTokens($tokens); $result = array(); foreach ($tokens as $token) { list($type, $value, $context) = $token; $data_name = null; switch ($type) { case 'nc': case 'nf': case 'na': $data_name = $value; break; } if (strpos($value, "\n") !== false) { $value = explode("\n", $value); } 
else { $value = array($value); } foreach ($value as $part) { if (strlen($part)) { if ($type) { $result[] = phutil_tag( 'span', array( 'class' => $type, 'data-symbol-context' => $context, 'data-symbol-name' => $data_name, ), $part); } else { $result[] = $part; } } $result[] = "\n"; } // Throw away the last "\n". array_pop($result); } $result = phutil_implode_html('', $result); return new ImmediateFuture($result); } } diff --git a/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php index f2aceec..97812ae 100644 --- a/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php +++ b/src/markup/syntax/highlighter/PhutilPygmentsSyntaxHighlighter.php @@ -1,217 +1,214 @@ config[$key] = $value; return $this; } public function getHighlightFuture($source) { $language = idx($this->config, 'language'); if (preg_match('/\r(?!\n)/', $source)) { // TODO: Pygments converts "\r" newlines into "\n" newlines, so we can't // use it on files with "\r" newlines. If we have "\r" not followed by // "\n" in the file, skip highlighting. 
$language = null; } if ($language) { $language = $this->getPygmentsLexerNameFromLanguageName($language); $future = new ExecFuture( 'pygmentize -O encoding=utf-8 -O stripnl=False -f html -l %s', $language); $scrub = false; if ($language == 'php' && strpos($source, 'write($source); return new PhutilDefaultSyntaxHighlighterEnginePygmentsFuture( $future, $source, $scrub); } return id(new PhutilDefaultSyntaxHighlighter()) ->getHighlightFuture($source); } private function getPygmentsLexerNameFromLanguageName($language) { static $map = array( 'adb' => 'ada', 'ads' => 'ada', 'ahkl' => 'ahk', 'G' => 'antlr-ruby', 'g' => 'antlr-ruby', 'htaccess' => 'apacheconf', 'as' => 'as3', 'aspx' => 'aspx-vb', 'asax' => 'aspx-vb', 'ascx' => 'aspx-vb', 'ashx' => 'aspx-vb', 'asmx' => 'aspx-vb', 'axd' => 'aspx-vb', 'sh' => 'bash', 'ksh' => 'bash', 'ebuild' => 'bash', 'eclass' => 'bash', 'cmd' => 'bat', 'bmx' => 'blitzmax', 'bf' => 'brainfuck', 'b' => 'brainfuck', 'h' => 'c', 'cfml' => 'cfm', 'cfc' => 'cfm', 'tmpl' => 'cheetah', 'spt' => 'cheetah', 'clj' => 'clojure', 'coffee' => 'coffee-script', 'cl' => 'common-lisp', 'lisp' => 'common-lisp', 'el' => 'common-lisp', 'sh-session' => 'console', 'hpp' => 'cpp', 'c++' => 'cpp', 'h++' => 'cpp', 'cc' => 'cpp', 'hh' => 'cpp', 'cxx' => 'cpp', 'hxx' => 'cpp', 'c++-objdump' => 'cpp-objdump', 'cxx-objdump' => 'cpp-objdump', 'cs' => 'csharp', 'less' => 'css', 'scss' => 'css', 'feature' => 'Cucumber', 'pyx' => 'cython', 'pxd' => 'cython', 'pxi' => 'cython', 'di' => 'd', 'pas' => 'delphi', 'patch' => 'diff', 'darcspatch' => 'dpatch', 'jbst' => 'duel', 'dyl' => 'dylan', 'erl-sh' => 'erl', 'erl' => 'erlang', 'hrl' => 'erlang', 'flx' => 'felix', 'flxh' => 'felix', 'f' => 'fortran', 'f90' => 'fortran', 's' => 'gas', 'kid' => 'genshi', 'vert' => 'glsl', 'frag' => 'glsl', 'geo' => 'glsl', 'plot' => 'gnuplot', 'plt' => 'gnuplot', 'gdc' => 'gooddata-cl', 'man' => 'groff', 'hs' => 'haskell', 'htm' => 'html', 'xhtml' => 'html', 'html' => 'html+evoque', 'phtml' => 
'html+php', 'hy' => 'hybris', 'hyb' => 'hybris', 'cfg' => 'ini', 'ik' => 'ioke', 'weechatlog' => 'irc', 'll' => 'llvm', 'lgt' => 'logtalk', 'wlua' => 'lua', 'mak' => 'make', 'Makefile' => 'make', 'makefile' => 'make', 'GNUmakefile' => 'make', 'mao' => 'mako', 'mhtml' => 'mason', 'mc' => 'mason', 'mi' => 'mason', 'autohandler' => 'mason', 'dhandler' => 'mason', 'md' => 'minid', 'mo' => 'modelica', 'def' => 'modula2', 'mod' => 'modula2', 'moo' => 'moocode', 'mu' => 'mupad', 'myt' => 'myghty', 'autodelegate' => 'myghty', 'asm' => 'nasm', 'ASM' => 'nasm', 'ns2' => 'newspeak', 'm' => 'objective-c', 'mm' => 'objective-c', 'j' => 'objective-j', 'ml' => 'ocaml', 'mli' => 'ocaml', 'mll' => 'ocaml', 'mly' => 'ocaml', 'pm' => 'perl', 'ps' => 'postscript', 'eps' => 'postscript', 'po' => 'pot', 'inc' => 'pov', 'pl' => 'prolog', 'pro' => 'prolog', 'proto' => 'protobuf', 'pp' => 'puppet', 'py' => 'python', 'pyw' => 'python', 'sc' => 'python', 'SConstruct' => 'python', 'SConscript' => 'python', 'tac' => 'python', 'rl' => 'ragel-em', 'rbw' => 'rb', 'Rakefile' => 'rb', 'rake' => 'rb', 'gemspec' => 'rb', 'rbx' => 'rb', 'duby' => 'rb', 'Rout' => 'rconsole', 'r' => 'rebol', 'r3' => 'rebol', 'cw' => 'redcode', 'rest' => 'rst', 'scm' => 'scheme', 'st' => 'smalltalk', 'tpl' => 'smarty', 'S' => 'splus', 'R' => 'splus', 'hql' => 'sql', 'sqlite3-console' => 'sqlite3', 'csh' => 'tcsh', 'aux' => 'tex', 'toc' => 'tex', 'txt' => 'text', 'sv' => 'v', 'vapi' => 'vala', 'vb' => 'vb.net', 'bas' => 'vb.net', 'vm' => 'velocity', 'fhtml' => 'velocity', 'vimrc' => 'vim', 'xslt' => 'xml', 'rss' => 'xml', 'xsd' => 'xml', 'wsdl' => 'xml', 'xml' => 'xml+evoque', 'xqy' => 'xquery', 'xsl' => 'xslt', 'yml' => 'yaml', ); return idx($map, $language, $language); } } diff --git a/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php index 5380316..6feec7e 100644 --- a/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php +++ 
b/src/markup/syntax/highlighter/PhutilRainbowSyntaxHighlighter.php @@ -1,47 +1,46 @@ config[$key] = $value; return $this; } public function getHighlightFuture($source) { $color = 0; $colors = array( 'rbw_r', 'rbw_o', 'rbw_y', 'rbw_g', 'rbw_b', 'rbw_i', 'rbw_v', ); $result = array(); foreach (phutil_utf8v($source) as $character) { if ($character == ' ' || $character == "\n") { $result[] = $character; continue; } $result[] = phutil_tag( 'span', array('class' => $colors[$color]), $character); $color = ($color + 1) % count($colors); } $result = phutil_implode_html('', $result); return new ImmediateFuture($result); } + } diff --git a/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php b/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php index e28cd3d..e5159d5 100644 --- a/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php +++ b/src/markup/syntax/highlighter/PhutilSyntaxHighlighter.php @@ -1,9 +1,6 @@ setConfig('language', 'php'); $highlighter->setConfig('lexer', new PhutilPHPFragmentLexer()); $path = dirname(__FILE__).'/phpfragment/'; foreach (Filesystem::listDirectory($path, $include_hidden = false) as $f) { if (preg_match('/.test$/', $f)) { $expect = preg_replace('/.test$/', '.expect', $f); $source = Filesystem::readFile($path.'/'.$f); $this->assertEqual( Filesystem::readFile($path.'/'.$expect), (string)$highlighter->getHighlightFuture($source)->resolve(), $f); } } } } diff --git a/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php b/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php index 05583b4..7e3a880 100644 --- a/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php +++ b/src/markup/syntax/highlighter/__tests__/PhutilXHPASTSyntaxHighlighterTestCase.php @@ -1,34 +1,31 @@ getHighlightFuture($source); return $future->resolve(); } private function read($file) { $path = dirname(__FILE__).'/xhpast/'.$file; return Filesystem::readFile($path); } public function 
testBuiltinClassnames() { $this->assertEqual( $this->read('builtin-classname.expect'), (string)$this->highlight($this->read('builtin-classname.source')), 'Builtin classnames should not be marked as linkable symbols.'); $this->assertEqual( $this->read('trailing-comment.expect'), (string)$this->highlight($this->read('trailing-comment.source')), 'Trailing comments should not be dropped.'); $this->assertEqual( $this->read('multiline-token.expect'), (string)$this->highlight($this->read('multiline-token.source')), 'Multi-line tokens should be split across lines.'); } } diff --git a/src/markup/syntax/highlighter/pygments/PhutilDefaultSyntaxHighlighterEnginePygmentsFuture.php b/src/markup/syntax/highlighter/pygments/PhutilDefaultSyntaxHighlighterEnginePygmentsFuture.php index 8f03d7b..3a82e71 100644 --- a/src/markup/syntax/highlighter/pygments/PhutilDefaultSyntaxHighlighterEnginePygmentsFuture.php +++ b/src/markup/syntax/highlighter/pygments/PhutilDefaultSyntaxHighlighterEnginePygmentsFuture.php @@ -1,36 +1,33 @@ source = $source; $this->scrub = $scrub; } protected function didReceiveResult($result) { list($err, $stdout, $stderr) = $result; if (!$err && strlen($stdout)) { // Strip off fluff Pygments adds. $stdout = preg_replace( '@^
    (.*)
    \s*$@s', '\1', $stdout); if ($this->scrub) { $stdout = preg_replace('/^.*\n/', '', $stdout); } return phutil_safe_html($stdout); } throw new PhutilSyntaxHighlighterException($stderr, $err); } } diff --git a/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php b/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php index 6467fbb..5be143f 100644 --- a/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php +++ b/src/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php @@ -1,233 +1,230 @@ source = $source; $this->scrub = $scrub; } protected function didReceiveResult($result) { try { return $this->applyXHPHighlight($result); } catch (Exception $ex) { // XHP can't highlight source that isn't syntactically valid. Fall back // to the fragment lexer. $source = ($this->scrub ? preg_replace('/^.*\n/', '', $this->source) : $this->source); return id(new PhutilLexerSyntaxHighlighter()) ->setConfig('lexer', new PhutilPHPFragmentLexer()) ->setConfig('language', 'php') ->getHighlightFuture($source) ->resolve(); } } private function applyXHPHighlight($result) { // We perform two passes here: one using the AST to find symbols we care // about -- particularly, class names and function names. These are used // in the crossreference stuff to link into Diffusion. After we've done our // AST pass, we do a followup pass on the token stream to catch all the // simple stuff like strings and comments. 
$tree = XHPASTTree::newFromDataAndResolvedExecFuture( $this->source, $result); $root = $tree->getRootNode(); $tokens = $root->getTokens(); $interesting_symbols = $this->findInterestingSymbols($root); $out = array(); foreach ($tokens as $key => $token) { $value = $token->getValue(); $class = null; $multi = false; $attrs = array(); if (isset($interesting_symbols[$key])) { $sym = $interesting_symbols[$key]; $class = $sym[0]; $attrs['data-symbol-context'] = idx($sym, 'context'); $attrs['data-symbol-name'] = idx($sym, 'symbol'); } else { switch ($token->getTypeName()) { case 'T_WHITESPACE': break; case 'T_DOC_COMMENT': $class = 'dc'; $multi = true; break; case 'T_COMMENT': $class = 'c'; $multi = true; break; case 'T_CONSTANT_ENCAPSED_STRING': case 'T_ENCAPSED_AND_WHITESPACE': case 'T_INLINE_HTML': $class = 's'; $multi = true; break; case 'T_VARIABLE': $class = 'nv'; break; case 'T_OPEN_TAG': case 'T_OPEN_TAG_WITH_ECHO': case 'T_CLOSE_TAG': $class = 'o'; break; case 'T_LNUMBER': case 'T_DNUMBER': $class = 'm'; break; case 'T_STRING': static $magic = array( 'true' => true, 'false' => true, 'null' => true, ); if (isset($magic[strtolower($value)])) { $class = 'k'; break; } $class = 'nx'; break; default: $class = 'k'; break; } } if ($class) { $attrs['class'] = $class; if ($multi) { // If the token may have multiple lines in it, make sure each // crosses no more than one line so the lines can be put // in a table, etc., later. $value = phutil_split_lines($value, $retain_endings = true); } else { $value = array($value); } foreach ($value as $val) { $out[] = phutil_tag('span', $attrs, $val); } } else { $out[] = $value; } } if ($this->scrub) { array_shift($out); } return phutil_implode_html('', $out); } private function findInterestingSymbols(XHPASTNode $root) { // Class name symbols appear in: // class X extends X implements X, X { ... 
} // new X(); // $x instanceof X // catch (X $x) // function f(X $x) // X::f(); // X::$m; // X::CONST; // These are PHP builtin tokens which can appear in a classname context. // Don't link them since they don't go anywhere useful. static $builtin_class_tokens = array( 'self' => true, 'parent' => true, 'static' => true, ); // Fortunately XHPAST puts all of these in a special node type so it's // easy to find them. $result_map = array(); $class_names = $root->selectDescendantsOfType('n_CLASS_NAME'); foreach ($class_names as $class_name) { foreach ($class_name->getTokens() as $key => $token) { if (isset($builtin_class_tokens[$token->getValue()])) { // This is something like "self::method()". continue; } $result_map[$key] = array( 'nc', // "Name, Class" 'symbol' => $class_name->getConcreteString(), ); } } // Function name symbols appear in: // f() $function_calls = $root->selectDescendantsOfType('n_FUNCTION_CALL'); foreach ($function_calls as $call) { $call = $call->getChildByIndex(0); if ($call->getTypeName() == 'n_SYMBOL_NAME') { // This is a normal function call, not some $f() shenanigans. foreach ($call->getTokens() as $key => $token) { $result_map[$key] = array( 'nf', // "Name, Function" 'symbol' => $call->getConcreteString(), ); } } } // Upon encountering $x->y, link y without context, since $x is unknown. $prop_access = $root->selectDescendantsOfType('n_OBJECT_PROPERTY_ACCESS'); foreach ($prop_access as $access) { $right = $access->getChildByIndex(1); if ($right->getTypeName() == 'n_INDEX_ACCESS') { // otherwise $x->y[0] doesn't get highlighted $right = $right->getChildByIndex(0); } if ($right->getTypeName() == 'n_STRING') { foreach ($right->getTokens() as $key => $token) { $result_map[$key] = array( 'na', // "Name, Attribute" 'symbol' => $right->getConcreteString(), ); } } } // Upon encountering x::y, try to link y with context x. 
$static_access = $root->selectDescendantsOfType('n_CLASS_STATIC_ACCESS'); foreach ($static_access as $access) { $class = $access->getChildByIndex(0); $right = $access->getChildByIndex(1); if ($class->getTypeName() == 'n_CLASS_NAME' && ($right->getTypeName() == 'n_STRING' || $right->getTypeName() == 'n_VARIABLE')) { $classname = head($class->getTokens())->getValue(); $result = array( 'na', 'symbol' => ltrim($right->getConcreteString(), '$'), ); if (!isset($builtin_class_tokens[$classname])) { $result['context'] = $classname; } foreach ($right->getTokens() as $key => $token) { $result_map[$key] = $result; } } } return $result_map; } } diff --git a/src/moduleutils/PhutilBootloader.php b/src/moduleutils/PhutilBootloader.php index edf9bea..12b6a68 100644 --- a/src/moduleutils/PhutilBootloader.php +++ b/src/moduleutils/PhutilBootloader.php @@ -1,266 +1,263 @@ classTree; } public function registerLibrary($name, $path) { if (basename($path) != '__phutil_library_init__.php') { throw new PhutilBootloaderException( 'Only directories with a __phutil_library_init__.php file may be '. 'registered as libphutil libraries.'); } $path = dirname($path); // Detect attempts to load the same library multiple times from different // locations. This might mean you're doing something silly like trying to // include two different versions of something, or it might mean you're // doing something subtle like running a different version of 'arc' on a // working copy of Arcanist. if (isset($this->registeredLibraries[$name])) { $old_path = $this->registeredLibraries[$name]; if ($old_path != $path) { throw new PhutilLibraryConflictException($name, $old_path, $path); } } $this->registeredLibraries[$name] = $path; // For libphutil v2 libraries, load all functions when we load the library. 
if (!class_exists('PhutilSymbolLoader', false)) { $root = $this->getLibraryRoot('phutil'); $this->executeInclude($root.'/symbols/PhutilSymbolLoader.php'); } $loader = new PhutilSymbolLoader(); $loader ->setLibrary($name) ->setType('function'); try { $loader->selectAndLoadSymbols(); } catch (PhutilBootloaderException $ex) { // Ignore this, it happens if a global function's file is removed or // similar. Worst case is that we fatal when calling the function, which // is no worse than fataling here. } catch (PhutilMissingSymbolException $ex) { // Ignore this, it happens if a global function is removed. Everything // else loaded so proceed forward: worst case is a fatal when we // hit a function call to a function which no longer exists, which is // no worse than fataling here. } if (empty($_SERVER['PHUTIL_DISABLE_RUNTIME_EXTENSIONS'])) { $extdir = $path.DIRECTORY_SEPARATOR.'extensions'; if (Filesystem::pathExists($extdir)) { $extensions = id(new FileFinder($extdir)) ->withSuffix('php') ->withType('f') ->withFollowSymlinks(true) ->setForceMode('php') ->find(); foreach ($extensions as $extension) { $this->loadExtension( $name, $path, $extdir.DIRECTORY_SEPARATOR.$extension); } } } return $this; } public function registerLibraryMap(array $map) { $this->libraryMaps[$this->currentLibrary] = $map; return $this; } public function getLibraryMap($name) { if (empty($this->libraryMaps[$name])) { $root = $this->getLibraryRoot($name); $this->currentLibrary = $name; $okay = include $root.'/__phutil_library_map__.php'; if (!$okay) { throw new PhutilBootloaderException( "Include of '{$root}/__phutil_library_map__.php' failed!"); } $map = $this->libraryMaps[$name]; // NOTE: We can't use "idx()" here because it may not be loaded yet. $version = isset($map['__library_version__']) ? 
$map['__library_version__'] : 1; switch ($version) { case 1: throw new Exception( 'libphutil v1 libraries are no longer supported.'); case 2: // NOTE: In version 2 of the library format, all parents (both // classes and interfaces) are stored in the 'xmap'. The value is // either a string for a single parent (the common case) or an array // for multiple parents. foreach ($map['xmap'] as $child => $parents) { foreach ((array)$parents as $parent) { $this->classTree[$parent][] = $child; } } break; default: throw new Exception("Unsupported library version '{$version}'!"); } } $map = $this->libraryMaps[$name]; // If there's an extension map for this library, merge the maps. if (isset($this->extensionMaps[$name])) { $emap = $this->extensionMaps[$name]; foreach (array('function', 'class', 'xmap') as $dict_key) { if (!isset($emap[$dict_key])) { continue; } $map[$dict_key] += $emap[$dict_key]; } } return $map; } public function getLibraryMapWithoutExtensions($name) { // This just does all the checks to make sure the library is valid, then // we throw away the result. 
$this->getLibraryMap($name); return $this->libraryMaps[$name]; } public function getLibraryRoot($name) { if (empty($this->registeredLibraries[$name])) { throw new PhutilBootloaderException( "The phutil library '{$name}' has not been loaded!"); } return $this->registeredLibraries[$name]; } public function getAllLibraries() { return array_keys($this->registeredLibraries); } public function loadLibrary($path) { $root = null; if (!empty($_SERVER['PHUTIL_LIBRARY_ROOT'])) { if ($path[0] != '/') { $root = $_SERVER['PHUTIL_LIBRARY_ROOT']; } } $okay = $this->executeInclude($root.$path.'/__phutil_library_init__.php'); if (!$okay) { throw new PhutilBootloaderException( "Include of '{$path}/__phutil_library_init__.php' failed!"); } } public function loadLibrarySource($library, $source) { $path = $this->getLibraryRoot($library).'/'.$source; $okay = $this->executeInclude($path); if (!$okay) { throw new PhutilBootloaderException("Include of '{$path}' failed!"); } } private function executeInclude($path) { // Suppress warning spew if the file does not exist; we'll throw an // exception instead. We still emit error text in the case of syntax errors. 
$old = error_reporting(E_ALL & ~E_WARNING); $okay = include_once $path; error_reporting($old); return $okay; } private function loadExtension($library, $root, $path) { $old_functions = get_defined_functions(); $old_functions = array_fill_keys($old_functions['user'], true); $old_classes = array_fill_keys(get_declared_classes(), true); $old_interfaces = array_fill_keys(get_declared_interfaces(), true); $ok = $this->executeInclude($path); if (!$ok) { throw new PhutilBootloaderException( "Include of extension file '{$path}' failed!"); } $new_functions = get_defined_functions(); $new_functions = array_fill_keys($new_functions['user'], true); $new_classes = array_fill_keys(get_declared_classes(), true); $new_interfaces = array_fill_keys(get_declared_interfaces(), true); $add_functions = array_diff_key($new_functions, $old_functions); $add_classes = array_diff_key($new_classes, $old_classes); $add_interfaces = array_diff_key($new_interfaces, $old_interfaces); // NOTE: We can't trust the path we loaded to be the location of these // symbols, because it might have loaded other paths. 
foreach ($add_functions as $func => $ignored) { $rfunc = new ReflectionFunction($func); $fpath = Filesystem::resolvePath($rfunc->getFileName(), $root); $this->extensionMaps[$library]['function'][$func] = $fpath; } foreach ($add_classes + $add_interfaces as $class => $ignored) { $rclass = new ReflectionClass($class); $cpath = Filesystem::resolvePath($rclass->getFileName(), $root); $this->extensionMaps[$library]['class'][$class] = $cpath; $xmap = $rclass->getInterfaceNames(); $parent = $rclass->getParentClass(); if ($parent) { $xmap[] = $parent->getName(); } if ($xmap) { foreach ($xmap as $parent_class) { $this->classTree[$parent_class][] = $class; } if (count($xmap) == 1) { $xmap = head($xmap); } $this->extensionMaps[$library]['xmap'][$class] = $xmap; } } } } diff --git a/src/moduleutils/PhutilBootloaderException.php b/src/moduleutils/PhutilBootloaderException.php index a81f704..e9a7416 100644 --- a/src/moduleutils/PhutilBootloaderException.php +++ b/src/moduleutils/PhutilBootloaderException.php @@ -1,6 +1,3 @@ library = $library; $this->oldPath = $old_path; $this->newPath = $new_path; $message = "Library conflict! The library '{$library}' has already been ". "loaded (from '{$old_path}') but is now being loaded again ". "from a new location ('{$new_path}'). You can not load ". "multiple copies of the same library into a program."; parent::__construct($message); } /** * Retrieve the name of the library in conflict. * * @return string The name of the library which conflicts with an existing * library. * @task info */ public function getLibrary() { return $this->library; } /** * Get the path to the library which has already been loaded earlier in the * program's execution. * * @return string The path of the already-loaded library. * @task info */ public function getOldPath() { return $this->oldPath; } /** * Get the path to the library which is causing this conflict. * * @return string The path of the attempting-to-load library. 
* @task info */ public function getNewPath() { return $this->newPath; } + } diff --git a/src/moduleutils/core.php b/src/moduleutils/core.php index 49c5990..288d07a 100644 --- a/src/moduleutils/core.php +++ b/src/moduleutils/core.php @@ -1,22 +1,13 @@ registerLibrary($library, $path); } -/** - * @group library - */ function phutil_register_library_map(array $map) { PhutilBootloader::getInstance()->registerLibraryMap($map); } -/** - * @group library - */ function phutil_load_library($path) { PhutilBootloader::getInstance()->loadLibrary($path); } diff --git a/src/parser/PhutilDocblockParser.php b/src/parser/PhutilDocblockParser.php index b7460fd..fa089c6 100644 --- a/src/parser/PhutilDocblockParser.php +++ b/src/parser/PhutilDocblockParser.php @@ -1,145 +1,143 @@ line number. $map = array(); $lines = explode("\n", $text); $num = 1; foreach ($lines as $line) { $len = strlen($line) + 1; for ($jj = 0; $jj < $len; $jj++) { $map[] = $num; } ++$num; } foreach ($matches[0] as $match) { list($data, $offset) = $match; $blocks[] = array($data, $map[$offset]); } return $blocks; } public function parse($docblock) { - // Strip off comments. $docblock = trim($docblock); $docblock = preg_replace('@^/\*\*@', '', $docblock); $docblock = preg_replace('@\*/$@', '', $docblock); $docblock = preg_replace('@^\s*\*@m', '', $docblock); // Normalize multi-line @specials. $lines = explode("\n", $docblock); $last = false; foreach ($lines as $k => $line) { // NOTE: We allow "@specials" to be preceded by up to two whitespace // characters; more than that and we assume the block is a code block. // Broadly, there's ambiguity between a special like: // // <... lots of indentation ...> @author alincoln // // ...and a code block like: // // <... lots of indentation ...> @def square(x, y): // // Because standard practice is to indent the entire block one level, // we allow that and one additional space before assuming something is // a code block. 
if (preg_match('/^\s{0,2}@\w/i', $line)) { $last = $k; $lines[$last] = trim($line); } else if (preg_match('/^\s*$/', $line)) { $last = false; } else if ($last !== false) { $lines[$last] = $lines[$last].' '.trim($line); unset($lines[$k]); } } $docblock = implode("\n", $lines); $special = array(); // Parse @specials. $matches = null; $have_specials = preg_match_all( '/^@([\w-]+)[ \t]*([^\n]*)/m', $docblock, $matches, PREG_SET_ORDER); if ($have_specials) { $docblock = preg_replace( '/^@([\w-]+)[ \t]*([^\n]*)?\n*/m', '', $docblock); foreach ($matches as $match) { list($_, $type, $data) = $match; $data = trim($data); if (isset($special[$type])) { $special[$type] = $special[$type]."\n".$data; } else { $special[$type] = $data; } } } // For flags like "@stable" which don't have any string data, set the value // to true. foreach ($special as $type => $data) { if (!strlen(trim($data))) { $special[$type] = true; } } $docblock = str_replace("\t", ' ', $docblock); // Smush the whole docblock to the left edge. $min_indent = 80; $indent = 0; foreach (array_filter(explode("\n", $docblock)) as $line) { for ($ii = 0; $ii < strlen($line); $ii++) { if ($line[$ii] != ' ') { break; } $indent++; } $min_indent = min($indent, $min_indent); } $docblock = preg_replace( '/^'.str_repeat(' ', $min_indent).'/m', '', $docblock); $docblock = rtrim($docblock); // Trim any empty lines off the front, but leave the indent level if there // is one. 
$docblock = preg_replace('/^\s*\n/', '', $docblock); return array($docblock, $special); } + } diff --git a/src/parser/__tests__/PhutilDocblockParserTestCase.php b/src/parser/__tests__/PhutilDocblockParserTestCase.php index cb51eb0..1ce588e 100644 --- a/src/parser/__tests__/PhutilDocblockParserTestCase.php +++ b/src/parser/__tests__/PhutilDocblockParserTestCase.php @@ -1,119 +1,118 @@ parseDocblock($root.$file); } } private function parseDocblock($doc_file) { $contents = Filesystem::readFile($doc_file); $file = basename($doc_file); $parser = new PhutilDocblockParser(); list($docblock, $specials) = $parser->parse($contents); switch ($file) { case 'embedded-specials.docblock': $this->assertEqual(array(), $specials); $this->assertEqual( "So long as a @special does not appear at the beginning of a line,\n". "it is parsed as normal text.", $docblock); break; case 'indented-block.docblock': $this->assertEqual(array(), $specials); $this->assertEqual( 'Cozy lummox gives smart squid who asks for job pen.', $docblock); break; case 'indented-text.docblock': $this->assertEqual(array(), $specials); $this->assertEqual( 'Cozy lummox gives smart squid who asks for job pen.', $docblock); break; case 'multiline-special.docblock': $this->assertEqual( array( 'special' => 'x y z', ), $specials); $this->assertEqual( '', $docblock); break; case 'multi-specials.docblock': $this->assertEqual( array( 'special' => "north\nsouth", ), $specials); $this->assertEqual( '', $docblock); break; case 'specials.docblock': $this->assertEqual( array( 'type' => 'type', 'task' => 'task', ), $specials); $this->assertEqual( '', $docblock); break; case 'linebreak-breaks-specials.docblock': $this->assertEqual( array( 'title' => 'title', ), $specials); $this->assertEqual( 'This is normal text, not part of the @title.', $docblock); break; case 'specials-with-hyphen.docblock': $this->assertEqual( array( 'repeat-hyphen' => "a\nb", 'multiline-hyphen' => 'mmm nnn', 'normal-hyphen' => 'x', ), $specials); break; case 
'indented-specials.docblock': $this->assertEqual( array( 'title' => 'sendmail', 'special' => 'only a little bit indented', ), $specials); break; case 'flag-specials.docblock': $this->assertEqual( "stuff above\n\nstuff in the middle\n\nstuff below", $docblock); $this->assertEqual( array( 'flag' => true, 'stuff' => true, 'zebra' => true, 'apple' => true, ), $specials); break; default: throw new Exception("No test case to handle file '{$file}'!"); } } + } diff --git a/src/parser/__tests__/PhutilEmailAddressTestCase.php b/src/parser/__tests__/PhutilEmailAddressTestCase.php index 9c40600..b052098 100644 --- a/src/parser/__tests__/PhutilEmailAddressTestCase.php +++ b/src/parser/__tests__/PhutilEmailAddressTestCase.php @@ -1,97 +1,94 @@ '); $this->assertEqual( 'Abraham Lincoln', $email->getDisplayName()); $this->assertEqual( 'alincoln', $email->getLocalPart()); $this->assertEqual( 'logcabin.com', $email->getDomainName()); $this->assertEqual( 'alincoln@logcabin.com', $email->getAddress()); $email = new PhutilEmailAddress('alincoln@logcabin.com'); $this->assertEqual( null, $email->getDisplayName()); $this->assertEqual( 'alincoln', $email->getLocalPart()); $this->assertEqual( 'logcabin.com', $email->getDomainName()); $this->assertEqual( 'alincoln@logcabin.com', $email->getAddress()); $email = new PhutilEmailAddress('"Abraham" '); $this->assertEqual( 'Abraham', $email->getDisplayName()); $this->assertEqual( 'alincoln', $email->getLocalPart()); $this->assertEqual( 'logcabin.com', $email->getDomainName()); $this->assertEqual( 'alincoln@logcabin.com', $email->getAddress()); $email = new PhutilEmailAddress(' alincoln@logcabin.com '); $this->assertEqual( null, $email->getDisplayName()); $this->assertEqual( 'alincoln', $email->getLocalPart()); $this->assertEqual( 'logcabin.com', $email->getDomainName()); $this->assertEqual( 'alincoln@logcabin.com', $email->getAddress()); $email = new PhutilEmailAddress('alincoln'); $this->assertEqual( null, $email->getDisplayName()); 
$this->assertEqual( 'alincoln', $email->getLocalPart()); $this->assertEqual( null, $email->getDomainName()); $this->assertEqual( 'alincoln', $email->getAddress()); $email = new PhutilEmailAddress('alincoln '); $this->assertEqual( 'alincoln', $email->getDisplayName()); $this->assertEqual( 'alincoln at logcabin dot com', $email->getLocalPart()); $this->assertEqual( null, $email->getDomainName()); $this->assertEqual( 'alincoln at logcabin dot com', $email->getAddress()); - } } diff --git a/src/parser/__tests__/PhutilJSONTestCase.php b/src/parser/__tests__/PhutilJSONTestCase.php index f6f1479..132692d 100644 --- a/src/parser/__tests__/PhutilJSONTestCase.php +++ b/src/parser/__tests__/PhutilJSONTestCase.php @@ -1,24 +1,21 @@ assertEqual( $expect, $serializer->encodeFormatted(array('x' => array())), 'Empty arrays should serialize as [], not {}.'); } } diff --git a/src/parser/__tests__/PhutilLanguageGuesserTestCase.php b/src/parser/__tests__/PhutilLanguageGuesserTestCase.php index 96ff4fe..e247d2f 100644 --- a/src/parser/__tests__/PhutilLanguageGuesserTestCase.php +++ b/src/parser/__tests__/PhutilLanguageGuesserTestCase.php @@ -1,26 +1,23 @@ assertEqual( $expect, PhutilLanguageGuesser::guessLanguage($source), "Guessed language for '{$test}'."); } } } diff --git a/src/parser/__tests__/PhutilQueryStringParserTestCase.php b/src/parser/__tests__/PhutilQueryStringParserTestCase.php index e0f38e7..fff19f6 100644 --- a/src/parser/__tests__/PhutilQueryStringParserTestCase.php +++ b/src/parser/__tests__/PhutilQueryStringParserTestCase.php @@ -1,143 +1,140 @@ array(), 'foo=bar&foobar=barfoo' => array( 'foo' => 'bar', 'foobar' => 'barfoo', ), 'a]b[]=1&a]=2&a[[]=3&a[b]b=4&[][a]=5' => array( 'a]b[]' => '1', 'a]' => '2', 'a[[]' => '3', 'a[b]b' => '4', '[][a]' => '5', ), 'foo[][]=bar&bar[1][3]=foo' => array( 'foo' => array( 0 => array( 0 => 'bar', ), ), 'bar' => array( 1 => array( 3 => 'foo', ), ), ), 'foo[][]=bar&a]b[]=1' => array( 'foo' => array( 0 => array( 0 => 'bar', ), ), 'a]b[]' 
=> '1', ), 'a&&b' => array( 'a' => '', 'b' => '', ), 'a[b][]=foo&a[b][]=bar' => array( 'a' => array( 'b' => array( 0 => 'foo', 1 => 'bar', ), ), ), 'a=1&a=2' => array( 'a' => '2', ), 'a=1&a[]=2' => array( 'a' => array( 0 => '2', ), ), 'a=1&a[b]=2&a[]=3' => array( 'a' => array( 'b' => '2', 0 => '3', ), ), 'a%20b=%20' => array( 'a b' => ' ', ), 'a.b=c' => array( 'a.b' => 'c', ), 'a=b=c' => array( 'a' => 'b=c', ), ); $parser = new PhutilQueryStringParser(); foreach ($map as $query_string => $expected) { $this->assertEqual( $expected, $parser->parseQueryString($query_string)); } } public function testQueryStringListParsing() { $map = array( '' => array(), '&' => array(), '=' => array( array('', ''), ), '=&' => array( array('', ''), ), 'a=b' => array( array('a', 'b'), ), 'a[]=b' => array( array('a[]', 'b'), ), 'a=' => array( array('a', ''), ), '. [=1' => array( array('. [', '1'), ), 'a=b&c=d' => array( array('a', 'b'), array('c', 'd'), ), 'a=b&a=c' => array( array('a', 'b'), array('a', 'c'), ), '&a=b&' => array( array('a', 'b'), ), '=a' => array( array('', 'a'), ), '&&&' => array( ), 'a%20b=c%20d' => array( array('a b', 'c d'), ), ); $parser = new PhutilQueryStringParser(); foreach ($map as $query_string => $expected) { $this->assertEqual( $expected, $parser->parseQueryStringToPairList($query_string)); } } - } diff --git a/src/parser/__tests__/PhutilSimpleOptionsTestCase.php b/src/parser/__tests__/PhutilSimpleOptionsTestCase.php index 95c8df5..cb88e0d 100644 --- a/src/parser/__tests__/PhutilSimpleOptionsTestCase.php +++ b/src/parser/__tests__/PhutilSimpleOptionsTestCase.php @@ -1,146 +1,143 @@ array(), // Basic parsing. 'legs=4' => array('legs' => '4'), 'legs=4,eyes=2' => array('legs' => '4', 'eyes' => '2'), // Repeated keys mean last specification wins. 'legs=4,legs=3' => array('legs' => '3'), // Keys with no value should map to true. 
'flag' => array('flag' => true), 'legs=4,flag' => array('legs' => '4', 'flag' => true), // Leading and trailing spaces should be ignored. ' flag ' => array('flag' => true), ' legs = 4 , eyes = 2' => array('legs' => '4', 'eyes' => '2'), // Unescaped spaces inside values are OK. 'legs=a b c d' => array('legs' => 'a b c d'), // Case should be ignored. 'LEGS=4' => array('legs' => '4'), 'legs=4, LEGS=4' => array('legs' => '4'), // Empty values should be absent. 'legs=' => array(), 'legs=4,legs=,eyes=2' => array('eyes' => '2'), // Quoted values should allow parsing comma, equals, etc. 'punctuation=",="' => array('punctuation' => ',='), // Quoted keys can also have that stuff. '"backslash\\\\quote\\""=1' => array('backslash\\quote"' => '1'), ' "," = "," , "=" = "=" ' => array(',' => ',', '=' => '='), // Strings like this should not parse as simpleoptions. 'SELECT id, name, size FROM table' => array(), '"a""b"' => array(), '=a' => array(), ',a' => array(), 'a==' => array(), 'a=b=' => array(), ); foreach ($map as $string => $expect) { $parser = new PhutilSimpleOptions(); $this->assertEqual( $expect, $parser->parse($string), "Correct parse of '{$string}'"); } } public function testSimpleOptionsCaseParse() { $map = array( 'legs=4, LEGS=8, LeGs' => array( 'legs' => '4', 'LEGS' => '8', 'LeGs' => true, ), ); foreach ($map as $string => $expect) { $parser = new PhutilSimpleOptions(); $parser->setCaseSensitive(true); $this->assertEqual( $expect, $parser->parse($string), "Correct case-sensitive parse of '{$string}'"); } } public function testSimpleOptionsUnterminatedStrings() { $list = array( '"', "'", 'a="', "a='", 'a="\\', "a='\\", ); foreach ($list as $input) { $parser = new PhutilSimpleOptions(); $this->assertEqual( array(), $parser->parse($input), "Correct failing parse of invalid input: {$input}"); } } public function testSimpleOptionsUnparse() { $map = array( '' => array(), 'legs=4' => array('legs' => '4'), 'legs=4, eyes=2' => array('legs' => '4', 'eyes' => '2'), 'eyes=2, 
legs=4' => array('eyes' => '2', 'legs' => '4'), 'legs=4, head' => array('legs' => '4', 'head' => true), 'eyes=2' => array('legs' => '', 'eyes' => '2'), '"thousands separator"=","' => array('thousands separator' => ','), ); foreach ($map as $expect => $dict) { $parser = new PhutilSimpleOptions(); $this->assertEqual( $expect, $parser->unparse($dict), 'Correct unparse of '.print_r($dict, true)); } $bogus = array( array('' => ''), array('' => 'x'), ); foreach ($bogus as $bad_input) { $caught = null; try { $parser = new PhutilSimpleOptions(); $parser->unparse($bad_input); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue( $caught instanceof Exception, 'Correct throw on unparse of bad input.'); } $parser = new PhutilSimpleOptions(); $this->assertEqual( 'a="\\}"', $parser->unparse(array('a' => '}'), '}'), 'Unparse with extra escape.'); } } diff --git a/src/parser/__tests__/PhutilURITestCase.php b/src/parser/__tests__/PhutilURITestCase.php index a6f94ed..49272cc 100644 --- a/src/parser/__tests__/PhutilURITestCase.php +++ b/src/parser/__tests__/PhutilURITestCase.php @@ -1,104 +1,102 @@ assertEqual('http', $uri->getProtocol(), 'protocol'); $this->assertEqual('user', $uri->getUser(), 'user'); $this->assertEqual('pass', $uri->getPass(), 'pass'); $this->assertEqual('host', $uri->getDomain(), 'domain'); $this->assertEqual('99', $uri->getPort(), 'port'); $this->assertEqual('/path/', $uri->getPath(), 'path'); $this->assertEqual( array( 'query' => 'value', ), $uri->getQueryParams(), 'query params'); $this->assertEqual('fragment', $uri->getFragment(), 'fragment'); $this->assertEqual( 'http://user:pass@host:99/path/?query=value#fragment', (string)$uri, 'uri'); $uri = new PhutilURI('ssh://git@example.com/example/example.git'); $this->assertEqual('ssh', $uri->getProtocol(), 'protocol'); $this->assertEqual('git', $uri->getUser(), 'user'); $this->assertEqual('', $uri->getPass(), 'pass'); $this->assertEqual('example.com', $uri->getDomain(), 'domain'); $this->assertEqual('', 
$uri->getPort(), 'port'); $this->assertEqual('/example/example.git', $uri->getPath(), 'path'); $this->assertEqual(array(), $uri->getQueryParams(), 'query params'); $this->assertEqual('', $uri->getFragment(), 'fragment'); $this->assertEqual( 'ssh://git@example.com/example/example.git', (string)$uri, 'uri'); $uri = new PhutilURI('http://0@domain.com/'); $this->assertEqual('0', $uri->getUser()); $this->assertEqual('http://0@domain.com/', (string)$uri); $uri = new PhutilURI('http://0:0@domain.com/'); $this->assertEqual('0', $uri->getUser()); $this->assertEqual('0', $uri->getPass()); $this->assertEqual('http://0:0@domain.com/', (string)$uri); $uri = new PhutilURI('http://%20:%20@domain.com/'); $this->assertEqual(' ', $uri->getUser()); $this->assertEqual(' ', $uri->getPass()); $this->assertEqual('http://%20:%20@domain.com/', (string)$uri); $uri = new PhutilURI('http://%40:%40@domain.com/'); $this->assertEqual('@', $uri->getUser()); $this->assertEqual('@', $uri->getPass()); $this->assertEqual('http://%40:%40@domain.com/', (string)$uri); } public function testURIGeneration() { $uri = new PhutilURI('http://example.com'); $uri->setPath('bar'); $this->assertEqual('http://example.com/bar', $uri->__toString()); } public function testStrictURIParsingOfHosts() { $uri = new PhutilURI('http://&/'); $this->assertEqual('', $uri->getDomain()); } public function testStrictURIParsingOfLeadingWhitespace() { $uri = new PhutilURI(' http://example.com/'); $this->assertEqual('', $uri->getDomain()); } public function testAppendPath() { $uri = new PhutilURI('http://example.com'); $uri->appendPath('foo'); $this->assertEqual('http://example.com/foo', $uri->__toString()); $uri->appendPath('bar'); $this->assertEqual('http://example.com/foo/bar', $uri->__toString()); $uri = new PhutilURI('http://example.com'); $uri->appendPath('/foo/'); $this->assertEqual('http://example.com/foo/', $uri->__toString()); $uri->appendPath('/bar/'); $this->assertEqual('http://example.com/foo/bar/', $uri->__toString()); 
$uri = new PhutilURI('http://example.com'); $uri->appendPath('foo'); $this->assertEqual('http://example.com/foo', $uri->__toString()); $uri->appendPath('/bar/'); $this->assertEqual('http://example.com/foo/bar/', $uri->__toString()); } } diff --git a/src/parser/aast/api/AASTNode.php b/src/parser/aast/api/AASTNode.php index 31def42..a7f7738 100644 --- a/src/parser/aast/api/AASTNode.php +++ b/src/parser/aast/api/AASTNode.php @@ -1,267 +1,265 @@ id = $id; $this->typeID = $data[0]; if (isset($data[1])) { $this->l = $data[1]; } else { $this->l = -1; } if (isset($data[2])) { $this->r = $data[2]; } else { $this->r = -1; } $this->tree = $tree; } public function getParentNode() { return $this->parentNode; } public function getID() { return $this->id; } public function getTypeID() { return $this->typeID; } public function getTypeName() { if (empty($this->typeName)) { $this->typeName = $this->tree->getNodeTypeNameFromTypeID($this->getTypeID()); } return $this->typeName; } public function getChildren() { return $this->children; } public function getChildOfType($index, $type) { $child = $this->getChildByIndex($index); if ($child->getTypeName() != $type) { throw new Exception( "Child in position '{$index}' is not of type '{$type}': ". $this->getDescription()); } return $child; } public function getChildByIndex($index) { // NOTE: Microoptimization to avoid calls like array_values() or idx(). $idx = 0; foreach ($this->children as $child) { if ($idx == $index) { return $child; } ++$idx; } throw new Exception("No child with index '{$index}'."); } /** - * Build a cache to improve the performance of selectDescendantsOfType(). This - * cache makes a time/memory tradeoff by aggressively caching node - * descendants. It may improve the tree's query performance substantially if - * you make a large number of queries, but also requires a significant amount - * of memory. + * Build a cache to improve the performance of + * @{method:selectDescendantsOfType}. 
This cache makes a time/memory tradeoff + * by aggressively caching node descendants. It may improve the tree's query + * performance substantially if you make a large number of queries, but also + * requires a significant amount of memory. * * This builds a cache for the entire tree and improves performance of all - * selectDescendantsOfType() calls. + * @{method:selectDescendantsOfType} calls. */ public function buildSelectCache() { $cache = array(); foreach ($this->getChildren() as $id => $child) { $type_id = $child->getTypeID(); if (empty($cache[$type_id])) { $cache[$type_id] = array(); } $cache[$type_id][$id] = $child; foreach ($child->buildSelectCache() as $type_id => $nodes) { if (empty($cache[$type_id])) { $cache[$type_id] = array(); } $cache[$type_id] += $nodes; } } $this->selectCache = $cache; return $this->selectCache; } /** - * Build a cache to improve the performance of selectTokensOfType(). This - * cache makes a time/memory tradeoff by aggressively caching token types. - * It may improve the tree's query performance substantially if you make a - * large enumber of queries, but also requires a signficant amount of memory. + * Build a cache to improve the performance of @{method:selectTokensOfType}. + * This cache makes a time/memory tradeoff by aggressively caching token + * types. It may improve the tree's query performance substantially if you + * make a large number of queries, but also requires a signficant amount of + * memory. * * This builds a cache for this node only. */ public function buildTokenCache() { $cache = array(); foreach ($this->getTokens() as $id => $token) { $cache[$token->getTypeName()][$id] = $token; } $this->tokenCache = $cache; return $this->tokenCache; } /** * Select all tokens of a given type. 
*/ public function selectTokensOfType($type_name) { if (isset($this->tokenCache)) { return idx($this->tokenCache, $type_name, array()); } else { $result = array(); foreach ($this->getTokens() as $id => $token) { if ($token->getTypeName() == $type_name) { $result[$id] = $token; } } return $result; } } public function selectDescendantsOfType($type_name) { return $this->selectDescendantsOfTypes(array($type_name)); } public function selectDescendantsOfTypes(array $type_names) { $nodes = array(); foreach ($type_names as $type_name) { $type = $this->getTypeIDFromTypeName($type_name); if (isset($this->selectCache)) { if (isset($this->selectCache[$type])) { $nodes = $nodes + $this->selectCache[$type]; } } else { $nodes = $nodes + $this->executeSelectDescendantsOfType($this, $type); } } return AASTNodeList::newFromTreeAndNodes($this->tree, $nodes); } protected function executeSelectDescendantsOfType($node, $type) { $results = array(); foreach ($node->getChildren() as $id => $child) { if ($child->getTypeID() == $type) { $results[$id] = $child; } $results += $this->executeSelectDescendantsOfType($child, $type); } return $results; } public function getTokens() { if ($this->l == -1 || $this->r == -1) { return array(); } $tokens = $this->tree->getRawTokenStream(); $result = array(); foreach (range($this->l, $this->r) as $token_id) { $result[$token_id] = $tokens[$token_id]; } return $result; } public function getConcreteString() { $values = array(); foreach ($this->getTokens() as $token) { $values[] = $token->getValue(); } return implode('', $values); } public function getSemanticString() { $tokens = $this->getTokens(); foreach ($tokens as $id => $token) { if ($token->isComment()) { unset($tokens[$id]); } } return implode('', mpull($tokens, 'getValue')); } public function getDescription() { $concrete = $this->getConcreteString(); if (strlen($concrete) > 75) { $concrete = substr($concrete, 0, 36).'...'.substr($concrete, -36); } $concrete = addcslashes($concrete, "\\\n\""); return 
'a node of type '.$this->getTypeName().': "'.$concrete.'"'; } protected function getTypeIDFromTypeName($type_name) { return $this->tree->getNodeTypeIDFromTypeName($type_name); } public function getOffset() { $stream = $this->tree->getRawTokenStream(); if (empty($stream[$this->l])) { return null; } return $stream[$this->l]->getOffset(); } public function getSurroundingNonsemanticTokens() { $before = array(); $after = array(); $tokens = $this->tree->getRawTokenStream(); if ($this->l != -1) { $before = $tokens[$this->l]->getNonsemanticTokensBefore(); } if ($this->r != -1) { $after = $tokens[$this->r]->getNonsemanticTokensAfter(); } return array($before, $after); } public function getLineNumber() { return idx($this->tree->getOffsetToLineNumberMap(), $this->getOffset()); } public function dispose() { foreach ($this->getChildren() as $child) { $child->dispose(); } unset($this->selectCache); } } diff --git a/src/parser/aast/api/AASTNodeList.php b/src/parser/aast/api/AASTNodeList.php index 145b49b..25e8240 100644 --- a/src/parser/aast/api/AASTNodeList.php +++ b/src/parser/aast/api/AASTNodeList.php @@ -1,140 +1,137 @@ ids); } public function current() { return $this->list[$this->key()]; } public function rewind() { $this->pos = 0; } public function valid() { return $this->pos < count($this->ids); } public function next() { $this->pos++; } public function key() { return $this->ids[$this->pos]; } public static function newFromTreeAndNodes(AASTTree $tree, array $nodes) { assert_instances_of($nodes, 'AASTNode'); $obj = new AASTNodeList(); $obj->tree = $tree; $obj->list = $nodes; $obj->ids = array_keys($nodes); return $obj; } public static function newFromTree(AASTTree $tree) { $obj = new AASTNodeList(); $obj->tree = $tree; $obj->list = array(0 => $tree->getRootNode()); $obj->ids = array(0 => 0); return $obj; } protected function __construct() { } public function getDescription() { if (empty($this->list)) { return 'an empty node list'; } $desc = array(); $desc[] = 'a list of 
'.count($this->list).' nodes:'; foreach ($this->list as $node) { $desc[] = ' '.$node->getDescription().';'; } return implode("\n", $desc); } protected function newList(array $nodes) { return AASTNodeList::newFromTreeAndNodes( $this->tree, $nodes); } public function selectDescendantsOfType($type_name) { $results = array(); foreach ($this->list as $id => $node) { $results += $node->selectDescendantsOfType($type_name)->getRawNodes(); } return $this->newList($results); } public function selectDescendantsOfTypes(array $type_names) { $results = array(); foreach ($type_names as $type_name) { foreach ($this->list as $id => $node) { $results += $node->selectDescendantsOfType($type_name)->getRawNodes(); } } return $this->newList($results); } public function getChildrenByIndex($index) { $results = array(); foreach ($this->list as $id => $node) { $child = $node->getChildByIndex($index); $results[$child->getID()] = $child; } return $this->newList($results); } public function add(AASTNodeList $list) { foreach ($list->list as $id => $node) { $this->list[$id] = $node; } $this->ids = array_keys($this->list); return $this; } protected function executeSelectDescendantsOfType($node, $type) { $results = array(); foreach ($node->getChildren() as $id => $child) { if ($child->getTypeID() == $type) { $results[$id] = $child; } else { $results += $this->executeSelectDescendantsOfType($child, $type); } } return $results; } public function getTokens() { $tokens = array(); foreach ($this->list as $node) { $tokens += $node->getTokens(); } return $tokens; } public function getRawNodes() { return $this->list; } } diff --git a/src/parser/aast/api/AASTToken.php b/src/parser/aast/api/AASTToken.php index c30e429..9a21c28 100644 --- a/src/parser/aast/api/AASTToken.php +++ b/src/parser/aast/api/AASTToken.php @@ -1,85 +1,81 @@ id = $id; $this->typeID = $type; $this->offset = $offset; $this->value = $value; $this->tree = $tree; } public function getTokenID() { return $this->id; } public function 
getTypeID() { return $this->typeID; } public function getTypeName() { if (empty($this->typeName)) { $this->typeName = $this->tree->getTokenTypeNameFromTypeID($this->typeID); } return $this->typeName; } public function getValue() { return $this->value; } public function getOffset() { return $this->offset; } abstract public function isComment(); abstract public function isAnyWhitespace(); public function isSemantic() { return !($this->isComment() || $this->isAnyWhitespace()); } public function getPrevToken() { $tokens = $this->tree->getRawTokenStream(); return idx($tokens, $this->id - 1); } public function getNextToken() { $tokens = $this->tree->getRawTokenStream(); return idx($tokens, $this->id + 1); } public function getNonsemanticTokensBefore() { $tokens = $this->tree->getRawTokenStream(); $result = array(); $ii = $this->id - 1; while ($ii >= 0 && !$tokens[$ii]->isSemantic()) { $result[$ii] = $tokens[$ii]; --$ii; } return array_reverse($result); } public function getNonsemanticTokensAfter() { $tokens = $this->tree->getRawTokenStream(); $result = array(); $ii = $this->id + 1; while ($ii < count($tokens) && !$tokens[$ii]->isSemantic()) { $result[$ii] = $tokens[$ii]; ++$ii; } return $result; } - } diff --git a/src/parser/aast/api/AASTTree.php b/src/parser/aast/api/AASTTree.php index 8b5c132..2ca8b1a 100644 --- a/src/parser/aast/api/AASTTree.php +++ b/src/parser/aast/api/AASTTree.php @@ -1,190 +1,188 @@ stream[$ii] = $this->newToken( $ii, $token[0], substr($source, $offset, $token[1]), $offset, $this); $offset += $token[1]; ++$ii; } $this->rawSource = $source; $this->buildTree(array($tree)); } public function setTreeType($description) { $this->treeType = $description; return $this; } public function getTreeType() { return $this->treeType; } public function setTokenConstants(array $token_map) { $this->tokenConstants = $token_map; $this->tokenReverseMap = array_flip($token_map); return $this; } public function setNodeConstants(array $node_map) { $this->nodeConstants = 
$node_map; $this->nodeReverseMap = array_flip($node_map); return $this; } public function getNodeTypeNameFromTypeID($type_id) { if (empty($this->nodeConstants[$type_id])) { $tree_type = $this->getTreeType(); throw new Exception( "No type name for node type ID '{$type_id}' in '{$tree_type}' AAST."); } return $this->nodeConstants[$type_id]; } public function getNodeTypeIDFromTypeName($type_name) { if (empty($this->nodeReverseMap[$type_name])) { $tree_type = $this->getTreeType(); throw new Exception( "No type ID for node type name '{$type_name}' in '{$tree_type}' AAST."); } return $this->nodeReverseMap[$type_name]; } public function getTokenTypeNameFromTypeID($type_id) { if (empty($this->tokenConstants[$type_id])) { $tree_type = $this->getTreeType(); throw new Exception( "No type name for token type ID '{$type_id}' ". "in '{$tree_type}' AAST."); } return $this->tokenConstants[$type_id]; } public function getTokenTypeIDFromTypeName($type_name) { if (empty($this->tokenReverseMap[$type_name])) { $tree_type = $this->getTreeType(); throw new Exception( "No type ID for token type name '{$type_name}' ". "in '{$tree_type}' AAST."); } return $this->tokenReverseMap[$type_name]; } /** * Unlink internal data structures so that PHP will garbage collect the tree. * This renders the object useless. 
* * @return void */ public function dispose() { $this->getRootNode()->dispose(); unset($this->tree); unset($this->stream); } public function getRootNode() { return $this->tree[0]; } protected function buildTree(array $tree) { $ii = count($this->tree); $nodes = array(); foreach ($tree as $node) { $this->tree[$ii] = $this->newNode($ii, $node, $this); $nodes[$ii] = $node; ++$ii; } foreach ($nodes as $node_id => $node) { if (isset($node[3])) { $children = $this->buildTree($node[3]); foreach ($children as $child) { $child->parentNode = $this->tree[$node_id]; } $this->tree[$node_id]->children = $children; } } $result = array(); foreach ($nodes as $key => $node) { $result[$key] = $this->tree[$key]; } return $result; } public function getRawTokenStream() { return $this->stream; } public function renderAsText() { return $this->executeRenderAsText(array($this->getRootNode()), 0); } protected function executeRenderAsText($list, $depth) { $return = ''; foreach ($list as $node) { if ($depth) { $return .= str_repeat(' ', $depth); } $return .= $node->getDescription()."\n"; $return .= $this->executeRenderAsText($node->getChildren(), $depth + 1); } return $return; } public function getOffsetToLineNumberMap() { if ($this->lineMap === null) { $src = $this->rawSource; $len = strlen($src); $lno = 1; $map = array(); for ($ii = 0; $ii < $len; ++$ii) { $map[$ii] = $lno; if ($src[$ii] == "\n") { ++$lno; } } $this->lineMap = $map; } return $this->lineMap; } } diff --git a/src/parser/argument/PhutilArgumentParser.php b/src/parser/argument/PhutilArgumentParser.php index 683ba77..b08fd30 100644 --- a/src/parser/argument/PhutilArgumentParser.php +++ b/src/parser/argument/PhutilArgumentParser.php @@ -1,794 +1,792 @@ setTagline('make a new dog'); * $args->setSynopsis($synopsis); * $args->parse( * array( * array( * 'name' => 'name', * 'param' => 'dogname', * 'default' => 'Rover', * 'help' => 'Set the dog\'s name. By default, the dog will be '. 
* 'named "Rover".', * ), * array( * 'name' => 'big', * 'short' => 'b', * 'help' => 'If set, create a large dog.', * ), * )); * * $dog_name = $args->getArg('name'); * $dog_size = $args->getArg('big') ? 'big' : 'small'; * * // ... etc ... * * (For detailed documentation on supported keys in argument specifications, * see @{class:PhutilArgumentSpecification}.) * * This will handle argument parsing, and generate appropriate usage help if * the user provides an unsupported flag. @{class:PhutilArgumentParser} also * supports some builtin "standard" arguments: * * $args->parseStandardArguments(); * * See @{method:parseStandardArguments} for details. Notably, this includes * a "--help" flag, and an "--xprofile" flag for profiling command-line scripts. * * Normally, when the parser encounters an unknown flag, it will exit with * an error. However, you can use @{method:parsePartial} to consume only a * set of flags: * * $args->parsePartial($spec_list); * * This allows you to parse some flags before making decisions about other * parsing, or share some flags across scripts. The builtin standard arguments * are implemented in this way. * * There is also builtin support for "workflows", which allow you to build a * script that operates in several modes (e.g., by accepting commands like * `install`, `upgrade`, etc), like `arc` does. For detailed documentation on * workflows, see @{class:PhutilArgumentWorkflow}. * * @task parse Parsing Arguments * @task read Reading Arguments * @task help Command Help * @task internal Internals - * - * @group console */ final class PhutilArgumentParser { private $bin; private $argv; private $specs = array(); private $results = array(); private $parsed; private $tagline; private $synopsis; private $workflows; private $showHelp; const PARSE_ERROR_CODE = 77; /* -( Parsing Arguments )-------------------------------------------------- */ /** * Build a new parser. 
Generally, you start a script with: * * $args = new PhutilArgumentParser($argv); * * @param list Argument vector to parse, generally the $argv global. * @task parse */ public function __construct(array $argv) { $this->bin = $argv[0]; $this->argv = array_slice($argv, 1); } /** * Parse and consume a list of arguments, removing them from the argument * vector but leaving unparsed arguments for later consumption. You can * retrieve unconsumed arguments directly with * @{method:getUnconsumedArgumentVector}. Doing a partial parse can make it * easier to share common flags across scripts or workflows. * * @param list List of argument specs, see * @{class:PhutilArgumentSpecification}. * @return this * @task parse */ public function parsePartial(array $specs) { $specs = PhutilArgumentSpecification::newSpecsFromList($specs); $this->mergeSpecs($specs); $specs_by_name = mpull($specs, null, 'getName'); $specs_by_short = mpull($specs, null, 'getShortAlias'); unset($specs_by_short[null]); $argv = $this->argv; $len = count($argv); for ($ii = 0; $ii < $len; $ii++) { $arg = $argv[$ii]; $map = null; if ($arg == '--') { // This indicates "end of flags". break; } else if ($arg == '-') { // This is a normal argument (e.g., stdin). 
continue; } else if (!strncmp('--', $arg, 2)) { $pre = '--'; $arg = substr($arg, 2); $map = $specs_by_name; } else if (!strncmp('-', $arg, 1) && strlen($arg) > 1) { $pre = '-'; $arg = substr($arg, 1); $map = $specs_by_short; } if ($map) { $val = null; $parts = explode('=', $arg, 2); if (count($parts) == 2) { list($arg, $val) = $parts; } if (isset($map[$arg])) { $spec = $map[$arg]; unset($argv[$ii]); $param_name = $spec->getParamName(); if ($val !== null) { if ($param_name === null) { throw new PhutilArgumentUsageException( "Argument '{$pre}{$arg}' does not take a parameter."); } } else { if ($param_name !== null) { if ($ii + 1 < $len) { $val = $argv[$ii + 1]; unset($argv[$ii + 1]); $ii++; } else { throw new PhutilArgumentUsageException( "Argument '{$pre}{$arg}' requires a parameter."); } } else { $val = true; } } if (!$spec->getRepeatable()) { if (array_key_exists($spec->getName(), $this->results)) { throw new PhutilArgumentUsageException( "Argument '{$pre}{$arg}' was provided twice."); } } $conflicts = $spec->getConflicts(); foreach ($conflicts as $conflict => $reason) { if (array_key_exists($conflict, $this->results)) { if (!is_string($reason) || !strlen($reason)) { $reason = '.'; } else { $reason = ': '.$reason.'.'; } throw new PhutilArgumentUsageException( "Argument '{$pre}{$arg}' conflicts with argument ". "'--{$conflict}'{$reason}"); } } if ($spec->getRepeatable()) { if ($spec->getParamName() === null) { if (empty($this->results[$spec->getName()])) { $this->results[$spec->getName()] = 0; } $this->results[$spec->getName()]++; } else { $this->results[$spec->getName()][] = $val; } } else { $this->results[$spec->getName()] = $val; } } } } foreach ($specs as $spec) { if ($spec->getWildcard()) { $this->results[$spec->getName()] = $this->filterWildcardArgv($argv); $argv = array(); break; } } $this->argv = array_values($argv); return $this; } /** * Parse and consume a list of arguments, throwing an exception if there is * anything left unconsumed. 
This is like @{method:parsePartial}, but raises * a @{class:PhutilArgumentUsageException} if there are leftovers. * * Normally, you would call @{method:parse} instead, which emits a * user-friendly error. You can also use @{method:printUsageException} to * render the exception in a user-friendly way. * * @param list List of argument specs, see * @{class:PhutilArgumentSpecification}. * @return this * @task parse */ public function parseFull(array $specs) { $this->parsePartial($specs); if (count($this->argv)) { $arg = head($this->argv); throw new PhutilArgumentUsageException( "Unrecognized argument '{$arg}'."); } if ($this->showHelp) { $this->printHelpAndExit(); } return $this; } /** * Parse and consume a list of arguments, raising a user-friendly error if * anything remains. See also @{method:parseFull} and @{method:parsePartial}. * * @param list List of argument specs, see * @{class:PhutilArgumentSpecification}. * @return this * @task parse */ public function parse(array $specs) { try { return $this->parseFull($specs); } catch (PhutilArgumentUsageException $ex) { $this->printUsageException($ex); exit(self::PARSE_ERROR_CODE); } } /** * Parse and execute workflows, raising a user-friendly error if anything * remains. See also @{method:parseWorkflowsFull}. * * See @{class:PhutilArgumentWorkflow} for details on using workflows. * * @param list List of @{class:PhutilArgumentWorkflow}s. * @return PhutilArgumentWorkflow Returns the chosen workflow if it is not * executable; an executable workflow is * executed and the script exits. * @task parse */ public function parseWorkflows(array $workflows) { try { return $this->parseWorkflowsFull($workflows); } catch (PhutilArgumentUsageException $ex) { $this->printUsageException($ex); exit(self::PARSE_ERROR_CODE); } } /** * Select a workflow. For commands that may operate in several modes, like * `arc`, the modes can be split into "workflows". Each workflow specifies * the arguments it accepts. 
This method takes a list of workflows, selects
   * the chosen workflow, parses its arguments, and either executes it (if it
   * is executable) or returns it for handling.
   *
   * See @{class:PhutilArgumentWorkflow} for details on using workflows.
   *
   * @param list List of @{class:PhutilArgumentWorkflow}s.
   * @return PhutilArgumentWorkflow|no Returns the chosen workflow if it is
   *                                   not executable, or executes it and
   *                                   exits with a return code if it is.
   * @throws PhutilArgumentSpecificationException If a workflow has no name,
   *         or if two workflows share the same name.
   * @throws PhutilArgumentUsageException If no workflow is selected, or the
   *         selected command does not match any workflow.
   * @task parse
   */
  public function parseWorkflowsFull(array $workflows) {
    assert_instances_of($workflows, 'PhutilArgumentWorkflow');

    // Clear out existing workflows. We need to do this to permit the
    // construction of sub-workflows.
    $this->workflows = array();

    // Index the workflows by name, rejecting unnamed and duplicate entries.
    foreach ($workflows as $workflow) {
      $name = $workflow->getName();

      if ($name === null) {
        throw new PhutilArgumentSpecificationException(
          'Workflow has no name!');
      }

      if (isset($this->workflows[$name])) {
        throw new PhutilArgumentSpecificationException(
          "Two workflows with name '{$name}'!");
      }

      $this->workflows[$name] = $workflow;
    }

    $argv = $this->argv;
    if (empty($argv)) {
      // TODO: this is kind of hacky / magical.
      if (isset($this->workflows['help'])) {
        $argv = array('help');
      } else {
        throw new PhutilArgumentUsageException(
          'No workflow selected.');
      }
    }

    // The first remaining argument selects the workflow; it is lowercased
    // before being looked up.
    $flow = array_shift($argv);
    $flow = strtolower($flow);

    if (empty($this->workflows[$flow])) {
      // Build a sorted list of the known workflow names for the error text.
      $workflow_names = array();
      foreach ($this->workflows as $wf) {
        $workflow_names[] = $wf->getName();
      }
      sort($workflow_names);
      $command_list = implode(', ', $workflow_names);
      $ex_msg =
        "Invalid command '{$flow}'. Valid commands are: {$command_list}.";
      if (in_array('help', $workflow_names)) {
        $bin = basename($this->bin);
        $ex_msg .=
          "\nFor more details on available commands, run `{$bin} help`.";
      }
      throw new PhutilArgumentUsageException($ex_msg);
    }

    $workflow = $this->workflows[$flow];

    if ($this->showHelp) {
      // Make "cmd flow --help" behave like "cmd help flow", not "cmd help".
$help_flow = idx($this->workflows, 'help'); if ($help_flow) { if ($help_flow !== $workflow) { $workflow = $help_flow; $argv = array($flow); // Prevent parse() from dumping us back out to standard help. $this->showHelp = false; } } else { $this->printHelpAndExit(); } } $this->argv = array_values($argv); if ($workflow->shouldParsePartial()) { $this->parsePartial($workflow->getArguments()); } else { $this->parse($workflow->getArguments()); } if ($workflow->isExecutable()) { $err = $workflow->execute($this); exit($err); } else { return $workflow; } } /** * Parse "standard" arguments and apply their effects: * * --trace Enable service call tracing. * --no-ansi Disable ANSI color/style sequences. * --xprofile Write out an XHProf profile. * --help Show help. * * @return this * * @phutil-external-symbol function xhprof_enable */ public function parseStandardArguments() { try { $this->parsePartial( array( array( 'name' => 'trace', 'help' => 'Trace command execution and show service calls.', 'standard' => true, ), array( 'name' => 'no-ansi', 'help' => 'Disable ANSI terminal codes, printing plain text with '. 'no color or style.', 'conflicts' => array( 'ansi' => null, ), 'standard' => true, ), array( 'name' => 'ansi', 'help' => "Use formatting even in environments which probably ". "don't support it.", 'standard' => true, ), array( 'name' => 'xprofile', 'param' => 'profile', 'help' => 'Profile script execution and write results to a file.', 'standard' => true, ), array( 'name' => 'help', 'short' => 'h', 'help' => 'Show this help.', 'standard' => true, ), array( 'name' => 'show-standard-options', 'help' => 'Show every option, including standard options '. 
'like this one.', 'standard' => true, ), array( 'name' => 'recon', 'help' => 'Start in remote console mode.', 'standard' => true, ), )); } catch (PhutilArgumentUsageException $ex) { $this->printUsageException($ex); exit(self::PARSE_ERROR_CODE); } if ($this->getArg('trace')) { PhutilServiceProfiler::installEchoListener(); } if ($this->getArg('no-ansi')) { PhutilConsoleFormatter::disableANSI(true); } if ($this->getArg('ansi')) { PhutilConsoleFormatter::disableANSI(false); } if ($this->getArg('help')) { $this->showHelp = true; } $xprofile = $this->getArg('xprofile'); if ($xprofile) { if (!function_exists('xhprof_enable')) { throw new Exception("To use '--xprofile', you must install XHProf."); } xhprof_enable(0); register_shutdown_function(array($this, 'shutdownProfiler')); } $recon = $this->getArg('recon'); if ($recon) { $remote_console = PhutilConsole::newRemoteConsole(); $remote_console->beginRedirectOut(); PhutilConsole::setConsole($remote_console); } else if ($this->getArg('trace')) { $server = new PhutilConsoleServer(); $server->setEnableLog(true); $console = PhutilConsole::newConsoleForServer($server); PhutilConsole::setConsole($console); } return $this; } /* -( Reading Arguments )-------------------------------------------------- */ public function getArg($name) { if (empty($this->specs[$name])) { throw new PhutilArgumentSpecificationException( "No specification exists for argument '{$name}'!"); } if (idx($this->results, $name) !== null) { return $this->results[$name]; } return $this->specs[$name]->getDefault(); } public function getUnconsumedArgumentVector() { return $this->argv; } /* -( Command Help )------------------------------------------------------- */ public function setSynopsis($synopsis) { $this->synopsis = $synopsis; return $this; } public function setTagline($tagline) { $this->tagline = $tagline; return $this; } public function printHelpAndExit() { echo $this->renderHelp(); exit(self::PARSE_ERROR_CODE); } public function renderHelp() { $out = 
array(); $more = array(); if ($this->bin) { $out[] = $this->format('**NAME**'); $name = $this->indent(6, '**%s**', basename($this->bin)); if ($this->tagline) { $name .= $this->format(' - '.$this->tagline); } $out[] = $name; $out[] = null; } if ($this->synopsis) { $out[] = $this->format('**SYNOPSIS**'); $out[] = $this->indent(6, $this->synopsis); $out[] = null; } if ($this->workflows) { $has_help = false; $out[] = $this->format('**WORKFLOWS**'); $out[] = null; $flows = $this->workflows; ksort($flows); foreach ($flows as $workflow) { if ($workflow->getName() == 'help') { $has_help = true; } $out[] = $this->renderWorkflowHelp( $workflow->getName(), $show_details = false); } if ($has_help) { $more[] = 'Use **help** __command__ for a detailed command reference.'; } } $specs = $this->renderArgumentSpecs($this->specs); if ($specs) { $out[] = $this->format('**OPTION REFERENCE**'); $out[] = null; $out[] = $specs; } // If we have standard options but no --show-standard-options, print out // a quick hint about it. if (!empty($this->specs['show-standard-options']) && !$this->getArg('show-standard-options')) { $more[] = 'Use __--show-standard-options__ to show additional options.'; } $out[] = null; if ($more) { foreach ($more as $hint) { $out[] = $this->indent(0, $hint); } $out[] = null; } return implode("\n", $out); } public function renderWorkflowHelp( $workflow_name, $show_details = false) { $out = array(); $indent = ($show_details ? 
0 : 6); $workflow = idx($this->workflows, strtolower($workflow_name)); if (!$workflow) { $out[] = $this->indent( $indent, "There is no **{$workflow_name}** workflow."); } else { $out[] = $this->indent($indent, $workflow->getExamples()); $out[] = $this->indent($indent, $workflow->getSynopsis()); if ($show_details) { $full_help = $workflow->getHelp(); if ($full_help) { $out[] = null; $out[] = $this->indent($indent, $full_help); } $specs = $this->renderArgumentSpecs($workflow->getArguments()); if ($specs) { $out[] = null; $out[] = $specs; } } } $out[] = null; return implode("\n", $out); } public function printUsageException(PhutilArgumentUsageException $ex) { fwrite( STDERR, $this->format("**Usage Exception:** %s\n", $ex->getMessage())); } /* -( Internals )---------------------------------------------------------- */ private function filterWildcardArgv(array $argv) { foreach ($argv as $key => $value) { if ($value == '--') { unset($argv[$key]); break; } else if (!strncmp($value, '-', 1) && strlen($value) > 1) { throw new PhutilArgumentUsageException( "Argument '{$value}' is unrecognized. Use '--' to indicate the ". "end of flags."); } } return array_values($argv); } private function mergeSpecs(array $specs) { $short_map = mpull($this->specs, null, 'getShortAlias'); unset($short_map[null]); $wildcard = null; foreach ($this->specs as $spec) { if ($spec->getWildcard()) { $wildcard = $spec; break; } } foreach ($specs as $spec) { $spec->validate(); $name = $spec->getName(); if (isset($this->specs[$name])) { throw new PhutilArgumentSpecificationException( "Two argument specifications have the same name ('{$name}')."); } $short = $spec->getShortAlias(); if ($short) { if (isset($short_map[$short])) { throw new PhutilArgumentSpecificationException( "Two argument specifications have the same short alias ". 
"('{$short}')."); } $short_map[$short] = $spec; } if ($spec->getWildcard()) { if ($wildcard) { throw new PhutilArgumentSpecificationException( 'Two argument specifications are marked as wildcard arguments. '. 'You can have a maximum of one wildcard argument.'); } else { $wildcard = $spec; } } $this->specs[$name] = $spec; } foreach ($this->specs as $name => $spec) { foreach ($spec->getConflicts() as $conflict => $reason) { if (empty($this->specs[$conflict])) { throw new PhutilArgumentSpecificationException( "Argument '{$name}' conflicts with unspecified argument ". "'{$conflict}'."); } if ($conflict == $name) { throw new PhutilArgumentSpecificationException( "Argument '{$name}' conflicts with itself!"); } } } } private function renderArgumentSpecs(array $specs) { foreach ($specs as $key => $spec) { if ($spec->getWildcard()) { unset($specs[$key]); } } $out = array(); $specs = msort($specs, 'getName'); foreach ($specs as $spec) { if ($spec->getStandard() && !$this->getArg('show-standard-options')) { // If this is a standard argument and the user didn't pass // --show-standard-options, skip it. continue; } $name = $this->indent(6, '__--%s__', $spec->getName()); $short = null; if ($spec->getShortAlias()) { $short = $this->format(', __-%s__', $spec->getShortAlias()); } if ($spec->getParamName()) { $param = $this->format(' __%s__', $spec->getParamName()); $name .= $param; if ($short) { $short .= $param; } } $out[] = $name.$short; $out[] = $this->indent(10, $spec->getHelp()); $out[] = null; } return implode("\n", $out); } private function format($str /* , ... */) { $args = func_get_args(); return call_user_func_array( 'phutil_console_format', $args); } private function indent($level, $str /* , ... 
*/) { $args = func_get_args(); $args = array_slice($args, 1); $text = call_user_func_array(array($this, 'format'), $args); return phutil_console_wrap($text, $level); } /** * @phutil-external-symbol function xhprof_disable */ public function shutdownProfiler() { $data = xhprof_disable(); $data = serialize($data); Filesystem::writeFile($this->getArg('xprofile'), $data); } } diff --git a/src/parser/argument/PhutilArgumentSpecification.php b/src/parser/argument/PhutilArgumentSpecification.php index 03bc178..0e6b2f1 100644 --- a/src/parser/argument/PhutilArgumentSpecification.php +++ b/src/parser/argument/PhutilArgumentSpecification.php @@ -1,261 +1,258 @@ 'verbose', * 'short' => 'v', * )); * * Recognized keys and equivalent verbose methods are: * * name setName() * help setHelp() * short setShortAlias() * param setParamName() * default setDefault() * conflicts setConflicts() * wildcard setWildcard() * repeat setRepeatable() * standard setStandard() * * @param dict Dictionary of quick parameter definitions. * @return PhutilArgumentSpecification Constructed argument specification. */ public static function newQuickSpec(array $spec) { $recognized_keys = array( 'name', 'help', 'short', 'param', 'default', 'conflicts', 'wildcard', 'repeat', 'standard', ); $unrecognized = array_diff_key( $spec, array_fill_keys($recognized_keys, true)); foreach ($unrecognized as $key => $ignored) { throw new PhutilArgumentSpecificationException( "Unrecognized key '{$key}' in argument specification. Recognized keys ". 
"are: ".implode(', ', $recognized_keys).'.'); } $obj = new PhutilArgumentSpecification(); foreach ($spec as $key => $value) { switch ($key) { case 'name': $obj->setName($value); break; case 'help': $obj->setHelp($value); break; case 'short': $obj->setShortAlias($value); break; case 'param': $obj->setParamName($value); break; case 'default': $obj->setDefault($value); break; case 'conflicts': $obj->setConflicts($value); break; case 'wildcard': $obj->setWildcard($value); break; case 'repeat': $obj->setRepeatable($value); break; case 'standard': $obj->setStandard($value); break; } } $obj->validate(); return $obj; } public static function newSpecsFromList(array $specs) { foreach ($specs as $key => $spec) { if (is_array($spec)) { $specs[$key] = PhutilArgumentSpecification::newQuickSpec( $spec); } } return $specs; } public function setName($name) { self::validateName($name); $this->name = $name; return $this; } private static function validateName($name) { if (!preg_match('/^[a-z0-9][a-z0-9-]*$/', $name)) { throw new PhutilArgumentSpecificationException( "Argument names may only contain a-z, 0-9 and -, and must be ". "at least one character long. '{$name}' is invalid."); } } public function getName() { return $this->name; } public function setHelp($help) { $this->help = $help; return $this; } public function getHelp() { return $this->help; } public function setShortAlias($short_alias) { self::validateShortAlias($short_alias); $this->shortAlias = $short_alias; return $this; } private static function validateShortAlias($alias) { if (strlen($alias) !== 1) { throw new PhutilArgumentSpecificationException( "Argument short aliases must be exactly one character long. ". "'{$alias}' is invalid."); } if (!preg_match('/^[a-zA-Z0-9]$/', $alias)) { throw new PhutilArgumentSpecificationException( "Argument short aliases may only be in a-z, A-Z and 0-9. ". 
"'{$alias}' is invalid."); } } public function getShortAlias() { return $this->shortAlias; } public function setParamName($param_name) { $this->paramName = $param_name; return $this; } public function getParamName() { return $this->paramName; } public function setDefault($default) { $this->default = $default; return $this; } public function getDefault() { if ($this->getParamName() === null) { if ($this->getRepeatable()) { return 0; } else { return false; } } else { if ($this->getRepeatable()) { return array(); } else { return $this->default; } } } public function setConflicts(array $conflicts) { $this->conflicts = $conflicts; return $this; } public function getConflicts() { return $this->conflicts; } public function setWildcard($wildcard) { $this->wildcard = $wildcard; return $this; } public function getWildcard() { return $this->wildcard; } public function setRepeatable($repeatable) { $this->repeatable = $repeatable; return $this; } public function getRepeatable() { return $this->repeatable; } public function setStandard($standard) { $this->standard = $standard; return $this; } public function getStandard() { return $this->standard; } public function validate() { if ($this->name === null) { throw new PhutilArgumentSpecificationException( "Argument specification MUST have a 'name'."); } if ($this->getWildcard()) { if ($this->getParamName()) { throw new PhutilArgumentSpecificationException( 'Wildcard arguments may not specify a parameter.'); } if ($this->getRepeatable()) { throw new PhutilArgumentSpecificationException( 'Wildcard arguments may not be repeatable.'); } } if ($this->default !== null) { if ($this->getRepeatable()) { throw new PhutilArgumentSpecificationException( 'Repeatable arguments may not have a default (always array() for '. 'arguments which accept a parameter, or 0 for arguments which do '. 
'not).'); } else if ($this->getParamName() === null) { throw new PhutilArgumentSpecificationException( 'Flag arguments may not have a default (always false).'); } } } } diff --git a/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php b/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php index f729cba..78a9af7 100644 --- a/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php +++ b/src/parser/argument/__tests__/PhutilArgumentParserTestCase.php @@ -1,409 +1,406 @@ 'flag', )); $args = new PhutilArgumentParser(array('bin')); $args->parseFull($specs); $this->assertEqual(false, $args->getArg('flag')); $args = new PhutilArgumentParser(array('bin', '--flag')); $args->parseFull($specs); $this->assertEqual(true, $args->getArg('flag')); } public function testWildcards() { $specs = array( array( 'name' => 'flag', ), array( 'name' => 'files', 'wildcard' => true, ), ); $args = new PhutilArgumentParser(array('bin', '--flag', 'a', 'b')); $args->parseFull($specs); $this->assertEqual(true, $args->getArg('flag')); $this->assertEqual( array('a', 'b'), $args->getArg('files')); $caught = null; try { $args = new PhutilArgumentParser(array('bin', '--derp', 'a', 'b')); $args->parseFull($specs); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); $args = new PhutilArgumentParser(array('bin', '--', '--derp', 'a', 'b')); $args->parseFull($specs); $this->assertEqual( array('--derp', 'a', 'b'), $args->getArg('files')); } public function testPartialParse() { $specs = array( array( 'name' => 'flag', ), ); $args = new PhutilArgumentParser(array('bin', 'a', '--flag', '--', 'b')); $args->parsePartial($specs); $this->assertEqual( array('a', '--', 'b'), $args->getUnconsumedArgumentVector()); } public function testBadArg() { $args = new PhutilArgumentParser(array('bin')); $args->parseFull(array()); $caught = null; try { $args->getArg('flag'); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } 
$this->assertTrue($caught instanceof Exception); } public function testDuplicateNames() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', ), array( 'name' => 'x', ))); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateNamesWithParsePartial() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parsePartial( array( array( 'name' => 'x', ))); $args->parsePartial( array( array( 'name' => 'x', ))); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateShortAliases() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'short' => 'x', ), array( 'name' => 'y', 'short' => 'x', ))); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateWildcards() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'wildcard' => true, ), array( 'name' => 'y', 'wildcard' => true, ))); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicatePartialWildcards() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parsePartial( array( array( 'name' => 'x', 'wildcard' => true, ), )); $args->parsePartial( array( array( 'name' => 'y', 'wildcard' => true, ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictSpecificationWithUnrecognizedArg() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'conflicts' 
=> array( 'y' => true, ), ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictSpecificationWithSelf() { $args = new PhutilArgumentParser(array('bin')); $caught = null; try { $args->parseFull( array( array( 'name' => 'x', 'conflicts' => array( 'x' => true, ), ), )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testUnrecognizedFlag() { $args = new PhutilArgumentParser(array('bin', '--flag')); $caught = null; try { $args->parseFull(array()); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testDuplicateFlag() { $args = new PhutilArgumentParser(array('bin', '--flag', '--flag')); $caught = null; try { $args->parseFull( array( array( 'name' => 'flag', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testMissingParameterValue() { $args = new PhutilArgumentParser(array('bin', '--with')); $caught = null; try { $args->parseFull( array( array( 'name' => 'with', 'param' => 'stuff', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testExtraParameterValue() { $args = new PhutilArgumentParser(array('bin', '--true=apple')); $caught = null; try { $args->parseFull( array( array( 'name' => 'true', ), )); } catch (PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testConflictParameterValue() { $args = new PhutilArgumentParser(array('bin', '--true', '--false')); $caught = null; try { $args->parseFull( array( array( 'name' => 'true', 'conflicts' => array( 'false' => true, ), ), array( 'name' => 'false', 'conflicts' => array( 'true' => true, ), ), )); } catch 
(PhutilArgumentUsageException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof Exception); } public function testParameterValues() { $specs = array( array( 'name' => 'a', 'param' => 'value', ), array( 'name' => 'b', 'param' => 'value', ), array( 'name' => 'cee', 'short' => 'c', 'param' => 'value', ), array( 'name' => 'dee', 'short' => 'd', 'param' => 'value', ), ); $args = new PhutilArgumentParser( array( 'bin', '--a', 'a', '--b=b', '-c', 'c', '-d=d', )); $args->parseFull($specs); $this->assertEqual('a', $args->getArg('a')); $this->assertEqual('b', $args->getArg('b')); $this->assertEqual('c', $args->getArg('cee')); $this->assertEqual('d', $args->getArg('dee')); } public function testStdinValidParameter() { $specs = array( array( 'name' => 'file', 'param' => 'file', ), ); $args = new PhutilArgumentParser( array( 'bin', '-', '--file', '-', )); $args->parsePartial($specs); $this->assertEqual('-', $args->getArg('file')); } public function testRepeatableFlag() { $specs = array( array( 'name' => 'verbose', 'short' => 'v', 'repeat' => true, ), ); $args = new PhutilArgumentParser(array('bin', '-v', '-v', '-v')); $args->parseFull($specs); $this->assertEqual(3, $args->getArg('verbose')); } public function testRepeatableParam() { $specs = array( array( 'name' => 'eat', 'param' => 'fruit', 'repeat' => true, ), ); $args = new PhutilArgumentParser(array( 'bin', '--eat', 'apple', '--eat', 'pear', '--eat=orange', )); $args->parseFull($specs); $this->assertEqual( array('apple', 'pear', 'orange'), $args->getArg('eat')); } } diff --git a/src/parser/argument/__tests__/PhutilArgumentSpecificationTestCase.php b/src/parser/argument/__tests__/PhutilArgumentSpecificationTestCase.php index bd8013d..e6bc65c 100644 --- a/src/parser/argument/__tests__/PhutilArgumentSpecificationTestCase.php +++ b/src/parser/argument/__tests__/PhutilArgumentSpecificationTestCase.php @@ -1,145 +1,142 @@ true, 'xx' => true, '!' 
=> false, 'XX' => false, '1=' => false, '--' => false, 'no-stuff' => true, '-stuff' => false, ); foreach ($names as $name => $valid) { $caught = null; try { PhutilArgumentSpecification::newQuickSpec( array( 'name' => $name, )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertEqual( !$valid, $caught instanceof Exception, "Arg name '{$name}'."); } } public function testAliases() { $aliases = array( 'a' => true, '1' => true, 'no' => false, '-' => false, '_' => false, ' ' => false, '' => false, ); foreach ($aliases as $alias => $valid) { $caught = null; try { PhutilArgumentSpecification::newQuickSpec( array( 'name' => 'example', 'short' => $alias, )); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertEqual( !$valid, $caught instanceof Exception, "Arg alias '{$alias}'."); } } public function testSpecs() { $good_specs = array( array( 'name' => 'verbose', ), array( 'name' => 'verbose', 'short' => 'v', 'help' => 'Derp.', 'param' => 'level', 'default' => 'y', 'conflicts' => array( 'quiet' => true, ), 'wildcard' => false, ), array( 'name' => 'files', 'wildcard' => true, ), ); $bad_specs = array( array( ), array( 'alias' => 'v', ), array( 'name' => 'derp', 'fruit' => 'apple', ), array( 'name' => 'x', 'default' => 'y', ), array( 'name' => 'x', 'param' => 'y', 'default' => 'z', 'repeat' => true, ), array( 'name' => 'x', 'wildcard' => true, 'repeat' => true, ), array( 'name' => 'x', 'param' => 'y', 'wildcard' => true, ), ); $cases = array( array(true, $good_specs), array(false, $bad_specs), ); foreach ($cases as $case) { list($expect, $specs) = $case; foreach ($specs as $spec) { $caught = null; try { PhutilArgumentSpecification::newQuickSpec($spec); } catch (PhutilArgumentSpecificationException $ex) { $caught = $ex; } $this->assertEqual( !$expect, $caught instanceof Exception, 'Spec validity for: '.print_r($spec, true)); } } } } diff --git a/src/parser/argument/exception/PhutilArgumentParserException.php 
b/src/parser/argument/exception/PhutilArgumentParserException.php index 8dc1290..9927a57 100644 --- a/src/parser/argument/exception/PhutilArgumentParserException.php +++ b/src/parser/argument/exception/PhutilArgumentParserException.php @@ -1,8 +1,3 @@ setTagline('simple calculator example'); * $args->setSynopsis(<<setName('add') * ->setExamples('**add** __n__ ...') * ->setSynopsis('Compute the sum of a list of numbers.') * ->setArguments( * array( * array( * 'name' => 'numbers', * 'wildcard' => true, * ), * )); * * $mul_workflow = id(new PhutilArgumentWorkflow()) * ->setName('mul') * ->setExamples('**mul** __n__ ...') * ->setSynopsis('Compute the product of a list of numbers.') * ->setArguments( * array( * array( * 'name' => 'numbers', * 'wildcard' => true, * ), * )); * * $flow = $args->parseWorkflows( * array( * $add_workflow, * $mul_workflow, * new PhutilHelpArgumentWorkflow(), * )); * * $nums = $args->getArg('numbers'); * if (empty($nums)) { * echo "You must provide one or more numbers!\n"; * exit(1); * } * * foreach ($nums as $num) { * if (!is_numeric($num)) { * echo "Number '{$num}' is not numeric!\n"; * exit(1); * } * } * * switch ($flow->getName()) { * case 'add': * echo array_sum($nums)."\n"; * break; * case 'mul': * echo array_product($nums)."\n"; * break; * } * * You can also subclass this class and return `true` from * @{method:isExecutable}. In this case, the parser will automatically select * your workflow when the user invokes it. 
* * @stable * @concrete-extensible - * @group console */ class PhutilArgumentWorkflow { private $name; private $synopsis; private $specs = array(); private $examples; private $help; final public function __construct() { $this->didConstruct(); } public function setName($name) { $this->name = $name; return $this; } public function getName() { return $this->name; } /** * Provide brief usage examples of common calling conventions, like: * * $workflow->setExamples("**delete** __file__ [__options__]"); * * This text is shown in both brief and detailed help, and should give the * user a quick reference for common uses. You can separate several common * uses with newlines, but usually should not provide more than 2-3 examples. */ final public function setExamples($examples) { $this->examples = $examples; return $this; } final public function getExamples() { if (!$this->examples) { return '**'.$this->name.'**'; } return $this->examples; } /** * Provide a brief description of the command, like "Delete a file.". * * This text is shown in both brief and detailed help, and should give the * user a general idea of what the workflow does. */ final public function setSynopsis($synopsis) { $this->synopsis = $synopsis; return $this; } final public function getSynopsis() { return $this->synopsis; } /** * Provide a full explanation of the command. This text is shown only in * detailed help. 
*/ final public function getHelp() { return $this->help; } final public function setHelp($help) { $this->help = $help; return $this; } final public function setArguments(array $specs) { $specs = PhutilArgumentSpecification::newSpecsFromList($specs); $this->specs = $specs; return $this; } final public function getArguments() { return $this->specs; } protected function didConstruct() { return null; } public function isExecutable() { return false; } public function execute(PhutilArgumentParser $args) { throw new Exception("This workflow isn't executable!"); } /** * Normally, workflow arguments are parsed fully, so unexpected arguments will * raise an error. You can return `true` from this method to parse workflow * arguments only partially. This will allow you to manually parse remaining * arguments or delegate to a second level of workflows. * * @return bool True to partially parse workflow arguments (default false). */ public function shouldParsePartial() { return false; } } diff --git a/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php b/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php index 1f2a210..8a445ff 100644 --- a/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php +++ b/src/parser/argument/workflow/PhutilHelpArgumentWorkflow.php @@ -1,47 +1,44 @@ setName('help'); $this->setExamples(<<setSynopsis(<<setArguments( array( array( 'name' => 'help-with-what', 'wildcard' => true, ))); } public function isExecutable() { return true; } public function execute(PhutilArgumentParser $args) { $with = $args->getArg('help-with-what'); if (!$with) { $args->printHelpAndExit(); } else { foreach ($with as $thing) { echo phutil_console_format( "**%s WORKFLOW**\n\n", strtoupper($thing)); echo $args->renderWorkflowHelp($thing, $show_flags = true); echo "\n"; } exit(PhutilArgumentParser::PARSE_ERROR_CODE); } } } diff --git a/src/parser/xhpast/api/XHPASTNode.php b/src/parser/xhpast/api/XHPASTNode.php index c6ccae2..e44ffc3 100644 --- 
a/src/parser/xhpast/api/XHPASTNode.php +++ b/src/parser/xhpast/api/XHPASTNode.php @@ -1,237 +1,234 @@ getTypeName() == 'n_STRING_SCALAR' || $this->getTypeName() == 'n_NUMERIC_SCALAR'); } public function getDocblockToken() { if ($this->l == -1) { return null; } $tokens = $this->tree->getRawTokenStream(); for ($ii = $this->l - 1; $ii >= 0; $ii--) { if ($tokens[$ii]->getTypeName() == 'T_DOC_COMMENT') { return $tokens[$ii]; } if (!$tokens[$ii]->isAnyWhitespace()) { return null; } } return null; } public function evalStatic() { switch ($this->getTypeName()) { case 'n_STATEMENT': return $this->getChildByIndex(0)->evalStatic(); break; case 'n_STRING_SCALAR': return (string)$this->getStringLiteralValue(); case 'n_NUMERIC_SCALAR': $value = $this->getSemanticString(); if (preg_match('/^0x/i', $value)) { // Hex $value = base_convert(substr($value, 2), 16, 10); } else if (preg_match('/^0\d+$/i', $value)) { // Octal $value = base_convert(substr($value, 1), 8, 10); } return +$value; case 'n_SYMBOL_NAME': $value = $this->getSemanticString(); if ($value == 'INF') { return INF; } switch (strtolower($value)) { case 'true': return true; case 'false': return false; case 'null': return null; default: throw new Exception('Unrecognized symbol name.'); } break; case 'n_UNARY_PREFIX_EXPRESSION': $operator = $this->getChildOfType(0, 'n_OPERATOR'); $operand = $this->getChildByIndex(1); switch ($operator->getSemanticString()) { case '-': return -$operand->evalStatic(); break; case '+': return $operand->evalStatic(); break; default: throw new Exception('Unexpected operator in static expression.'); } break; case 'n_ARRAY_LITERAL': $result = array(); $values = $this->getChildOfType(0, 'n_ARRAY_VALUE_LIST'); foreach ($values->getChildren() as $child) { $key = $child->getChildByIndex(0); $val = $child->getChildByIndex(1); if ($key->getTypeName() == 'n_EMPTY') { $result[] = $val->evalStatic(); } else { $result[$key->evalStatic()] = $val->evalStatic(); } } return $result; case 
'n_CONCATENATION_LIST': $result = ''; foreach ($this->getChildren() as $child) { if ($child->getTypeName() == 'n_OPERATOR') { continue; } $result .= $child->evalStatic(); } return $result; default: throw new Exception( pht( 'Unexpected node during static evaluation, of type: %s', $this->getTypeName())); } } public function isConstantString() { switch ($this->getTypeName()) { case 'n_HEREDOC': case 'n_STRING_SCALAR': return !$this->getStringVariables(); case 'n_CONCATENATION_LIST': foreach ($this->getChildren() as $child) { if ($child->getTypeName() == 'n_OPERATOR') { continue; } if (!$child->isConstantString()) { return false; } } return true; default: return false; } } public function getStringVariables() { $value = $this->getConcreteString(); switch ($this->getTypeName()) { case 'n_HEREDOC': if (preg_match("/^<<<\s*'/", $value)) { // Nowdoc: <<<'EOT' return array(); } break; case 'n_STRING_SCALAR': if ($value[0] == "'") { return array(); } break; default: throw new Exception('Unexpected type '.$this->getTypeName().'.'); } // We extract just the variable names and ignore properties and array keys. $re = '/\\\\.|(\$|\{\$|\${)([a-z_\x7F-\xFF][a-z0-9_\x7F-\xFF]*)/i'; $matches = null; preg_match_all($re, $value, $matches, PREG_OFFSET_CAPTURE); return ipull(array_filter($matches[2]), 0, 1); } public function getStringLiteralValue() { if ($this->getTypeName() != 'n_STRING_SCALAR') { return null; } $value = $this->getSemanticString(); $type = $value[0]; $value = preg_replace('/^b?[\'"]|[\'"]$/i', '', $value); $esc = false; $len = strlen($value); $out = ''; if ($type == "'") { // Single quoted strings treat everything as a literal except "\\" and // "\'". return str_replace( array('\\\\', '\\\''), array('\\', "'"), $value); } // Double quoted strings treat "\X" as a literal if X isn't specifically // a character which needs to be escaped -- e.g., "\q" and "\'" are // literally "\q" and "\'". 
stripcslashes() is too aggressive, so find // all these under-escaped backslashes and escape them. for ($ii = 0; $ii < $len; $ii++) { $c = $value[$ii]; if ($esc) { $esc = false; switch ($c) { case 'x': $u = isset($value[$ii + 1]) ? $value[$ii + 1] : null; if (!preg_match('/^[a-f0-9]/i', $u)) { // PHP treats \x followed by anything which is not a hex digit // as a literal \x. $out .= '\\\\'.$c; break; } /* fallthrough */ case 'n': case 'r': case 'f': case 'v': case '"': case '$': case 't': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': $out .= '\\'.$c; break; case 'e': // Since PHP 5.4.0, this means "esc". However, stripcslashes() does // not perform this conversion. $out .= chr(27); break; default: $out .= '\\\\'.$c; break; } } else if ($c == '\\') { $esc = true; } else { $out .= $c; } } return stripcslashes($out); } public function getLineNumber() { return idx($this->tree->getOffsetToLineNumberMap(), $this->getOffset()); } } diff --git a/src/parser/xhpast/api/XHPASTSyntaxErrorException.php b/src/parser/xhpast/api/XHPASTSyntaxErrorException.php index 7c9528f..395b07f 100644 --- a/src/parser/xhpast/api/XHPASTSyntaxErrorException.php +++ b/src/parser/xhpast/api/XHPASTSyntaxErrorException.php @@ -1,19 +1,16 @@ errorLine = $line; parent::__construct($message); } public function getErrorLine() { return $this->errorLine; } } diff --git a/src/parser/xhpast/api/XHPASTToken.php b/src/parser/xhpast/api/XHPASTToken.php index 0d5b926..6013c8b 100644 --- a/src/parser/xhpast/api/XHPASTToken.php +++ b/src/parser/xhpast/api/XHPASTToken.php @@ -1,42 +1,39 @@ typeName)) { $type_id = $this->typeID; if ($type_id <= 255) { $this->typeName = chr($type_id); } $this->typeName = parent::getTypeName(); } return $this->typeName; } public function isComment() { static $type_ids = null; if ($type_ids === null) { $type_ids = array( $this->tree->getTokenTypeIDFromTypeName('T_COMMENT') => true, $this->tree->getTokenTypeIDFromTypeName('T_DOC_COMMENT') => true, ); } 
return isset($type_ids[$this->typeID]); } public function isAnyWhitespace() { static $type_ids = null; if ($type_ids === null) { $type_ids = array( $this->tree->getTokenTypeIDFromTypeName('T_WHITESPACE') => true, ); } return isset($type_ids[$this->typeID]); } } diff --git a/src/parser/xhpast/api/XHPASTTree.php b/src/parser/xhpast/api/XHPASTTree.php index 738a1af..f660d1f 100644 --- a/src/parser/xhpast/api/XHPASTTree.php +++ b/src/parser/xhpast/api/XHPASTTree.php @@ -1,76 +1,73 @@ setTreeType('XHP'); $this->setNodeConstants(xhp_parser_node_constants()); $this->setTokenConstants(xhpast_parser_token_constants()); parent::__construct($tree, $stream, $source); } public function newNode($id, array $data, AASTTree $tree) { return new XHPASTNode($id, $data, $tree); } public function newToken( $id, $type, $value, $offset, AASTTree $tree) { return new XHPASTToken($id, $type, $value, $offset, $tree); } public static function newFromData($php_source) { $future = xhpast_get_parser_future($php_source); return self::newFromDataAndResolvedExecFuture( $php_source, $future->resolve()); } public static function newStatementFromString($string) { $string = 'getRootNode()->selectDescendantsOfType('n_STATEMENT'); if (count($statements) != 1) { throw new Exception('String does not parse into exactly one statement!'); } // Return the first one, trying to use reset() with iterators ends in tears. 
foreach ($statements as $statement) { return $statement; } } public static function newFromDataAndResolvedExecFuture( $php_source, array $resolved) { list($err, $stdout, $stderr) = $resolved; if ($err) { if ($err == 1) { $matches = null; $is_syntax = preg_match( '/^XHPAST Parse Error: (.*) on line (\d+)/s', $stderr, $matches); if ($is_syntax) { throw new XHPASTSyntaxErrorException($matches[2], $stderr); } } throw new Exception("XHPAST failed to parse file data {$err}: {$stderr}"); } $data = json_decode($stdout, true); if (!is_array($data)) { throw new Exception('XHPAST: failed to decode tree.'); } return new XHPASTTree($data['tree'], $data['stream'], $php_source); } } diff --git a/src/parser/xhpast/api/__tests__/XHPASTTreeTestCase.php b/src/parser/xhpast/api/__tests__/XHPASTTreeTestCase.php index ae46032..532cab9 100644 --- a/src/parser/xhpast/api/__tests__/XHPASTTreeTestCase.php +++ b/src/parser/xhpast/api/__tests__/XHPASTTreeTestCase.php @@ -1,143 +1,139 @@ assertEval(1, '1'); $this->assertEval("a", '"a"'); $this->assertEval(-1.1, '-1.1'); $this->assertEval( array('foo', 'bar', -1, +2, -3.4, +4.3, 1e10, 1e-5, -2.3e7), "array('foo', 'bar', -1, +2, -3.4, +4.3, 1e10, 1e-5, -2.3e7)"); $this->assertEval( array(), "array()"); $this->assertEval( array(42 => 7, 'a' => 5, 1, 2, 3, 4, 1 => 'goo'), "array(42 => 7, 'a' => 5, 1, 2, 3, 4, 1 => 'goo')"); $this->assertEval( array('a' => 'a', 'b' => array(1, 2, array(3))), "array('a' => 'a', 'b' => array(1, 2, array(3)))"); $this->assertEval( array(true, false, null), "array(true, false, null)"); // Duplicate keys $this->assertEval( array(0 => '1', 0 => '2'), "array(0 => '1', 0 => '2')"); $this->assertEval('simple string', "'simple string'"); $this->assertEval('42', "'42'"); $this->assertEval('binary string', "b'binary string'"); $this->assertEval(3.1415926, "3.1415926"); $this->assertEval(42, '42'); $this->assertEval( array(2147483648, 2147483647, -2147483648, -2147483647), "array(2147483648, 2147483647, -2147483648, 
-2147483647)"); $this->assertEval(INF, 'INF'); $this->assertEval(-INF, '-INF'); $this->assertEval(0x1b, '0x1b'); $this->assertEval(0X0A, '0X0A'); // Octal $this->assertEval(010, '010'); $this->assertEval(080, '080'); // Invalid! // Leading 0, but float, not octal. $this->assertEval(0.11e1, '0.11e1'); $this->assertEval(0e1, '0e1'); $this->assertEval(0, '0'); // Static evaluation treats '$' as a literal dollar glyph. $this->assertEval('$asdf', '"$asdf"'); $this->assertEval( '\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z'. '\1\2\3\4\5\6\7\8\9\0'. '\!\@\#\$\%\^\&\*\(\)'. '\`\~\\\|\[\]\{\}\<\>\,\.\/\?\:\;\-\_\=\+', "'\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q". "\\r\\s\\t\\u\\v\\w\\x\\y\\z". "\\1\\2\\3\\4\\5\\6\\7\\8\\9\\0". "\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)". "\\`\\~\\\\\\|\\[\\]\\{\\}\\<\\>\\,\\.\\/\\?\\:\\;\\-\\_\\=\\+". "'"); // After PHP 5.4.0, "\e" means "escape", not "backslash e". We implement the // newer rules, but if we're running in an older version of PHP we can not // express them with "\e". $this->assertEval(chr(27), '"\\e"'); $this->assertEval( "\a\b\c\d\x1B\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z". "\1\2\3\4\5\6\7\8\9\0". "\!\@\#\$\%\^\&\*\(\)". "\`\~\\\|\[\]\{\}\<\>\,\.\/\?\:\;\-\_\=\+", '"\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q'. '\\r\\s\\t\\u\\v\\w\\x\\y\\z'. '\\1\\2\\3\\4\\5\\6\\7\\8\\9\\0'. '\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)'. 
'\\`\\~\\\\\\|\\[\\]\\{\\}\\<\\>\\,\\.\\/\\?\\:\\;\\-\\_\\=\\+"'); $this->assertEval( '\' "', "'\\' \"'"); $this->assertEval( '\\ \\\\ ', '\'\\\\ \\\\\\\\ \''); $this->assertEval( '\ \\ ', "'\\ \\\\ '"); $this->assertEval( '\x92', '\'\x92\''); $this->assertEval( "\x92", '"\x92"'); $this->assertEval( "\x", '"\x"'); $this->assertEval( "\x1", '"\x1"'); $this->assertEval( "\x000 !", '"\x000 !"'); $this->assertEval( "\x0", '"\x0"'); $this->assertEval( "\xg", '"\xg"'); - } private function assertEval($value, $string) { $this->assertEqual( $value, XHPASTTree::newStatementFromString($string)->evalStatic(), $string); } } - - diff --git a/src/readableserializer/PhutilReadableSerializer.php b/src/readableserializer/PhutilReadableSerializer.php index c87efc5..ea37130 100644 --- a/src/readableserializer/PhutilReadableSerializer.php +++ b/src/readableserializer/PhutilReadableSerializer.php @@ -1,188 +1,189 @@ 1) { $str .= 'of size '.count($value).' starting with: '; } reset($value); // Prevent key() from giving warning message in HPHP. $str .= '{ '.self::printShort(key($value)).' => '. self::printShort(head($value)).' }'; } return $str; } else { // NOTE: Avoid phutil_utf8_shorten() here since the data may not be // UTF8 anyway, it's slow for large inputs, and it might not be loaded // yet. $limit = 1024; $str = self::printableValue($value); if (strlen($str) > $limit) { if (is_string($value)) { $str = "'".substr($str, 1, $limit)."...'"; } else { $str = substr($str, 0, $limit).'...'; } } return $str; } } /** * Dump some debug output about an object's members without the - * potential recursive explosion of verbosity that comes with ##print_r()##. + * potential recursive explosion of verbosity that comes with `print_r()`. * - * To print any number of member variables, pass null for $max_members. + * To print any number of member variables, pass null for `$max_members`. * * @param wild Any value. * @param int Maximum depth to print for nested arrays and objects. 
* @param int Maximum number of values to print at each level. * @return string Human-readable shallow representation of the value. * @task print */ public static function printShallow( $value, $max_depth = 2, $max_members = 25) { return self::printShallowRecursive($value, $max_depth, $max_members, 0, ''); } /* -( Internals )---------------------------------------------------------- */ /** * Implementation for @{method:printShallow}. * * @param wild Any value. * @param int Maximum depth to print for nested arrays and objects. * @param int Maximum number of values to print at each level. * @param int Current depth. * @param string Indentation string. * @return string Human-readable shallow representation of the value. * @task internal */ private static function printShallowRecursive( $value, $max_depth, $max_members, $depth, $indent) { if (!is_object($value) && !is_array($value)) { return self::addIndentation(self::printableValue($value), $indent, 1); } $ret = ''; if (is_object($value)) { $ret = get_class($value)."\nwith members "; $value = array_filter(@(array)$value); // Remove null characters that magically appear around keys for // member variables of parent classes. $transformed = array(); foreach ($value as $key => $x) { $transformed[str_replace("\0", ' ', $key)] = $x; } $value = $transformed; } if ($max_members !== null) { $value = array_slice($value, 0, $max_members, $preserve_keys = true); } $shallow = array(); if ($depth < $max_depth) { foreach ($value as $k => $v) { $shallow[$k] = self::printShallowRecursive( $v, $max_depth, $max_members, $depth + 1, ' '); } } else { foreach ($value as $k => $v) { // Extra indentation is for empty arrays, because they wrap on multiple // lines and lookup stupid without the extra indentation $shallow[$k] = self::addIndentation(self::printShort($v), $indent, 1); } } return self::addIndentation($ret.print_r($shallow, true), $indent, 1); } /** - * Adds indentation to the beginning of every line starting from $first_line. 
+ * Adds indentation to the beginning of every line starting from + * `$first_line`. * * @param string Printed value. * @param string String to indent with. * @param int Index of first line to indent. * @return string * @task internal */ private static function addIndentation($value, $indent, $first_line) { $lines = explode("\n", $value); $out = array(); foreach ($lines as $index => $line) { $out[] = $index >= $first_line ? $indent.$line : $line; } return implode("\n", $out); } + } diff --git a/src/symbols/PhutilSymbolLoader.php b/src/symbols/PhutilSymbolLoader.php index 4a75e66..3366d7e 100644 --- a/src/symbols/PhutilSymbolLoader.php +++ b/src/symbols/PhutilSymbolLoader.php @@ -1,396 +1,393 @@ setType('class') * ->setLibrary('example') * ->selectAndLoadSymbols(); * * When you execute the loading query, it returns a dictionary of matching * symbols: * * array( * 'class$Example' => array( * 'type' => 'class', * 'name' => 'Example', * 'library' => 'libexample', * 'where' => 'examples/example.php', * ), * // ... more ... * ); * * The **library** and **where** keys show where the symbol is defined. The * **type** and **name** keys identify the symbol itself. * * NOTE: This class must not use libphutil functions, including id() and idx(). * * @task config Configuring the Query * @task load Loading Symbols * @task internal Internals - * - * @group library */ final class PhutilSymbolLoader { private $type; private $library; private $base; private $name; private $concrete; private $pathPrefix; private $suppressLoad; /** * Select the type of symbol to load, either ##class## or ##function##. * * @param string Type of symbol to load. * @return this * @task config */ public function setType($type) { $this->type = $type; return $this; } /** * Restrict the symbol query to a specific library; only symbols from this * library will be loaded. * * @param string Library name. 
* @return this * @task config */ public function setLibrary($library) { // Validate the library name; this throws if the library is not loaded. $bootloader = PhutilBootloader::getInstance(); $bootloader->getLibraryRoot($library); $this->library = $library; return $this; } /** * Restrict the symbol query to a specific path prefix; only symbols defined * in files below that path will be selected. * * @param string Path relative to library root, like "apps/cheese/". * @return this * @task config */ public function setPathPrefix($path) { $this->pathPrefix = str_replace(DIRECTORY_SEPARATOR, '/', $path); return $this; } /** * Restrict the symbol query to a single symbol name, e.g. a specific class * or function name. * * @param string Symbol name. * @return this * @task config */ public function setName($name) { $this->name = $name; return $this; } /** * Restrict the symbol query to only descendants of some class. This will * strictly select descendants, the base class will not be selected. This * implies loading only classes. * * @param string Base class name. * @return this * @task config */ public function setAncestorClass($base) { $this->base = $base; return $this; } /** * Restrict the symbol query to only concrete symbols; this will filter out * abstract classes. * * NOTE: This currently causes class symbols to load, even if you run * @{method:selectSymbolsWithoutLoading}. * * @param bool True if the query should load only concrete symbols. * @return this * @task config */ public function setConcreteOnly($concrete) { $this->concrete = $concrete; return $this; } /* -( Load )--------------------------------------------------------------- */ /** * Execute the query and select matching symbols, then load them so they can * be used. * * @return dict A dictionary of matching symbols. See top-level class * documentation for details. These symbols will be loaded * and available. 
* @task load */ public function selectAndLoadSymbols() { $map = array(); $bootloader = PhutilBootloader::getInstance(); if ($this->library) { $libraries = array($this->library); } else { $libraries = $bootloader->getAllLibraries(); } if ($this->type) { $types = array($this->type); } else { $types = array( 'class', 'function', ); } $names = null; if ($this->base) { $names = $this->selectDescendantsOf( $bootloader->getClassTree(), $this->base); } $symbols = array(); foreach ($libraries as $library) { $map = $bootloader->getLibraryMap($library); foreach ($types as $type) { if ($type == 'interface') { $lookup_map = $map['class']; } else { $lookup_map = $map[$type]; } // As an optimization, we filter the list of candidate symbols in // several passes, applying a name-based filter first if possible since // it is highly selective and guaranteed to match at most one symbol. // This is the common case and we land here through __autoload() so it's // worthwhile to microoptimize a bit because this code path is very hot // and we save 5-10ms per page for a very moderate increase in // complexity. if ($this->name) { // If we have a name filter, just pick the matching name out if it // exists. if (isset($lookup_map[$this->name])) { $filtered_map = array( $this->name => $lookup_map[$this->name], ); } else { $filtered_map = array(); } } else if ($names !== null) { $filtered_map = array(); foreach ($names as $name => $ignored) { if (isset($lookup_map[$name])) { $filtered_map[$name] = $lookup_map[$name]; } } } else { // Otherwise, start with everything. 
$filtered_map = $lookup_map; } if ($this->pathPrefix) { $len = strlen($this->pathPrefix); foreach ($filtered_map as $name => $where) { if (strncmp($where, $this->pathPrefix, $len) !== 0) { unset($filtered_map[$name]); } } } foreach ($filtered_map as $name => $where) { $symbols[$type.'$'.$name] = array( 'type' => $type, 'name' => $name, 'library' => $library, 'where' => $where, ); } } } if (!$this->suppressLoad) { $caught = null; foreach ($symbols as $symbol) { try { $this->loadSymbol($symbol); } catch (Exception $ex) { $caught = $ex; } } if ($caught) { // NOTE: We try to load everything even if we fail to load something, // primarily to make it possible to remove functions from a libphutil // library without breaking library startup. throw $caught; } } if ($this->concrete) { // Remove 'abstract' classes. foreach ($symbols as $key => $symbol) { if ($symbol['type'] == 'class') { $reflection = new ReflectionClass($symbol['name']); if ($reflection->isAbstract()) { unset($symbols[$key]); } } } } return $symbols; } /** * Execute the query and select matching symbols, but do not load them. This * will perform slightly better if you are only interested in the existence * of the symbols and don't plan to use them; otherwise, use - * ##selectAndLoadSymbols()##. + * @{method:selectAndLoadSymbols}. * * @return dict A dictionary of matching symbols. See top-level class * documentation for details. * @task load */ public function selectSymbolsWithoutLoading() { $this->suppressLoad = true; $result = $this->selectAndLoadSymbols(); $this->suppressLoad = false; return $result; } /** * Select symbols matching the query and then instantiate them, returning * concrete objects. This is a convenience method which simplifies symbol * handling if you are only interested in building objects. * * If you want to do more than build objects, or want to build objects with * varying constructor arguments, use @{method:selectAndLoadSymbols} for * fine-grained control over results. 
* * This method implicitly restricts the query to match only concrete * classes. * * @param list List of constructor arguments. * @return map Map of class names to constructed objects. */ public function loadObjects(array $argv = array()) { $symbols = $this ->setConcreteOnly(true) ->setType('class') ->selectAndLoadSymbols(); $objects = array(); foreach ($symbols as $symbol) { $objects[$symbol['name']] = newv($symbol['name'], $argv); } return $objects; } /* -( Internals )---------------------------------------------------------- */ /** * @task internal */ private function selectDescendantsOf(array $tree, $root) { $result = array(); if (empty($tree[$root])) { // No known descendants. return array(); } foreach ($tree[$root] as $child) { $result[$child] = true; if (!empty($tree[$child])) { $result += $this->selectDescendantsOf($tree, $child); } } return $result; } /** * @task internal */ private function loadSymbol(array $symbol_spec) { - // Check if we've already loaded the symbol; bail if we have. $name = $symbol_spec['name']; $is_function = ($symbol_spec['type'] == 'function'); if ($is_function) { if (function_exists($name)) { return; } } else { if (class_exists($name, false) || interface_exists($name, false)) { return; } } $lib_name = $symbol_spec['library']; $where = $symbol_spec['where']; $bootloader = PhutilBootloader::getInstance(); $bootloader->loadLibrarySource($lib_name, $where); // Check that we successfully loaded the symbol from wherever it was // supposed to be defined. $load_failed = null; if ($is_function) { if (!function_exists($name)) { $load_failed = 'function'; } } else { if (!class_exists($name, false) && !interface_exists($name, false)) { $load_failed = 'class or interface'; } } if ($load_failed !== null) { $lib_path = phutil_get_library_root($lib_name); throw new PhutilMissingSymbolException( $name, $load_failed, "the symbol map for library '{$lib_name}' (at '{$lib_path}') claims ". 
"this {$load_failed} is defined in '{$where}', but loading that ". "source file did not cause the {$load_failed} to become defined."); } } } diff --git a/src/symbols/exception/PhutilMissingSymbolException.php b/src/symbols/exception/PhutilMissingSymbolException.php index c8e061e..3b8fce6 100644 --- a/src/symbols/exception/PhutilMissingSymbolException.php +++ b/src/symbols/exception/PhutilMissingSymbolException.php @@ -1,16 +1,15 @@ setMaximumGlyphs(80) * ->truncateString($long); * * Byte limits restrict the number of bytes the result may contain. They are * appropriate when you care about how much storage a string requires. * * Codepoint limits restrict the number of codepoints the result may contain. * Since codepoints may have up to 4 bytes, the resulting strings may require * more than this many bytes. This kind of limit is appropriate when you * are using UTF-8 storage, like MySQL. * * Glyph limits restrict the display size of the string. Because a single glyph * may have an arbitrary number of combining characters, this does not impose * a storage size limit on the string: a string with only one glyph may require * an arbitrarily large number of bytes. * * You can set more than one limit; the smallest limit will be used. * * NOTE: This function makes a best effort to apply some reasonable rules but * will not work well for the full range of unicode languages. 
- * - * @group utf8 */ final class PhutilUTF8StringTruncator extends Phobject { private $maximumBytes; private $maximumCodepoints; private $maximumGlyphs; private $minimumLimit; private $terminator = "\xE2\x80\xA6"; private $terminatorBytes = 3; private $terminatorCodepoints = 1; private $terminatorGlyphs = 1; public function setMaximumBytes($maximum_bytes) { $this->maximumBytes = $maximum_bytes; $this->didUpdateMaxima(); return $this; } public function setMaximumCodepoints($maximum_codepoints) { $this->maximumCodepoints = $maximum_codepoints; $this->didUpdateMaxima(); return $this; } public function setMaximumGlyphs($maximum_glyphs) { $this->maximumGlyphs = $maximum_glyphs; $this->didUpdateMaxima(); return $this; } private function didUpdateMaxima() { $this->minimumLimit = INF; if ($this->maximumBytes) { $this->minimumLimit = min($this->minimumLimit, $this->maximumBytes); } if ($this->maximumCodepoints) { $this->minimumLimit = min($this->minimumLimit, $this->maximumCodepoints); } if ($this->maximumGlyphs) { $this->minimumLimit = min($this->minimumLimit, $this->maximumGlyphs); } } public function setTerminator($terminator) { $this->terminator = $terminator; $this->terminatorBytes = strlen($terminator); $this->terminatorCodepoints = count(phutil_utf8v($terminator)); $this->terminatorGlyphs = count(phutil_utf8v_combined($terminator)); return $this; } public function truncateString($string) { // First, check if the string has fewer bytes than the most restrictive // limit. Codepoints and glyphs always take up at least one byte, so we can // just return the string unmodified if we're under all of the limits. $byte_len = strlen($string); if ($byte_len <= $this->minimumLimit) { return $string; } // If we need the vector of codepoints, build it. 
$string_pv = null; if ($this->maximumCodepoints) { $string_pv = phutil_utf8v($string); $point_len = count($string_pv); } // We always need the combined vector, even if we're only doing byte or // codepoint truncation, because we don't want to truncate to half of a // combining character. $string_gv = phutil_utf8v_combined($string); $glyph_len = count($string_gv); // Now, check if we're still over the limits. For example, a string may // be over the raw byte limit but under the glyph limit if it contains // several multibyte characters. $too_long = false; if ($this->maximumBytes && ($byte_len > $this->maximumBytes)) { $too_long = true; } if ($this->maximumCodepoints && ($point_len > $this->maximumCodepoints)) { $too_long = true; } if ($this->maximumGlyphs && ($glyph_len > $this->maximumGlyphs)) { $too_long = true; } if (!$too_long) { return $string; } // This string is legitimately longer than at least one of the limits, so // we need to truncate it. Find the minimum cutoff point: this is the last // glyph we can possibly return while satisfying the limits and having space // for the terminator. 
$cutoff = $glyph_len; if ($this->maximumBytes) { if ($byte_len <= $this->maximumBytes) { $cutoff = $glyph_len; } else { $bytes = $this->terminatorBytes; for ($ii = 0; $ii < $glyph_len; $ii++) { $bytes += strlen($string_gv[$ii]); if ($bytes > $this->maximumBytes) { $cutoff = $ii; break; } } } } if ($this->maximumCodepoints) { if ($point_len <= $this->maximumCodepoints) { $cutoff = min($cutoff, $glyph_len); } else { $points = 0; for ($ii = 0; $ii < $glyph_len; $ii++) { $glyph_bytes = strlen($string_gv[$ii]); while ($points < $point_len) { $glyph_bytes -= strlen($string_pv[$points]); $points++; if ($glyph_bytes <= 0) { break; } } $points_total = $points + $this->terminatorCodepoints; if ($points_total > $this->maximumCodepoints) { $cutoff = min($cutoff, $ii); break; } } } } if ($this->maximumGlyphs) { if ($glyph_len <= $this->maximumGlyphs) { $cutoff = min($cutoff, $glyph_len); } else { $cutoff = min($cutoff, $this->maximumGlyphs - $this->terminatorGlyphs); } } // If we don't have enough characters for anything, just return the // terminator. if ($cutoff <= 0) { return $this->terminator; } // Otherwise, we're going to try to cut the string off somewhere reasonable // rather than somewhere arbitrary. // NOTE: This is not complete, and there are many other word boundary // characters and reasonable places to break words in the UTF-8 character // space. For now, this gives us reasonable behavior for latin langauges. We // don't necessarily have access to PCRE+Unicode so there isn't a great way // for us to look up character attributes. // If we encounter these, prefer to break on them instead of cutting the // string off in the middle of a word. static $break_characters = array( ' ' => true, "\n" => true, ';' => true, ':' => true, '[' => true, '(' => true, ',' => true, '-' => true, ); // If we encounter these, shorten to this character exactly without // appending the terminal. static $stop_characters = array( '.' => true, '!' => true, '?' 
=> true, ); // Search backward in the string, looking for reasonable places to break it. $word_boundary = null; $stop_boundary = null; // If we do a word break with a terminal, we have to look beyond at least // the number of characters in the terminal. If the terminal is longer than // the required length, we'll skip this whole block and return it on its // own. // Only search backward for a while. At some point we don't get a better // result by looking through the whole string, and if this is "MMM..." or // a non-latin language without word break characters we're just wasting // time. $search = max(0, $cutoff - 256); for ($ii = min($cutoff, $glyph_len - 1); $ii >= $search; $ii--) { $c = $string_gv[$ii]; if (isset($break_characters[$c])) { $word_boundary = $ii; } else if (isset($stop_characters[$c])) { $stop_boundary = $ii + 1; break; } else { if ($word_boundary !== null) { break; } } } if ($stop_boundary !== null) { // We found a character like ".". Cut the string there, without appending // the terminal. $string_part = array_slice($string_gv, 0, $stop_boundary); return implode('', $string_part); } // If we didn't find any boundary characters or we found ONLY boundary // characters, just break at the maximum character length. 
if ($word_boundary === null || $word_boundary === 0) { $word_boundary = $cutoff; } $string_part = array_slice($string_gv, 0, $word_boundary); $string_part = implode('', $string_part); return $string_part.$this->terminator; } } diff --git a/src/utils/__tests__/AbstractDirectedGraphTestCase.php b/src/utils/__tests__/AbstractDirectedGraphTestCase.php index ea7edb2..de2e626 100644 --- a/src/utils/__tests__/AbstractDirectedGraphTestCase.php +++ b/src/utils/__tests__/AbstractDirectedGraphTestCase.php @@ -1,130 +1,128 @@ array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, 'Trivial Graph'); } public function testNoncyclicGraph() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D'), 'C' => array(), 'D' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, 'Noncyclic Graph'); } public function testTrivialCyclicGraph() { $graph = array( 'A' => array('A'), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(array('A', 'A'), $cycle, 'Trivial Cycle'); } public function testCyclicGraph() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D'), 'C' => array('E', 'F'), 'D' => array(), 'E' => array(), 'F' => array('G', 'C'), 'G' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(array('A', 'C', 'F', 'C'), $cycle, 'Cyclic Graph'); } public function testNonTreeGraph() { // This graph is non-cyclic, but C is both a child and a grandchild of A. // This is permitted. 
$graph = array( 'A' => array('B', 'C'), 'B' => array('C'), 'C' => array(), ); $cycle = $this->findGraphCycle($graph); $this->assertEqual(null, $cycle, 'NonTreeGraph'); } public function testEdgeLoadFailure() { $graph = array( 'A' => array('B'), ); $raised = null; try { $this->findGraphCycle($graph); } catch (Exception $ex) { $raised = $ex; } $this->assertTrue( (bool)$raised, 'Exception raised by unloadable edges.'); } public function testTopographicSortTree() { $graph = array( 'A' => array('B', 'C'), 'B' => array('D', 'E'), 'C' => array(), 'D' => array(), 'E' => array() ); $sorted = $this->getTopographicSort($graph); $this->assertEqual( array('A', 'C', 'B', 'E', 'D'), $sorted, 'Topographically sorted tree.'); $graph = array( 'A' => array('B', 'C'), 'B' => array('C'), 'C' => array('D', 'E'), 'D' => array('E'), 'E' => array() ); $sorted = $this->getTopographicSort($graph); $this->assertEqual( array('A', 'B', 'C', 'D', 'E'), $sorted, 'Topographically sorted tree with nesting.'); } private function findGraphCycle(array $graph, $seed = 'A', $search = 'A') { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_select_keys($graph, array($seed))); $detector->loadGraph(); return $detector->detectCycles($search); } private function getTopographicSort(array $graph, $seed = 'A') { $detector = new TestAbstractDirectedGraph(); $detector->setTestData($graph); $detector->addNodes(array_select_keys($graph, array($seed))); $detector->loadGraph(); return $detector->getTopographicallySortedNodes(); } + } diff --git a/src/utils/__tests__/MFilterTestHelper.php b/src/utils/__tests__/MFilterTestHelper.php index 2668002..3bf248e 100644 --- a/src/utils/__tests__/MFilterTestHelper.php +++ b/src/utils/__tests__/MFilterTestHelper.php @@ -1,30 +1,27 @@ h = $h_value; $this->i = $i_value; $this->j = $j_value; } public function getH() { return $this->h; } public function getI() { return $this->i; } public function getJ() { return $this->j; } } diff 
--git a/src/utils/__tests__/PhutilArrayTestCase.php b/src/utils/__tests__/PhutilArrayTestCase.php index 49f62e9..e814046 100644 --- a/src/utils/__tests__/PhutilArrayTestCase.php +++ b/src/utils/__tests__/PhutilArrayTestCase.php @@ -1,74 +1,72 @@ assertEqual(0, $a[99]); $a[99] = 1; $this->assertEqual(1, $a[99]); $a->setDefaultValue('default'); $this->assertEqual('default', $a['key']); $this->assertEqual( array( 99 => 1, 'key' => 'default', ), $a->toArray()); $init = array( 'apple' => 'red', ); $b = new PhutilArrayWithDefaultValue($init); $this->assertEqual($init, $b->toArray()); $fruits = array( 'apple', 'cherry', 'banana', 'cherry', 'cherry', 'apple', 'banana', 'plum', 'cherry', 'cherry', ); $counts = new PhutilArrayWithDefaultValue(); foreach ($fruits as $fruit) { $counts[$fruit] += 1; } $this->assertEqual( array( 'apple' => 2, 'cherry' => 5, 'banana' => 2, 'plum' => 1, ), $counts->toArray()); $masks = array( 1, 2, 4, ); $bitmask = new PhutilArrayWithDefaultValue(); foreach ($masks as $mask) { $bitmask['value'] |= $mask; } $this->assertEqual(7, $bitmask['value']); } } diff --git a/src/utils/__tests__/PhutilBufferedIteratorTestCase.php b/src/utils/__tests__/PhutilBufferedIteratorTestCase.php index f8f1f33..2693fe9 100644 --- a/src/utils/__tests__/PhutilBufferedIteratorTestCase.php +++ b/src/utils/__tests__/PhutilBufferedIteratorTestCase.php @@ -1,27 +1,25 @@ setPageSize(3); $iterator->setExampleData($expect); $results = array(); foreach ($iterator as $key => $value) { $results[$key] = $value; } $this->assertEqual( $expect, $results); } } diff --git a/src/utils/__tests__/PhutilChunkedIteratorTestCase.php b/src/utils/__tests__/PhutilChunkedIteratorTestCase.php index 2db8198..fe18455 100644 --- a/src/utils/__tests__/PhutilChunkedIteratorTestCase.php +++ b/src/utils/__tests__/PhutilChunkedIteratorTestCase.php @@ -1,32 +1,29 @@ 1, 1 => 3), array(2 => 5), ); $iterator = new PhutilChunkedIterator( new ArrayIterator(array(1, 3, 5)), 2); $this->assertEqual( $expect, 
iterator_to_array($iterator)); for ($i = 0; $i < 2; $i++) { foreach ($iterator as $key => $actual) { $this->assertEqual(idx($expect, $key), $actual); } } - } } diff --git a/src/utils/__tests__/PhutilUTF8TestCase.php b/src/utils/__tests__/PhutilUTF8TestCase.php index 7dc8779..d6abfce 100644 --- a/src/utils/__tests__/PhutilUTF8TestCase.php +++ b/src/utils/__tests__/PhutilUTF8TestCase.php @@ -1,564 +1,557 @@ assertEqual($input, phutil_utf8ize($input)); } public function testUTF8izeUTF8Ignored() { $input = "\xc3\x9c \xc3\xbc \xe6\x9d\xb1!"; $this->assertEqual($input, phutil_utf8ize($input)); } public function testUTF8izeLongStringNosegfault() { // For some reason my laptop is segfaulting on long inputs inside // preg_match(). Forestall this craziness in the common case, at least. phutil_utf8ize(str_repeat('x', 1024 * 1024)); $this->assertTrue(true); } public function testUTF8izeInvalidUTF8Fixed() { $input = "\xc3 this has \xe6\x9d some invalid utf8 \xe6"; $expect = "\xEF\xBF\xBD this has \xEF\xBF\xBD\xEF\xBF\xBD some invalid utf8 ". "\xEF\xBF\xBD"; $result = phutil_utf8ize($input); $this->assertEqual($expect, $result); } public function testUTF8izeOwlIsCuteAndFerocious() { // This was once a ferocious owl when we used to use "?" as the replacement // character instead of U+FFFD, but now he is sort of not as cute or // ferocious. $input = "M(o\xEE\xFF\xFFo)M"; $expect = "M(o\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBDo)M"; $result = phutil_utf8ize($input); $this->assertEqual($expect, $result); } public function testUTF8len() { $strings = array( '' => 0, 'x' => 1, "\xEF\xBF\xBD" => 1, "x\xe6\x9d\xb1y" => 3, 'xyz' => 3, 'quack' => 5, ); foreach ($strings as $str => $expect) { $this->assertEqual($expect, phutil_utf8_strlen($str), 'Length of '.$str); } } public function testUTF8v() { $strings = array( '' => array(), 'x' => array('x'), 'quack' => array('q', 'u', 'a', 'c', 'k'), "x\xe6\x9d\xb1y" => array('x', "\xe6\x9d\xb1", 'y'), // This is a combining character. 
"x\xCD\xA0y" => array('x', "\xCD\xA0", 'y'), ); foreach ($strings as $str => $expect) { $this->assertEqual($expect, phutil_utf8v($str), 'Vector of '.$str); } } public function testUTF8vCodepoints() { $strings = array( '' => array(), 'x' => array(0x78), 'quack' => array(0x71, 0x75, 0x61, 0x63, 0x6B), "x\xe6\x9d\xb1y" => array(0x78, 0x6771, 0x79), "\xC2\xBB" => array(0x00BB), "\xE2\x98\x83" => array(0x2603), "\xEF\xBF\xBF" => array(0xFFFF), "\xF0\x9F\x92\xA9" => array(0x1F4A9), // This is a combining character. "x\xCD\xA0y" => array(0x78, 0x0360, 0x79), ); foreach ($strings as $str => $expect) { $this->assertEqual( $expect, phutil_utf8v_codepoints($str), 'Codepoint Vector of '.$str); } } public function testUTF8ConsoleStrlen() { $strings = array( '' => 0, "\0" => 0, 'x' => 1, // Double-width chinese character. "\xe6\x9d\xb1" => 2, // Combining character. "x\xCD\xA0y" => 2, // Combining plus double-width. "\xe6\x9d\xb1\xCD\xA0y" => 3, // Colors and formatting. "\x1B[1mx\x1B[m" => 1, "\x1B[1m\x1B[31mx\x1B[m" => 1, ); foreach ($strings as $str => $expect) { $this->assertEqual( $expect, phutil_utf8_console_strlen($str), 'Console Length of '.$str); } } public function testUTF8shorten() { $inputs = array( array('1erp derp derp', 9, '', '1erp derp'), array('2erp derp derp', 12, '...', '2erp derp...'), array('derpxderpxderp', 12, '...', 'derpxderp...'), array("derp\xE2\x99\x83derpderp", 12, '...', "derp\xE2\x99\x83derp..."), array('', 12, '...', ''), array('derp', 12, '...', 'derp'), array('11111', 5, '2222', '11111'), array('111111', 5, '2222', '12222'), array('D1rp. Derp derp.', 7, '...', 'D1rp.'), // "D2rp." is a better shortening of this, but it's dramatically more // complicated to implement with the newer byte/glyph/character // shortening code. array('D2rp. Derp derp.', 5, '...', 'D2...'), array('D3rp. Derp derp.', 4, '...', 'D...'), array('D4rp. Derp derp.', 14, '...', 'D4rp. 
Derp...'), array('D5rpderp, derp derp', 16, '...', 'D5rpderp...'), array('D6rpderp, derp derp', 17, '...', 'D6rpderp, derp...'), // Strings with combining characters. array("Gr\xCD\xA0mpyCatSmiles", 8, '...', "Gr\xCD\xA0mpy..."), array("X\xCD\xA0\xCD\xA0\xCD\xA0Y", 1, '', "X\xCD\xA0\xCD\xA0\xCD\xA0"), // This behavior is maybe a little bad, but it seems mostly reasonable, // at least for latin languages. array('Derp, supercalafragalisticexpialadoshus', 30, '...', 'Derp...'), // If a string has only word-break characters in it, we should just cut // it, not produce only the terminal. array('((((((((((', 8, '...', '(((((...'), // Terminal is longer than requested input. array('derp', 3, 'quack', 'quack'), ); foreach ($inputs as $input) { list($string, $length, $terminal, $expect) = $input; $result = id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($length) ->setTerminator($terminal) ->truncateString($string); $this->assertEqual($expect, $result, 'Shortening of '.$string); } - } public function testUTF8StringTruncator() { $cases = array( array( "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", 6, "o\xCD\xA0!", 6, "o\xCD\xA0o\xCD\xA0!", 6, "o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0o\xCD\xA0", ), array( "X\xCD\xA0\xCD\xA0\xCD\xA0Y", 6, '!', 6, "X\xCD\xA0\xCD\xA0\xCD\xA0Y", 6, "X\xCD\xA0\xCD\xA0\xCD\xA0Y", ), array( "X\xCD\xA0\xCD\xA0\xCD\xA0YZ", 6, '!', 5, "X\xCD\xA0\xCD\xA0\xCD\xA0!", 2, "X\xCD\xA0\xCD\xA0\xCD\xA0!", ), array( "\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83\xE2\x98\x83", 4, "\xE2\x98\x83!", 3, "\xE2\x98\x83\xE2\x98\x83!", 3, "\xE2\x98\x83\xE2\x98\x83!", ), ); foreach ($cases as $case) { list($input, $b_len, $b_out, $p_len, $p_out, $g_len, $g_out) = $case; $result = id(new PhutilUTF8StringTruncator()) ->setMaximumBytes($b_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($b_out, $result, 'byte-short of '.$input); $result = id(new PhutilUTF8StringTruncator()) ->setMaximumCodepoints($p_len) ->setTerminator('!') ->truncateString($input); 
$this->assertEqual($p_out, $result, 'codepoint-short of '.$input); $result = id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($g_len) ->setTerminator('!') ->truncateString($input); $this->assertEqual($g_out, $result, 'glyph-short of '.$input); } } public function testUTF8Wrap() { $inputs = array( array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', )), array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', )), array( 'aa&aaaa', 3, array( 'aa&', 'aaa', 'a', )), array( "aa\xe6\x9d\xb1aaaa", 3, array( "aa\xe6\x9d\xb1", 'aaa', 'a', )), array( '', 80, array( )), array( 'a', 80, array( 'a', )), ); foreach ($inputs as $input) { list($string, $width, $expect) = $input; $this->assertEqual( $expect, phutil_utf8_hard_wrap_html($string, $width), "Wrapping of '".$string."'"); } } public function testUTF8NonHTMLWrap() { $inputs = array( array( 'aaaaaaa', 3, array( 'aaa', 'aaa', 'a', )), array( 'abracadabra!', 4, array( 'abra', 'cada', 'bra!', )), array( '', 10, array( )), array( 'a', 20, array( 'a', )), array( "aa\xe6\x9d\xb1aaaa", 3, array( "aa\xe6\x9d\xb1", 'aaa', 'a', )), array( "mmm\nmmm\nmmmm", 3, array( 'mmm', 'mmm', 'mmm', 'm', )), ); foreach ($inputs as $input) { list($string, $width, $expect) = $input; $this->assertEqual( $expect, phutil_utf8_hard_wrap($string, $width), "Wrapping of '".$string."'"); } } - public function testUTF8ConvertParams() { $caught = null; try { phutil_utf8_convert('', 'utf8', ''); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, 'Requires source encoding.'); $caught = null; try { phutil_utf8_convert('', '', 'utf8'); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, 'Requires target encoding.'); } public function testUTF8Convert() { if (!function_exists('mb_convert_encoding')) { $this->assertSkipped('Requires mbstring extension.'); } // "[ae]gis se[n]or [(c)] 1970 [+/-] 1 [degree]" $input = "\xE6gis SE\xD1OR \xA9 1970 \xB11\xB0"; $expect = "\xC3\xA6gis SE\xC3\x91OR \xC2\xA9 1970 \xC2\xB11\xC2\xB0"; 
$output = phutil_utf8_convert($input, 'UTF-8', 'ISO-8859-1'); $this->assertEqual($expect, $output, 'Conversion from ISO-8859-1.'); - $caught = null; try { phutil_utf8_convert('xyz', 'moon language', 'UTF-8'); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue((bool)$caught, 'Conversion with bogus encoding.'); } public function testUTF8ucwords() { $tests = array( '' => '', 'x' => 'X', 'X' => 'X', 'five short graybles' => 'Five Short Graybles', 'xXxSNiPeRKiLLeRxXx' => 'XXxSNiPeRKiLLeRxXx', ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_ucwords($input), 'phutil_utf8_ucwords("'.$input.'")'); } } - public function testUTF8strtolower() { $tests = array( '' => '', 'a' => 'a', 'A' => 'a', '!' => '!', 'OMG!~ LOLolol ROFLwaffle11~' => 'omg!~ lololol roflwaffle11~', "\xE2\x98\x83" => "\xE2\x98\x83", ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_strtolower($input), 'phutil_utf8_strtolower("'.$input.'")'); } } public function testUTF8strtoupper() { $tests = array( '' => '', 'a' => 'A', 'A' => 'A', '!' => '!', 'Cats have 9 lives.' => 'CATS HAVE 9 LIVES.', "\xE2\x98\x83" => "\xE2\x98\x83", ); foreach ($tests as $input => $expect) { $this->assertEqual( $expect, phutil_utf8_strtoupper($input), 'phutil_utf8_strtoupper("'.$input.'")'); } } public function testUTF8IsCombiningCharacter() { $character = "\xCD\xA0"; $this->assertEqual( true, phutil_utf8_is_combining_character($character)); $character = 'a'; $this->assertEqual( false, phutil_utf8_is_combining_character($character)); } public function testUTF8vCombined() { // Empty string. $string = ''; $this->assertEqual(array(), phutil_utf8v_combined($string)); // Single character. $string = 'x'; $this->assertEqual(array('x'), phutil_utf8v_combined($string)); // No combining characters. $string = 'cat'; $this->assertEqual(array('c', 'a', 't'), phutil_utf8v_combined($string)); // String with a combining character in the middle. 
$string = "ca\xCD\xA0t"; $this->assertEqual( array('c', "a\xCD\xA0", 't'), phutil_utf8v_combined($string)); // String starting with a combined character. $string = "c\xCD\xA0at"; $this->assertEqual( array("c\xCD\xA0", 'a', 't'), phutil_utf8v_combined($string)); // String with trailing combining character. $string = "cat\xCD\xA0"; $this->assertEqual( array('c', 'a', "t\xCD\xA0"), phutil_utf8v_combined($string)); // String with muliple combined characters. $string = "c\xCD\xA0a\xCD\xA0t\xCD\xA0"; $this->assertEqual( array("c\xCD\xA0", "a\xCD\xA0", "t\xCD\xA0"), phutil_utf8v_combined($string)); // String with multiple combining characters. $string = "ca\xCD\xA0\xCD\xA0t"; $this->assertEqual( array('c', "a\xCD\xA0\xCD\xA0", 't'), phutil_utf8v_combined($string)); // String beginning with a combining character. $string = "\xCD\xA0\xCD\xA0c"; $this->assertEqual( array(" \xCD\xA0\xCD\xA0", 'c'), phutil_utf8v_combined($string)); - } public function testUTF8BMPSegfaults() { // This test case fails by segfaulting, or passes by not segfaulting. See // the function implementation for details. $input = str_repeat("\xEF\xBF\xBF", 1024 * 32); phutil_is_utf8_with_only_bmp_characters($input); $this->assertTrue(true); } public function testUTF8BMP() { $tests = array( '' => array(true, true, 'empty string'), 'a' => array(true, true, 'a'), "a\xCD\xA0\xCD\xA0" => array(true, true, 'a with combining'), "\xE2\x98\x83" => array(true, true, 'snowman'), // This is the last character in BMP, U+FFFF. "\xEF\xBF\xBF" => array(true, true, 'U+FFFF'), // This isn't valid. "\xEF\xBF\xC0" => array(false, false, 'Invalid, byte range.'), // This is an invalid nonminimal representation. "\xF0\x81\x80\x80" => array(false, false, 'Nonminimal 4-byte characer.'), // This is the first character above BMP, U+10000. 
"\xF0\x90\x80\x80" => array(true, false, 'U+10000'), "\xF0\x9D\x84\x9E" => array(true, false, 'gclef'), "musical \xF0\x9D\x84\x9E g-clef" => array(true, false, 'gclef text'), "\xF0\x9D\x84" => array(false, false, 'Invalid, truncated.'), "\xE0\x80\x80" => array(false, false, 'Nonminimal 3-byte character.'), // Partial BMP characters. "\xCD" => array(false, false, 'Partial 2-byte character.'), "\xE0\xA0" => array(false, false, 'Partial BMP 0xE0 character.'), "\xE2\x98" => array(false, false, 'Partial BMP cahracter.'), ); foreach ($tests as $input => $test) { list($expect_utf8, $expect_bmp, $test_name) = $test; // Depending on what's installed on the system, this may use an // extension. $this->assertEqual( $expect_utf8, phutil_is_utf8($input), pht('is_utf(%s)', $test_name)); // Also test this against the pure PHP implementation, explicitly. $this->assertEqual( $expect_utf8, phutil_is_utf8_slowly($input), pht('is_utf_slowly(%s)', $test_name)); $this->assertEqual( $expect_bmp, phutil_is_utf8_with_only_bmp_characters($input), pht('is_utf_bmp(%s)', $test_name)); } } } diff --git a/src/utils/__tests__/PhutilUtilsTestCase.php b/src/utils/__tests__/PhutilUtilsTestCase.php index f937c58..a32223e 100644 --- a/src/utils/__tests__/PhutilUtilsTestCase.php +++ b/src/utils/__tests__/PhutilUtilsTestCase.php @@ -1,578 +1,568 @@ assertTrue($caught instanceof InvalidArgumentException); } public function testMFilterWithEmptyValueFiltered() { $a = new MFilterTestHelper('o', 'p', 'q'); $b = new MFilterTestHelper('o', '', 'q'); $c = new MFilterTestHelper('o', 'p', 'q'); $list = array( 'a' => $a, 'b' => $b, 'c' => $c, ); $actual = mfilter($list, 'getI'); $expected = array( 'a' => $a, 'c' => $c, ); $this->assertEqual($expected, $actual); } - public function testMFilterWithEmptyValueNegateFiltered() { $a = new MFilterTestHelper('o', 'p', 'q'); $b = new MFilterTestHelper('o', '', 'q'); $c = new MFilterTestHelper('o', 'p', 'q'); $list = array( 'a' => $a, 'b' => $b, 'c' => $c, ); $actual = 
mfilter($list, 'getI', true); $expected = array( 'b' => $b, ); $this->assertEqual($expected, $actual); } - public function testIFilterInvalidIndexThrowException() { $caught = null; try { ifilter(array(), null); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); } - public function testIFilterWithEmptyValueFiltered() { $list = array( - 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'b' => array('h' => 'o', 'i' => '', 'j' => 'q',), - 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q',), - 'e' => array('h' => 'o', 'i' => null, 'j' => 'q',), - 'f' => array('h' => 'o', 'i' => false, 'j' => 'q',), + 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), + 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q'), + 'e' => array('h' => 'o', 'i' => null, 'j' => 'q'), + 'f' => array('h' => 'o', 'i' => false, 'j' => 'q'), ); $actual = ifilter($list, 'i'); $expected = array( - 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), + 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), ); $this->assertEqual($expected, $actual); } - public function testIFilterIndexNotExistsAllFiltered() { $list = array( - 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'b' => array('h' => 'o', 'i' => '', 'j' => 'q',), + 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), ); $actual = ifilter($list, 'NoneExisting'); $expected = array(); $this->assertEqual($expected, $actual); } public function testIFilterWithEmptyValueNegateFiltered() { $list = array( - 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'b' => array('h' => 'o', 'i' => '', 'j' => 'q',), - 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'd' => array('h' => 'o', 'i' 
=> 0, 'j' => 'q',), - 'e' => array('h' => 'o', 'i' => null, 'j' => 'q',), - 'f' => array('h' => 'o', 'i' => false, 'j' => 'q',), + 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), + 'c' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q'), + 'e' => array('h' => 'o', 'i' => null, 'j' => 'q'), + 'f' => array('h' => 'o', 'i' => false, 'j' => 'q'), ); $actual = ifilter($list, 'i', true); $expected = array( - 'b' => array('h' => 'o', 'i' => '', 'j' => 'q',), - 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q',), - 'e' => array('h' => 'o', 'i' => null, 'j' => 'q',), - 'f' => array('h' => 'o', 'i' => false, 'j' => 'q',), + 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), + 'd' => array('h' => 'o', 'i' => 0, 'j' => 'q'), + 'e' => array('h' => 'o', 'i' => null, 'j' => 'q'), + 'f' => array('h' => 'o', 'i' => false, 'j' => 'q'), ); $this->assertEqual($expected, $actual); } - public function testIFilterIndexNotExistsNotFiltered() { $list = array( - 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'b' => array('h' => 'o', 'i' => '', 'j' => 'q',), + 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), ); $actual = ifilter($list, 'NoneExisting', true); $expected = array( - 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q',), - 'b' => array('h' => 'o', 'i' => '', 'j' => 'q',), + 'a' => array('h' => 'o', 'i' => 'p', 'j' => 'q'), + 'b' => array('h' => 'o', 'i' => '', 'j' => 'q'), ); $this->assertEqual($expected, $actual); } public function testmergevMergingBasicallyWorksCorrectly() { $this->assertEqual( array(), array_mergev( array( // ))); $this->assertEqual( array(), array_mergev( array( array(), array(), array(), ))); $this->assertEqual( array(1, 2, 3, 4, 5), array_mergev( array( array(1, 2), array(3), array(), array(4, 5), ))); $not_valid = array( 'scalar' => array(1), 'array plus scalar' => array(array(), 1), 'null' => array(null), ); 
foreach ($not_valid as $key => $invalid_input) { $caught = null; try { array_mergev($invalid_input); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue( ($caught instanceof InvalidArgumentException), "array_mergev() invalid on {$key}"); } } public function testNonempty() { $this->assertEqual( 'zebra', nonempty(false, null, 0, '', array(), 'zebra')); $this->assertEqual( null, nonempty()); $this->assertEqual( false, nonempty(null, false)); $this->assertEqual( null, nonempty(false, null)); } protected function tryAssertInstancesOfArray($input) { assert_instances_of($input, 'array'); } protected function tryAssertInstancesOfStdClass($input) { assert_instances_of($input, 'stdClass'); } public function testAssertInstancesOf() { $object = new stdClass(); $inputs = array( 'empty' => array(), 'stdClass' => array($object, $object), 'PhutilUtilsTestCase' => array($object, $this), 'array' => array(array(), array()), 'integer' => array($object, 1), ); $this->tryTestCases( $inputs, array(true, true, false, false, false), array($this, 'tryAssertInstancesOfStdClass'), 'InvalidArgumentException'); $this->tryTestCases( $inputs, array(true, false, false, true, false), array($this, 'tryAssertInstancesOfArray'), 'InvalidArgumentException'); } public function testAssertStringLike () { $this->assertEqual( null, assert_stringlike(null)); $this->assertEqual( null, assert_stringlike('')); $this->assertEqual( null, assert_stringlike('Hello World')); $this->assertEqual( null, assert_stringlike(1)); $this->assertEqual( null, assert_stringlike(9.9999)); $this->assertEqual( null, assert_stringlike(true)); $obj = new Exception('.'); $this->assertEqual( null, assert_stringlike($obj)); $obj = (object)array(); try { assert_stringlike($obj); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); $array = array( - 'foo' => 'bar', - 'bar' => 'foo', - ); + 'foo' => 'bar', + 'bar' => 'foo', + ); try { 
assert_stringlike($array); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue($caught instanceof InvalidArgumentException); $tmp = new TempFile(); $resource = fopen($tmp, 'r'); try { assert_stringlike($resource); } catch (InvalidArgumentException $ex) { $caught = $ex; } fclose($resource); $this->assertTrue($caught instanceof InvalidArgumentException); - } public function testCoalesce() { $this->assertEqual( 'zebra', coalesce(null, 'zebra')); $this->assertEqual( null, coalesce()); $this->assertEqual( false, coalesce(false, null)); $this->assertEqual( false, coalesce(null, false)); } public function testHeadLast() { $this->assertEqual( 'a', head(explode('.', 'a.b'))); $this->assertEqual( 'b', last(explode('.', 'a.b'))); } public function testHeadKeyLastKey() { $this->assertEqual( 'a', head_key(array('a' => 0, 'b' => 1))); $this->assertEqual( 'b', last_key(array('a' => 0, 'b' => 1))); $this->assertEqual(null, head_key(array())); $this->assertEqual(null, last_key(array())); } public function testID() { $this->assertEqual(true, id(true)); $this->assertEqual(false, id(false)); } public function testIdx() { $array = array( 'present' => true, 'null' => null, ); $this->assertEqual(true, idx($array, 'present')); $this->assertEqual(true, idx($array, 'present', false)); $this->assertEqual(null, idx($array, 'null')); $this->assertEqual(null, idx($array, 'null', false)); $this->assertEqual(null, idx($array, 'missing')); $this->assertEqual(false, idx($array, 'missing', false)); } public function testSplitLines() { $retain_cases = array( '' => array(''), 'x' => array('x'), "x\n" => array("x\n"), "\n" => array("\n"), "\n\n\n" => array("\n", "\n", "\n"), "\r\n" => array("\r\n"), "x\r\ny\n" => array("x\r\n", "y\n"), "x\ry\nz\r\n" => array("x\ry\n", "z\r\n"), "x\ry\nz\r\n\n" => array("x\ry\n", "z\r\n", "\n"), ); foreach ($retain_cases as $input => $expect) { $this->assertEqual( $expect, phutil_split_lines($input, $retain_endings = true), "(Retained) 
".addcslashes($input, "\r\n\\")); } $discard_cases = array( '' => array(''), 'x' => array('x'), "x\n" => array('x'), "\n" => array(''), "\n\n\n" => array('', '', ''), "\r\n" => array(''), "x\r\ny\n" => array('x', 'y'), "x\ry\nz\r\n" => array("x\ry", 'z'), "x\ry\nz\r\n\n" => array("x\ry", 'z', ''), ); foreach ($discard_cases as $input => $expect) { $this->assertEqual( $expect, phutil_split_lines($input, $retain_endings = false), "(Discarded) ".addcslashes($input, "\r\n\\")); } - } public function testArrayFuse() { $this->assertEqual(array(), array_fuse(array())); $this->assertEqual(array('x' => 'x'), array_fuse(array('x'))); } public function testArrayInterleave() { $this->assertEqual(array(), array_interleave('x', array())); $this->assertEqual(array('y'), array_interleave('x', array('y'))); $this->assertEqual( array('y', 'x', 'z'), array_interleave('x', array('y', 'z'))); $this->assertEqual( array('y', 'x', 'z'), array_interleave( 'x', array( 'kangaroo' => 'y', 'marmoset' => 'z', ))); $obj1 = (object)array(); $obj2 = (object)array(); $this->assertEqual( array($obj1, $obj2, $obj1, $obj2, $obj1), array_interleave( $obj2, array( $obj1, $obj1, $obj1, ))); $implode_tests = array( '' => array(1, 2, 3), 'x' => array(1, 2, 3), 'y' => array(), 'z' => array(1), ); foreach ($implode_tests as $x => $y) { $this->assertEqual( implode('', array_interleave($x, $y)), implode($x, $y)); } } public function testLoggableString() { $this->assertEqual( '', phutil_loggable_string('')); $this->assertEqual( "a\\nb", phutil_loggable_string("a\nb")); $this->assertEqual( "a\\x01b", phutil_loggable_string("a\x01b")); $this->assertEqual( "a\\x1Fb", phutil_loggable_string("a\x1Fb")); } public function testPhutilUnits() { $cases = array( '0 seconds in seconds' => 0, '1 second in seconds' => 1, '2 seconds in seconds' => 2, '100 seconds in seconds' => 100, '2 minutes in seconds' => 120, '1 hour in seconds' => 3600, '1 day in seconds' => 86400, '3 days in seconds' => 259200, ); foreach ($cases as 
$input => $expect) { $this->assertEqual( $expect, phutil_units($input), 'phutil_units("'.$input.'")'); } $bad_cases = array( 'quack', '3 years in seconds', '1 minute in milliseconds', '1 day in days', '-1 minutes in seconds', '1.5 minutes in seconds', ); foreach ($bad_cases as $input) { $caught = null; try { phutil_units($input); } catch (InvalidArgumentException $ex) { $caught = $ex; } $this->assertTrue( ($caught instanceof InvalidArgumentException), 'phutil_units("'.$input.'")'); } } public function testPhutilJSONDecode() { $valid_cases = array( '{}' => array(), '[]' => array(), '[1, 2]' => array(1, 2), '{"a":"b"}' => array('a' => 'b'), ); foreach ($valid_cases as $input => $expect) { $result = phutil_json_decode($input); $this->assertEqual($expect, $result, 'phutil_json_decode('.$input.')'); } $invalid_cases = array( '', '"a"', '{,}', 'null', '"null"', ); foreach ($invalid_cases as $input) { $caught = null; try { phutil_json_decode($input); } catch (Exception $ex) { $caught = $ex; } $this->assertTrue($caught instanceof PhutilJSONParserException); } } public function testCensorCredentials() { $cases = array( '' => '', 'abc' => 'abc', // NOTE: We're liberal about censoring here, since we can't tell // if this is a truncated password at the end of an input string // or a domain name. The version with a "/" isn't censored. 'http://example.com' => 'http://xxxxx', 'http://example.com/' => 'http://example.com/', 'http://username@example.com' => 'http://xxxxx@example.com', 'http://user:pass@example.com' => 'http://xxxxx@example.com', // We censor these because they might be truncated credentials at the end // of the string. 
'http://user' => 'http://xxxxx', "http://user\n" => "http://xxxxx\n", 'svn+ssh://user:pass@example.com' => 'svn+ssh://xxxxx@example.com', ); foreach ($cases as $input => $expect) { $this->assertEqual( $expect, phutil_censor_credentials($input), pht('Credential censoring for: %s', $input)); } } - } diff --git a/src/utils/__tests__/TestAbstractDirectedGraph.php b/src/utils/__tests__/TestAbstractDirectedGraph.php index 94fbc3a..b095da6 100644 --- a/src/utils/__tests__/TestAbstractDirectedGraph.php +++ b/src/utils/__tests__/TestAbstractDirectedGraph.php @@ -1,19 +1,16 @@ nodes = $nodes; return $this; } protected function loadEdges(array $nodes) { return array_select_keys($this->nodes, $nodes); } } diff --git a/src/utils/utf8.php b/src/utils/utf8.php index 71ddb05..75112cf 100644 --- a/src/utils/utf8.php +++ b/src/utils/utf8.php @@ -1,780 +1,753 @@ for some discussion. Since the // input limit is extremely low (less than 50KB on my system), do this check // very very slowly in PHP instead. See also T5316. $len = strlen($string); for ($ii = 0; $ii < $len; $ii++) { $chr = ord($string[$ii]); if ($chr >= 0x01 && $chr <= 0x7F) { continue; } else if ($chr >= 0xC2 && $chr <= 0xDF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } return false; } else if ($chr > 0xE0 && $chr <= 0xEF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } return false; } else if ($chr == 0xE0) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); // NOTE: This range starts at 0xA0, not 0x80. The values 0x80-0xA0 are // "valid", but not minimal representations, and MySQL rejects them. We're // special casing this part of the range. 
if ($chr >= 0xA0 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } return false; } else if (!$only_bmp) { if ($chr > 0xF0 && $chr <= 0xF4) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } } } else if ($chr == 0xF0) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); // NOTE: As above, this range starts at 0x90, not 0x80. The values // 0x80-0x90 are not minimal representations. if ($chr >= 0x90 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { ++$ii; if ($ii >= $len) { return false; } $chr = ord($string[$ii]); if ($chr >= 0x80 && $chr <= 0xBF) { continue; } } } } } return false; } return true; } /** * Find the character length of a UTF-8 string. * * @param string A valid utf-8 string. * @return int The character length of the string. - * @group utf8 */ function phutil_utf8_strlen($string) { return strlen(utf8_decode($string)); } /** * Find the console display length of a UTF-8 string. This may differ from the * character length of the string if it contains double-width characters, like * many Chinese characters. * * This method is based on a C implementation here, which is based on the IEEE * standards. The source has more discussion and addresses more considerations * than this implementation does. * * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c * * NOTE: We currently assume width 1 for East-Asian ambiguous characters. * * NOTE: This function is VERY slow. * * @param string A valid UTF-8 string. * @return int The console display length of the string. 
- * @group utf8 */ function phutil_utf8_console_strlen($string) { - // Formatting and colors don't contribute any width in the console. $string = preg_replace("/\x1B\[\d*m/", '', $string); // In the common case of an ASCII string, just return the string length. if (preg_match('/^[\x01-\x7F]*\z/', $string)) { return strlen($string); } $len = 0; // NOTE: To deal with combining characters, we're splitting the string into // glyphs first (characters with combiners) and then counting just the width // of the first character in each glyph. $display_glyphs = phutil_utf8v_combined($string); foreach ($display_glyphs as $display_glyph) { $glyph_codepoints = phutil_utf8v_codepoints($display_glyph); foreach ($glyph_codepoints as $c) { if ($c == 0) { break; } $len += 1 + ($c >= 0x1100 && ($c <= 0x115f || /* Hangul Jamo init. consonants */ $c == 0x2329 || $c == 0x232a || ($c >= 0x2e80 && $c <= 0xa4cf && $c != 0x303f) || /* CJK ... Yi */ ($c >= 0xac00 && $c <= 0xd7a3) || /* Hangul Syllables */ ($c >= 0xf900 && $c <= 0xfaff) || /* CJK Compatibility Ideographs */ ($c >= 0xfe10 && $c <= 0xfe19) || /* Vertical forms */ ($c >= 0xfe30 && $c <= 0xfe6f) || /* CJK Compatibility Forms */ ($c >= 0xff00 && $c <= 0xff60) || /* Fullwidth Forms */ ($c >= 0xffe0 && $c <= 0xffe6) || ($c >= 0x20000 && $c <= 0x2fffd) || ($c >= 0x30000 && $c <= 0x3fffd))); break; } } return $len; } /** * Split a UTF-8 string into an array of characters. Combining characters are * also split. * * @param string A valid utf-8 string. * @return list A list of characters in the string. 
- * @group utf8 */ function phutil_utf8v($string) { $res = array(); $len = strlen($string); $ii = 0; while ($ii < $len) { $byte = $string[$ii]; if ($byte <= "\x7F") { $res[] = $byte; $ii += 1; continue; } else if ($byte < "\xC0") { throw new Exception('Invalid UTF-8 string passed to phutil_utf8v().'); } else if ($byte <= "\xDF") { $seq_len = 2; } else if ($byte <= "\xEF") { $seq_len = 3; } else if ($byte <= "\xF7") { $seq_len = 4; } else if ($byte <= "\xFB") { $seq_len = 5; } else if ($byte <= "\xFD") { $seq_len = 6; } else { throw new Exception('Invalid UTF-8 string passed to phutil_utf8v().'); } if ($ii + $seq_len > $len) { throw new Exception('Invalid UTF-8 string passed to phutil_utf8v().'); } for ($jj = 1; $jj < $seq_len; ++$jj) { if ($string[$ii + $jj] >= "\xC0") { throw new Exception('Invalid UTF-8 string passed to phutil_utf8v().'); } } $res[] = substr($string, $ii, $seq_len); $ii += $seq_len; } return $res; } /** * Split a UTF-8 string into an array of codepoints (as integers). * * @param string A valid UTF-8 string. * @return list A list of codepoints, as integers. 
- * @group utf8 */ function phutil_utf8v_codepoints($string) { $str_v = phutil_utf8v($string); foreach ($str_v as $key => $char) { $c = ord($char[0]); $v = 0; if (($c & 0x80) == 0) { $v = $c; } else if (($c & 0xE0) == 0xC0) { $v = (($c & 0x1F) << 6) + ((ord($char[1]) & 0x3F)); } else if (($c & 0xF0) == 0xE0) { $v = (($c & 0x0F) << 12) + ((ord($char[1]) & 0x3f) << 6) + ((ord($char[2]) & 0x3f)); } else if (($c & 0xF8) == 0xF0) { $v = (($c & 0x07) << 18) + ((ord($char[1]) & 0x3F) << 12) + ((ord($char[2]) & 0x3F) << 6) + ((ord($char[3]) & 0x3f)); } else if (($c & 0xFC) == 0xF8) { $v = (($c & 0x03) << 24) + ((ord($char[1]) & 0x3F) << 18) + ((ord($char[2]) & 0x3F) << 12) + ((ord($char[3]) & 0x3f) << 6) + ((ord($char[4]) & 0x3f)); } else if (($c & 0xFE) == 0xFC) { $v = (($c & 0x01) << 30) + ((ord($char[1]) & 0x3F) << 24) + ((ord($char[2]) & 0x3F) << 18) + ((ord($char[3]) & 0x3f) << 12) + ((ord($char[4]) & 0x3f) << 6) + ((ord($char[5]) & 0x3f)); } $str_v[$key] = $v; } return $str_v; } /** * Shorten a string to provide a summary, respecting UTF-8 characters. * * This function is deprecated; use @{class:PhutilUTF8StringTruncator} instead. * * @param string UTF-8 string to shorten. * @param int Maximum length of the result. * @param string If the string is shortened, add this at the end. Defaults to * horizontal ellipsis. * @return string A string with no more than the specified character length. - * - * @group utf8 */ function phutil_utf8_shorten($string, $length, $terminal = "\xE2\x80\xA6") { return id(new PhutilUTF8StringTruncator()) ->setMaximumGlyphs($length) ->setTerminator($terminal) ->truncateString($string); } /** * Hard-wrap a block of UTF-8 text with embedded HTML tags and entities. * * @param string An HTML string with tags and entities. * @return list List of hard-wrapped lines. - * @group utf8 */ function phutil_utf8_hard_wrap_html($string, $width) { $break_here = array(); // Convert the UTF-8 string into a list of UTF-8 characters. 
$vector = phutil_utf8v($string); $len = count($vector); $char_pos = 0; for ($ii = 0; $ii < $len; ++$ii) { // An ampersand indicates an HTML entity; consume the whole thing (until // ";") but treat it all as one character. if ($vector[$ii] == '&') { do { ++$ii; } while ($vector[$ii] != ';'); ++$char_pos; // An "<" indicates an HTML tag, consume the whole thing but don't treat // it as a character. } else if ($vector[$ii] == '<') { do { ++$ii; } while ($vector[$ii] != '>'); } else { ++$char_pos; } // Keep track of where we need to break the string later. if ($char_pos == $width) { $break_here[$ii] = true; $char_pos = 0; } } $result = array(); $string = ''; foreach ($vector as $ii => $char) { $string .= $char; if (isset($break_here[$ii])) { $result[] = $string; $string = ''; } } if (strlen($string)) { $result[] = $string; } return $result; } /** * Hard-wrap a block of UTF-8 text with no embedded HTML tags and entitites * * @param string A non HTML string * @param int Width of the hard-wrapped lines * @return list List of hard-wrapped lines. - * @group utf8 */ function phutil_utf8_hard_wrap($string, $width) { $result = array(); $lines = phutil_split_lines($string, $retain_endings = false); foreach ($lines as $line) { // Convert the UTF-8 string into a list of UTF-8 characters. $vector = phutil_utf8v($line); $len = count($vector); $buffer = ''; for ($ii = 1; $ii <= $len; ++$ii) { $buffer .= $vector[$ii - 1]; if (($ii % $width) === 0) { $result[] = $buffer; $buffer = ''; } } if (strlen($buffer)) { $result[] = $buffer; } } return $result; } /** * Convert a string from one encoding (like ISO-8859-1) to another encoding * (like UTF-8). * * This is primarily a thin wrapper around `mb_convert_encoding()` which checks * you have the extension installed, since we try to require the extension * only if you actually need it (i.e., you want to work with encodings other * than UTF-8). * * NOTE: This function assumes that the input is in the given source encoding. 
* If it is not, it may not output in the specified target encoding. If you * need to perform a hard conversion to UTF-8, use this function in conjunction * with @{function:phutil_utf8ize}. We can detect failures caused by invalid * encoding names, but `mb_convert_encoding()` fails silently if the * encoding name identifies a real encoding but the string is not actually * encoded with that encoding. * * @param string String to re-encode. * @param string Target encoding name, like "UTF-8". * @param string Source endocing name, like "ISO-8859-1". * @return string Input string, with converted character encoding. * - * @group utf8 - * * @phutil-external-symbol function mb_convert_encoding */ function phutil_utf8_convert($string, $to_encoding, $from_encoding) { if (!$from_encoding) { throw new InvalidArgumentException( 'Attempting to convert a string encoding, but no source encoding '. 'was provided. Explicitly provide the source encoding.'); } if (!$to_encoding) { throw new InvalidArgumentException( 'Attempting to convert a string encoding, but no target encoding '. 'was provided. Explicitly provide the target encoding.'); } // Normalize encoding names so we can no-op the very common case of UTF8 // to UTF8 (or any other conversion where both encodings are identical). $to_upper = strtoupper(str_replace('-', '', $to_encoding)); $from_upper = strtoupper(str_replace('-', '', $from_encoding)); if ($from_upper == $to_upper) { return $string; } if (!function_exists('mb_convert_encoding')) { throw new Exception( "Attempting to convert a string encoding from '{$from_encoding}' ". "to '{$to_encoding}', but the 'mbstring' PHP extension is not ". "available. Install mbstring to work with encodings other than ". 
"UTF-8."); } $result = @mb_convert_encoding($string, $to_encoding, $from_encoding); if ($result === false) { $message = error_get_last(); if ($message) { $message = idx($message, 'message', 'Unknown error.'); } throw new Exception( "String conversion from encoding '{$from_encoding}' to encoding ". "'{$to_encoding}' failed: {$message}"); } return $result; } /** * Convert a string to title case in a UTF8-aware way. This function doesn't - * necessarily do a great job, but the builtin implementation of ucwords() can + * necessarily do a great job, but the builtin implementation of `ucwords()` can * completely destroy inputs, so it just has to be better than that. Similar to * @{function:ucwords}. * * @param string UTF-8 input string. * @return string Input, in some semblance of title case. - * - * @group utf8 */ function phutil_utf8_ucwords($str) { // NOTE: mb_convert_case() discards uppercase letters in words when converting // to title case. For example, it will convert "AAA" into "Aaa", which is // undesirable. $v = phutil_utf8v($str); $result = ''; $last = null; $ord_a = ord('a'); $ord_z = ord('z'); foreach ($v as $c) { $convert = false; if ($last === null || $last === ' ') { $o = ord($c[0]); if ($o >= $ord_a && $o <= $ord_z) { $convert = true; } } if ($convert) { $result .= phutil_utf8_strtoupper($c); } else { $result .= $c; } $last = $c; } return $result; } /** * Convert a string to lower case in a UTF8-aware way. Similar to * @{function:strtolower}. * * @param string UTF-8 input string. * @return string Input, in some semblance of lower case. * - * @group utf8 - * * @phutil-external-symbol function mb_convert_case */ function phutil_utf8_strtolower($str) { if (function_exists('mb_convert_case')) { return mb_convert_case($str, MB_CASE_LOWER, 'UTF-8'); } static $map; if ($map === null) { $map = array_combine( range('A', 'Z'), range('a', 'z')); } return phutil_utf8_strtr($str, $map); } /** * Convert a string to upper case in a UTF8-aware way. 
Similar to * @{function:strtoupper}. * * @param string UTF-8 input string. * @return string Input, in some semblance of upper case. * - * @group utf8 - * * @phutil-external-symbol function mb_convert_case */ function phutil_utf8_strtoupper($str) { if (function_exists('mb_convert_case')) { return mb_convert_case($str, MB_CASE_UPPER, 'UTF-8'); } static $map; if ($map === null) { $map = array_combine( range('a', 'z'), range('A', 'Z')); } return phutil_utf8_strtr($str, $map); } /** * Replace characters in a string in a UTF-aware way. Similar to * @{function:strtr}. * * @param string UTF-8 input string. * @param map Map of characters to replace. * @return string Input with translated characters. - * - * @group utf8 */ function phutil_utf8_strtr($str, array $map) { $v = phutil_utf8v($str); $result = ''; foreach ($v as $c) { if (isset($map[$c])) { $result .= $map[$c]; } else { $result .= $c; } } return $result; } /** * Determine if a given unicode character is a combining character or not. * * @param string A single unicode character. * @return boolean True or false. - * - * @group utf8 */ - function phutil_utf8_is_combining_character($character) { $components = phutil_utf8v_codepoints($character); // Combining Diacritical Marks (0300 - 036F). // Combining Diacritical Marks Supplement (1DC0 - 1DFF). // Combining Diacritical Marks for Symbols (20D0 - 20FF). // Combining Half Marks (FE20 - FE2F). foreach ($components as $codepoint) { if ($codepoint >= 0x0300 && $codepoint <= 0x036F || $codepoint >= 0x1DC0 && $codepoint <= 0x1DFF || $codepoint >= 0x20D0 && $codepoint <= 0x20FF || $codepoint >= 0xFE20 && $codepoint <= 0xFE2F) { return true; } } return false; } /** * Split a UTF-8 string into an array of characters. Combining characters * are not split. * * @param string A valid utf-8 string. * @return list A list of characters in the string. 
- * - * @group utf8 */ - function phutil_utf8v_combined($string) { $components = phutil_utf8v($string); $array_length = count($components); // If the first character in the string is a combining character, // prepend a space to the string. if ( $array_length > 0 && phutil_utf8_is_combining_character($components[0])) { $string = ' '.$string; $components = phutil_utf8v($string); $array_length++; } for ($index = 1; $index < $array_length; $index++) { if (phutil_utf8_is_combining_character($components[$index])) { $components[$index - 1] = $components[$index - 1].$components[$index]; unset($components[$index]); $components = array_values($components); $index --; $array_length = count($components); } } return $components; } diff --git a/src/utils/utils.php b/src/utils/utils.php index 504b92e..3b6681d 100644 --- a/src/utils/utils.php +++ b/src/utils/utils.php @@ -1,1082 +1,1073 @@ doStuff(); * * ...but this works fine: * * id(new Thing())->doStuff(); * * @param wild Anything. * @return wild Unmodified argument. */ function id($x) { return $x; } /** * Access an array index, retrieving the value stored there if it exists or * a default if it does not. This function allows you to concisely access an * index which may or may not exist without raising a warning. * * @param array Array to access. * @param scalar Index to access in the array. * @param wild Default value to return if the key is not present in the * array. - * @return wild If $array[$key] exists, that value is returned. If not, + * @return wild If `$array[$key]` exists, that value is returned. If not, * $default is returned without raising a warning. */ function idx(array $array, $key, $default = null) { // isset() is a micro-optimization - it is fast but fails for null values. if (isset($array[$key])) { return $array[$key]; } // Comparing $default is also a micro-optimization. if ($default === null || array_key_exists($key, $array)) { return null; } return $default; } /** * Call a method on a list of objects. 
Short for "method pull", this function * works just like @{function:ipull}, except that it operates on a list of * objects instead of a list of arrays. This function simplifies a common type * of mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $key => $object) { * $names[$key] = $object->getName(); * } * * You can express this more concisely with mpull(): * * $names = mpull($objects, 'getName'); * * mpull() takes a third argument, which allows you to do the same but for * the array's keys: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $object) { * $names[$object->getID()] = $object->getName(); * } * * This is the mpull version(): * * $names = mpull($objects, 'getName', 'getID'); * * If you pass ##null## as the second argument, the objects will be preserved: * * COUNTEREXAMPLE * $id_map = array(); * foreach ($objects as $object) { * $id_map[$object->getID()] = $object; * } * * With mpull(): * * $id_map = mpull($objects, null, 'getID'); * * See also @{function:ipull}, which works similarly but accesses array indexes * instead of calling methods. * * @param list Some list of objects. * @param string|null Determines which **values** will appear in the result * array. Use a string like 'getName' to store the * value of calling the named method in each value, or * ##null## to preserve the original objects. * @param string|null Determines how **keys** will be assigned in the result * array. Use a string like 'getID' to use the result * of calling the named method as each object's key, or - * ##null## to preserve the original keys. + * `null` to preserve the original keys. * @return dict A dictionary with keys and values derived according - * to whatever you passed as $method and $key_method. + * to whatever you passed as `$method` and `$key_method`. 
*/ function mpull(array $list, $method, $key_method = null) { $result = array(); foreach ($list as $key => $object) { if ($key_method !== null) { $key = $object->$key_method(); } if ($method !== null) { $value = $object->$method(); } else { $value = $object; } $result[$key] = $value; } return $result; } /** * Access a property on a list of objects. Short for "property pull", this * function works just like @{function:mpull}, except that it accesses object * properties instead of methods. This function simplifies a common type of * mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $key => $object) { * $names[$key] = $object->name; * } * * You can express this more concisely with ppull(): * * $names = ppull($objects, 'name'); * * ppull() takes a third argument, which allows you to do the same but for * the array's keys: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $object) { * $names[$object->id] = $object->name; * } * * This is the ppull version(): * * $names = ppull($objects, 'name', 'id'); * * If you pass ##null## as the second argument, the objects will be preserved: * * COUNTEREXAMPLE * $id_map = array(); * foreach ($objects as $object) { * $id_map[$object->id] = $object; * } * * With ppull(): * * $id_map = ppull($objects, null, 'id'); * * See also @{function:mpull}, which works similarly but calls object methods * instead of accessing object properties. * * @param list Some list of objects. * @param string|null Determines which **values** will appear in the result * array. Use a string like 'name' to store the value of * accessing the named property in each value, or - * ##null## to preserve the original objects. + * `null` to preserve the original objects. * @param string|null Determines how **keys** will be assigned in the result * array. Use a string like 'id' to use the result of * accessing the named property as each object's key, or - * ##null## to preserve the original keys. 
+ * `null` to preserve the original keys. * @return dict A dictionary with keys and values derived according - * to whatever you passed as $property and $key_property. + * to whatever you passed as `$property` and + * `$key_property`. */ function ppull(array $list, $property, $key_property = null) { $result = array(); foreach ($list as $key => $object) { if ($key_property !== null) { $key = $object->$key_property; } if ($property !== null) { $value = $object->$property; } else { $value = $object; } $result[$key] = $value; } return $result; } /** * Choose an index from a list of arrays. Short for "index pull", this function * works just like @{function:mpull}, except that it operates on a list of * arrays and selects an index from them instead of operating on a list of * objects and calling a method on them. * * This function simplifies a common type of mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($list as $key => $dict) { * $names[$key] = $dict['name']; * } * * With ipull(): * * $names = ipull($list, 'name'); * * See @{function:mpull} for more usage examples. * * @param list Some list of arrays. * @param scalar|null Determines which **values** will appear in the result * array. Use a scalar to select that index from each * array, or null to preserve the arrays unmodified as * values. * @param scalar|null Determines which **keys** will appear in the result * array. Use a scalar to select that index from each * array, or null to preserve the array keys. * @return dict A dictionary with keys and values derived according - * to whatever you passed for $index and $key_index. + * to whatever you passed for `$index` and `$key_index`. 
*/ function ipull(array $list, $index, $key_index = null) { $result = array(); foreach ($list as $key => $array) { if ($key_index !== null) { $key = $array[$key_index]; } if ($index !== null) { $value = $array[$index]; } else { $value = $array; } $result[$key] = $value; } return $result; } /** * Group a list of objects by the result of some method, similar to how * GROUP BY works in an SQL query. This function simplifies grouping objects * by some property: * * COUNTEREXAMPLE * $animals_by_species = array(); * foreach ($animals as $animal) { * $animals_by_species[$animal->getSpecies()][] = $animal; * } * * This can be expressed more tersely with mgroup(): * * $animals_by_species = mgroup($animals, 'getSpecies'); * * In either case, the result is a dictionary which maps species (e.g., like * "dog") to lists of animals with that property, so all the dogs are grouped * together and all the cats are grouped together, or whatever super * businessesey thing is actually happening in your problem domain. * * See also @{function:igroup}, which works the same way but operates on * array indexes. * * @param list List of objects to group by some property. * @param string Name of a method, like 'getType', to call on each object * in order to determine which group it should be placed into. * @param ... Zero or more additional method names, to subgroup the * groups. * @return dict Dictionary mapping distinct method returns to lists of * all objects which returned that value. */ function mgroup(array $list, $by /* , ... */) { $map = mpull($list, $by); $groups = array(); foreach ($map as $group) { // Can't array_fill_keys() here because 'false' gets encoded wrong. 
$groups[$group] = array(); } foreach ($map as $key => $group) { $groups[$group][$key] = $list[$key]; } $args = func_get_args(); $args = array_slice($args, 2); if ($args) { array_unshift($args, null); foreach ($groups as $group_key => $grouped) { $args[0] = $grouped; $groups[$group_key] = call_user_func_array('mgroup', $args); } } return $groups; } /** * Group a list of arrays by the value of some index. This function is the same * as @{function:mgroup}, except it operates on the values of array indexes * rather than the return values of method calls. * * @param list List of arrays to group by some index value. * @param string Name of an index to select from each array in order to * determine which group it should be placed into. * @param ... Zero or more additional indexes names, to subgroup the * groups. * @return dict Dictionary mapping distinct index values to lists of * all objects which had that value at the index. */ function igroup(array $list, $by /* , ... */) { $map = ipull($list, $by); $groups = array(); foreach ($map as $group) { $groups[$group] = array(); } foreach ($map as $key => $group) { $groups[$group][$key] = $list[$key]; } $args = func_get_args(); $args = array_slice($args, 2); if ($args) { array_unshift($args, null); foreach ($groups as $group_key => $grouped) { $args[0] = $grouped; $groups[$group_key] = call_user_func_array('igroup', $args); } } return $groups; } /** * Sort a list of objects by the return value of some method. In PHP, this is - * often vastly more efficient than ##usort()## and similar. + * often vastly more efficient than `usort()` and similar. * * // Sort a list of Duck objects by name. * $sorted = msort($ducks, 'getName'); * * It is usually significantly more efficient to define an ordering method - * on objects and call ##msort()## than to write a comparator. It is often more + * on objects and call `msort()` than to write a comparator. It is often more * convenient, as well. 
* * NOTE: This method does not take the list by reference; it returns a new list. * * @param list List of objects to sort by some property. * @param string Name of a method to call on each object; the return values * will be used to sort the list. * @return list Objects ordered by the return values of the method calls. */ function msort(array $list, $method) { $surrogate = mpull($list, $method); asort($surrogate); $result = array(); foreach ($surrogate as $key => $value) { $result[$key] = $list[$key]; } return $result; } /** * Sort a list of arrays by the value of some index. This method is identical to * @{function:msort}, but operates on a list of arrays instead of a list of * objects. * * @param list List of arrays to sort by some index value. * @param string Index to access on each object; the return values * will be used to sort the list. * @return list Arrays ordered by the index values. */ function isort(array $list, $index) { $surrogate = ipull($list, $index); asort($surrogate); $result = array(); foreach ($surrogate as $key => $value) { $result[$key] = $list[$key]; } return $result; } /** * Filter a list of objects by executing a method across all the objects and * filter out the ones wth empty() results. this function works just like * @{function:ifilter}, except that it operates on a list of objects instead * of a list of arrays. * * For example, to remove all objects with no children from a list, where * 'hasChildren' is a method name, do this: * * mfilter($list, 'hasChildren'); * * The optional third parameter allows you to negate the operation and filter * out nonempty objects. To remove all objects that DO have children, do this: * * mfilter($list, 'hasChildren', true); * * @param array List of objects to filter. * @param string A method name. * @param bool Optionally, pass true to drop objects which pass the * filter instead of keeping them. - * - * @return array List of objects which pass the filter. 
+ * @return array List of objects which pass the filter. */ function mfilter(array $list, $method, $negate = false) { if (!is_string($method)) { throw new InvalidArgumentException('Argument method is not a string.'); } $result = array(); foreach ($list as $key => $object) { $value = $object->$method(); if (!$negate) { if (!empty($value)) { $result[$key] = $object; } } else { if (empty($value)) { $result[$key] = $object; } } } return $result; } /** * Filter a list of arrays by removing the ones with an empty() value for some * index. This function works just like @{function:mfilter}, except that it * operates on a list of arrays instead of a list of objects. * * For example, to remove all arrays without value for key 'username', do this: * * ifilter($list, 'username'); * * The optional third parameter allows you to negate the operation and filter * out nonempty arrays. To remove all arrays that DO have value for key * 'username', do this: * * ifilter($list, 'username', true); * * @param array List of arrays to filter. * @param scalar The index. * @param bool Optionally, pass true to drop arrays which pass the * filter instead of keeping them. - * - * @return array List of arrays which pass the filter. + * @return array List of arrays which pass the filter. */ function ifilter(array $list, $index, $negate = false) { if (!is_scalar($index)) { throw new InvalidArgumentException('Argument index is not a scalar.'); } $result = array(); if (!$negate) { foreach ($list as $key => $array) { if (!empty($array[$index])) { $result[$key] = $array; } } } else { foreach ($list as $key => $array) { if (empty($array[$index])) { $result[$key] = $array; } } } return $result; } /** * Selects a list of keys from an array, returning a new array with only the * key-value pairs identified by the selected keys, in the specified order. 
* * Note that since this function orders keys in the result according to the * order they appear in the list of keys, there are effectively two common * uses: either reducing a large dictionary to a smaller one, or changing the * key order on an existing dictionary. * * @param dict Dictionary of key-value pairs to select from. * @param list List of keys to select. * @return dict Dictionary of only those key-value pairs where the key was * present in the list of keys to select. Ordering is * determined by the list order. */ function array_select_keys(array $dict, array $keys) { $result = array(); foreach ($keys as $key) { if (array_key_exists($key, $dict)) { $result[$key] = $dict[$key]; } } return $result; } /** - * Checks if all values of array are instances of the passed class. - * Throws InvalidArgumentException if it isn't true for any value. + * Checks if all values of array are instances of the passed class. Throws + * `InvalidArgumentException` if it isn't true for any value. * * @param array * @param string Name of the class or 'array' to check arrays. * @return array Returns passed array. */ function assert_instances_of(array $arr, $class) { $is_array = !strcasecmp($class, 'array'); foreach ($arr as $key => $object) { if ($is_array) { if (!is_array($object)) { $given = gettype($object); throw new InvalidArgumentException( "Array item with key '{$key}' must be of type array, ". "{$given} given."); } } else if (!($object instanceof $class)) { $given = gettype($object); if (is_object($object)) { $given = 'instance of '.get_class($object); } throw new InvalidArgumentException( "Array item with key '{$key}' must be an instance of {$class}, ". "{$given} given."); } } return $arr; } /** * Assert that passed data can be converted to string. * * @param string Assert that this data is valid. 
* @return void * * @task assert */ function assert_stringlike($parameter) { switch (gettype($parameter)) { case 'string': case 'NULL': case 'boolean': case 'double': case 'integer': return; case 'object': if (method_exists($parameter, '__toString')) { return; } break; case 'array': case 'resource': case 'unknown type': default: break; } throw new InvalidArgumentException( 'Argument must be scalar or object which implements __toString()!'); } /** - * Returns the first argument which is not strictly null, or ##null## if there + * Returns the first argument which is not strictly null, or `null` if there * are no such arguments. Identical to the MySQL function of the same name. * * @param ... Zero or more arguments of any type. - * @return mixed First non-##null## arg, or null if no such arg exists. + * @return mixed First non-`null` arg, or null if no such arg exists. */ function coalesce(/* ... */) { $args = func_get_args(); foreach ($args as $arg) { if ($arg !== null) { return $arg; } } return null; } /** * Similar to @{function:coalesce}, but less strict: returns the first - * non-##empty()## argument, instead of the first argument that is strictly - * non-##null##. If no argument is nonempty, it returns the last argument. This + * non-`empty()` argument, instead of the first argument that is strictly + * non-`null`. If no argument is nonempty, it returns the last argument. This * is useful idiomatically for setting defaults: * * $display_name = nonempty($user_name, $full_name, "Anonymous"); * * @param ... Zero or more arguments of any type. - * @return mixed First non-##empty()## arg, or last arg if no such arg + * @return mixed First non-`empty()` arg, or last arg if no such arg * exists, or null if you passed in zero args. */ function nonempty(/* ... */) { $args = func_get_args(); $result = null; foreach ($args as $arg) { $result = $arg; if ($arg) { break; } } return $result; } /** * Invokes the "new" operator with a vector of arguments. 
There is no way to - * call_user_func_array() on a class constructor, so you can instead use this + * `call_user_func_array()` on a class constructor, so you can instead use this * function: * * $obj = newv($class_name, $argv); * * That is, these two statements are equivalent: * * $pancake = new Pancake('Blueberry', 'Maple Syrup', true); * $pancake = newv('Pancake', array('Blueberry', 'Maple Syrup', true)); * * DO NOT solve this problem in other, more creative ways! Three popular * alternatives are: * * - Build a fake serialized object and unserialize it. * - Invoke the constructor twice. - * - just use eval() lol + * - just use `eval()` lol * * These are really bad solutions to the problem because they can have side * effects (e.g., __wakeup()) and give you an object in an otherwise impossible * state. Please endeavor to keep your objects in possible states. * * If you own the classes you're doing this for, you should consider whether * or not restructuring your code (for instance, by creating static - * construction methods) might make it cleaner before using newv(). Static - * constructors can be invoked with call_user_func_array(), and may give your + * construction methods) might make it cleaner before using `newv()`. Static + * constructors can be invoked with `call_user_func_array()`, and may give your * class a cleaner and more descriptive API. * * @param string The name of a class. * @param list Array of arguments to pass to its constructor. * @return obj A new object of the specified class, constructed by passing * the argument vector to its constructor. */ function newv($class_name, array $argv) { $reflector = new ReflectionClass($class_name); if ($argv) { return $reflector->newInstanceArgs($argv); } else { return $reflector->newInstance(); } } /** * Returns the first element of an array. Exactly like reset(), but doesn't * choke if you pass it some non-referenceable value like the return value of * a function. 
* * @param array Array to retrieve the first element from. * @return wild The first value of the array. */ function head(array $arr) { return reset($arr); } /** - * Returns the last element of an array. This is exactly like end() except + * Returns the last element of an array. This is exactly like `end()` except * that it won't warn you if you pass some non-referencable array to * it -- e.g., the result of some other array operation. * * @param array Array to retrieve the last element from. * @return wild The last value of the array. */ function last(array $arr) { return end($arr); } /** * Returns the first key of an array. * * @param array Array to retrieve the first key from. * @return int|string The first key of the array. */ function head_key(array $arr) { reset($arr); return key($arr); } /** * Returns the last key of an array. * * @param array Array to retrieve the last key from. * @return int|string The last key of the array. */ function last_key(array $arr) { end($arr); return key($arr); } /** * Merge a vector of arrays performantly. This has the same semantics as * array_merge(), so these calls are equivalent: * * array_merge($a, $b, $c); * array_mergev(array($a, $b, $c)); * * However, when you have a vector of arrays, it is vastly more performant to * merge them with this function than by calling array_merge() in a loop, * because using a loop generates an intermediary array on each iteration. * * @param list Vector of arrays to merge. * @return list Arrays, merged with array_merge() semantics. */ function array_mergev(array $arrayv) { if (!$arrayv) { return array(); } foreach ($arrayv as $key => $item) { if (!is_array($item)) { throw new InvalidArgumentException( pht( 'Expected all items passed to array_mergev() to be arrays, but '. 'argument with key "%s" has type "%s".', $key, gettype($item))); } } return call_user_func_array('array_merge', $arrayv); } /** * Split a corpus of text into lines. 
This function splits on "\n", "\r\n", or * a mixture of any of them. * * NOTE: This function does not treat "\r" on its own as a newline because none * of SVN, Git or Mercurial do on any OS. * * @param string Block of text to be split into lines. * @param bool If true, retain line endings in result strings. * @return list List of lines. */ function phutil_split_lines($corpus, $retain_endings = true) { if (!strlen($corpus)) { return array(''); } // Split on "\r\n" or "\n". if ($retain_endings) { $lines = preg_split('/(?<=\n)/', $corpus); } else { $lines = preg_split('/\r?\n/', $corpus); } // If the text ends with "\n" or similar, we'll end up with an empty string // at the end; discard it. if (end($lines) == '') { array_pop($lines); } if ($corpus instanceof PhutilSafeHTML) { return array_map('phutil_safe_html', $lines); } return $lines; } /** * Simplifies a common use of `array_combine()`. Specifically, this: * * COUNTEREXAMPLE: * if ($list) { * $result = array_combine($list, $list); * } else { * // Prior to PHP 5.4, array_combine() failed if given empty arrays. * $result = array(); * } * * ...is equivalent to this: * * $result = array_fuse($list); * * @param list List of scalars. * @return dict Dictionary with inputs mapped to themselves. */ function array_fuse(array $list) { if ($list) { return array_combine($list, $list); } return array(); } /** * Add an element between every two elements of some array. That is, given a * list `A, B, C, D`, and some element to interleave, `x`, this function returns * `A, x, B, x, C, x, D`. This works like `implode()`, but does not concatenate * the list into a string. In particular: * * implode('', array_interleave($x, $list)); * * ...is equivalent to: * * implode($x, $list); * * This function does not preserve keys. * * @param wild Element to interleave. * @param list List of elements to be interleaved. * @return list Original list with the new element interleaved. 
*/ function array_interleave($interleave, array $array) { $result = array(); foreach ($array as $item) { $result[] = $item; $result[] = $interleave; } array_pop($result); return $result; } -/** - * @group library - */ function phutil_is_windows() { // We can also use PHP_OS, but that's kind of sketchy because it returns // "WINNT" for Windows 7 and "Darwin" for Mac OS X. Practically, testing for // DIRECTORY_SEPARATOR is more straightforward. return (DIRECTORY_SEPARATOR != '/'); } -/** - * @group library - */ function phutil_is_hiphop_runtime() { return (array_key_exists('HPHP', $_ENV) && $_ENV['HPHP'] === 1); } /** * Fire an event allowing any listeners to clear up any outstanding requirements * before the request completes abruptly. * * @param int|string $status - * @group library */ function phutil_exit($status = 0) { $event = new PhutilEvent( PhutilEventType::TYPE_WILLEXITABRUPTLY, array('status' => $status)); PhutilEventEngine::dispatchEvent($event); exit($status); } /** * Converts a string to a loggable one, with unprintables and newlines escaped. * * @param string Any string. * @return string String with control and newline characters escaped, suitable * for printing on a single log line. */ function phutil_loggable_string($string) { if (preg_match('/^[\x20-\x7E]+$/', $string)) { return $string; } $result = ''; static $c_map = array( '\\' => '\\\\', "\n" => '\\n', "\r" => '\\r', "\t" => '\\t', ); $len = strlen($string); for ($ii = 0; $ii < $len; $ii++) { $c = $string[$ii]; if (isset($c_map[$c])) { $result .= $c_map[$c]; } else { $o = ord($c); if ($o < 0x20 || $o == 0x7F) { $result .= '\\x'.sprintf('%02X', $o); } else { $result .= $c; } } } return $result; } /** * Perform an `fwrite()` which distinguishes between EAGAIN and EPIPE. 
* * PHP's `fwrite()` is broken, and never returns `false` for writes to broken * nonblocking pipes: it always returns 0, and provides no straightforward * mechanism for distinguishing between EAGAIN (buffer is full, can't write any * more right now) and EPIPE or similar (no write will ever succeed). * * See: https://bugs.php.net/bug.php?id=39598 * * If you call this method instead of `fwrite()`, it will attempt to detect * when a zero-length write is caused by EAGAIN and return `0` only if the * write really should be retried. * * @param resource Socket or pipe stream. * @param string Bytes to write. * @return bool|int Number of bytes written, or `false` on any error (including * errors which `fpipe()` can not detect, like a broken pipe). */ function phutil_fwrite_nonblocking_stream($stream, $bytes) { if (!strlen($bytes)) { return 0; } $result = @fwrite($stream, $bytes); if ($result !== 0) { // In cases where some bytes are witten (`$result > 0`) or // an error occurs (`$result === false`), the behavior of fwrite() is // correct. We can return the value as-is. return $result; } // If we make it here, we performed a 0-length write. Try to distinguish // between EAGAIN and EPIPE. To do this, we're going to `stream_select()` // the stream, write to it again if PHP claims that it's writable, and // consider the pipe broken if the write fails. $read = array(); $write = array($stream); $except = array(); @stream_select($read, $write, $except, 0); if (!$write) { // The stream isn't writable, so we conclude that it probably really is // blocked and the underlying error was EAGAIN. Return 0 to indicate that // no data could be written yet. return 0; } // If we make it here, PHP **just** claimed that this stream is writable, so // perform a write. If the write also fails, conclude that these failures are // EPIPE or some other permanent failure. $result = @fwrite($stream, $bytes); if ($result !== 0) { // The write worked or failed explicitly. This value is fine to return. 
return $result; } // We performed a 0-length write, were told that the stream was writable, and // then immediately performed another 0-length write. Conclude that the pipe // is broken and return `false`. return false; } /** * Convert a human-readable unit description into a numeric one. This function * allows you to replace this: * * COUNTEREXAMPLE * $ttl = (60 * 60 * 24 * 30); // 30 days * * ...with this: * * $ttl = phutil_units('30 days in seconds'); * * ...which is self-documenting and difficult to make a mistake with. * * @param string Human readable description of a unit quantity. * @return int Quantity of specified unit. */ function phutil_units($description) { - $matches = null; if (!preg_match('/^(\d+) (\w+) in (\w+)$/', $description, $matches)) { throw new InvalidArgumentException( pht( 'Unable to parse unit specification (expected a specification in the '. 'form "5 days in seconds"): %s', $description)); } $quantity = (int)$matches[1]; $src_unit = $matches[2]; $dst_unit = $matches[3]; switch ($dst_unit) { case 'seconds': switch ($src_unit) { case 'second': case 'seconds': $factor = 1; break; case 'minute': case 'minutes': $factor = 60; break; case 'hour': case 'hours': $factor = 60 * 60; break; case 'day': case 'days': $factor = 60 * 60 * 24; break; default: throw new InvalidArgumentException( pht( 'This function can not convert from the unit "%s".', $src_unit)); } break; default: throw new InvalidArgumentException( pht( 'This function can not convert into the unit "%s".', $dst_unit)); } return $quantity * $factor; } /** * Decode a JSON dictionary. * * @param string A string which ostensibly contains a JSON-encoded list or * dictionary. * @return mixed Decoded list/dictionary. */ function phutil_json_decode($string) { $result = @json_decode($string, true); if (!is_array($result)) { // Failed to decode the JSON. Try to use @{class:PhutilJSONParser} instead. // This will probably fail, but will throw a useful exception. 
$parser = new PhutilJSONParser(); $result = $parser->parse($string); } return $result; } /** * Attempt to censor any plaintext credentials from a string. * * The major use case here is to censor usernames and passwords from command * output. For example, when `git fetch` fails, the output includes credentials * for authenticated HTTP remotes. * * @param string Some block of text. * @return string A similar block of text, but with credentials that could * be identified censored. */ function phutil_censor_credentials($string) { return preg_replace(',(?<=://)([^/@\s]+)(?=@|$),', 'xxxxx', $string); } diff --git a/src/xsprintf/PhutilQsprintfInterface.php b/src/xsprintf/PhutilQsprintfInterface.php index 9dd03f6..580391f 100644 --- a/src/xsprintf/PhutilQsprintfInterface.php +++ b/src/xsprintf/PhutilQsprintfInterface.php @@ -1,12 +1,9 @@ $pos + 1) ? $pattern[$pos + 1] : null; $is_unmasked = !empty($userdata['unmasked']); if ($value instanceof PhutilCommandString) { if ($is_unmasked) { $value = $value->getUnmaskedString(); } else { $value = $value->getMaskedString(); } } switch ($type) { case 'L': // Remove the L. $pattern = substr_replace($pattern, '', $pos, 1); $length = strlen($pattern); $type = 's'; // Check that the value is a non-empty array. 
if (!is_array($value)) { throw new InvalidArgumentException( "Expected an array for %L{$next} conversion."); } switch ($next) { case 's': $values = array(); foreach ($value as $val) { $values[] = csprintf('%s', $val); } $value = implode(' ', $values); break; case 'R': $values = array(); foreach ($value as $val) { $values[] = csprintf('%R', $val); } $value = implode(' ', $values); break; default: throw new XsprintfUnknownConversionException("%L{$next}"); } break; case 'R': if (!preg_match('(^[a-zA-Z0-9:/@._-]+$)', $value)) { $value = escapeshellarg($value); } $type = 's'; break; case 's': $value = escapeshellarg($value); $type = 's'; break; case 'P': if (!($value instanceof PhutilOpaqueEnvelope)) { throw new InvalidArgumentException( 'Expected PhutilOpaqueEnvelope for %P conversion.'); } if ($is_unmasked) { $value = $value->openEnvelope(); } else { $value = 'xxxxx'; } $value = escapeshellarg($value); $type = 's'; break; case 'C': $type = 's'; break; } $pattern[$pos] = $type; } diff --git a/src/xsprintf/hgsprintf.php b/src/xsprintf/hgsprintf.php index 4f4f730..326d0e1 100644 --- a/src/xsprintf/hgsprintf.php +++ b/src/xsprintf/hgsprintf.php @@ -1,37 +1,33 @@ 0x1FFFFFFFFFFFFF) { throw new RangeException( "You are passing an integer to jsprintf() which is so large it can ". "not be represented without loss of precision by Javascript's ". "native Number class. Use %# instead."); } break; } $pattern[$pos] = $type; } diff --git a/src/xsprintf/ldapsprintf.php b/src/xsprintf/ldapsprintf.php index 99fd398..58eb6c8 100644 --- a/src/xsprintf/ldapsprintf.php +++ b/src/xsprintf/ldapsprintf.php @@ -1,49 +1,48 @@ ;"= '); $type = 's'; break; case 'Q': $type = 's'; break; } $pattern[$pos] = $type; } diff --git a/src/xsprintf/qsprintf.php b/src/xsprintf/qsprintf.php index 10bbb14..20732fb 100644 --- a/src/xsprintf/qsprintf.php +++ b/src/xsprintf/qsprintf.php @@ -1,321 +1,308 @@ and %<. * * %> ("Prefix") * Escapes a prefix query for a LIKE clause. 
For example: * * // Find all rows where `name` starts with $prefix. * qsprintf($escaper, 'WHERE name LIKE %>', $prefix); * * %< ("Suffix") * Escapes a suffix query for a LIKE clause. For example: * * // Find all rows where `name` ends with $suffix. * qsprintf($escaper, 'WHERE name LIKE %<', $suffix); - * - * @group storage */ -function qsprintf(PhutilQsprintfInterface $escaper, $pattern/* , ... */) { +function qsprintf(PhutilQsprintfInterface $escaper, $pattern /* , ... */) { $args = func_get_args(); array_shift($args); return xsprintf('xsprintf_query', $escaper, $args); } -/** - * @group storage - */ function vqsprintf(PhutilQsprintfInterface $escaper, $pattern, array $argv) { array_unshift($argv, $pattern); return xsprintf('xsprintf_query', $escaper, $argv); } /** * @{function:xsprintf} callback for encoding SQL queries. See * @{function:qsprintf}. - * - * @group storage */ function xsprintf_query($userdata, &$pattern, &$pos, &$value, &$length) { $type = $pattern[$pos]; $escaper = $userdata; $next = (strlen($pattern) > $pos + 1) ? $pattern[$pos + 1] : null; $nullable = false; $done = false; $prefix = ''; if (!($escaper instanceof PhutilQsprintfInterface)) { throw new InvalidArgumentException('Invalid database escaper.'); } switch ($type) { case '=': // Nullable test switch ($next) { case 'd': case 'f': case 's': $pattern = substr_replace($pattern, '', $pos, 1); $length = strlen($pattern); $type = 's'; if ($value === null) { $value = 'IS NULL'; $done = true; } else { $prefix = '= '; $type = $next; } break; default: throw new Exception('Unknown conversion, try %=d, %=s, or %=f.'); } break; case 'n': // Nullable... switch ($next) { case 'd': // ...integer. case 'f': // ...float. case 's': // ...string. case 'B': // ...binary string. $pattern = substr_replace($pattern, '', $pos, 1); $length = strlen($pattern); $type = $next; $nullable = true; break; default: throw new XsprintfUnknownConversionException("%n{$next}"); } break; case 'L': // List of.. 
_qsprintf_check_type($value, "L{$next}", $pattern); $pattern = substr_replace($pattern, '', $pos, 1); $length = strlen($pattern); $type = 's'; $done = true; switch ($next) { case 'd': // ...integers. $value = implode(', ', array_map('intval', $value)); break; case 's': // ...strings. foreach ($value as $k => $v) { $value[$k] = "'".$escaper->escapeUTF8String((string)$v)."'"; } $value = implode(', ', $value); break; case 'B': // ...binary strings. foreach ($value as $k => $v) { $value[$k] = "'".$escaper->escapeBinaryString((string)$v)."'"; } $value = implode(', ', $value); break; case 'C': // ...columns. foreach ($value as $k => $v) { $value[$k] = $escaper->escapeColumnName($v); } $value = implode(', ', $value); break; default: throw new XsprintfUnknownConversionException("%L{$next}"); } break; } if (!$done) { _qsprintf_check_type($value, $type, $pattern); switch ($type) { case 's': // String if ($nullable && $value === null) { $value = 'NULL'; } else { $value = "'".$escaper->escapeUTF8String((string)$value)."'"; } $type = 's'; break; case 'B': // Binary String if ($nullable && $value === null) { $value = 'NULL'; } else { $value = "'".$escaper->escapeBinaryString((string)$value)."'"; } $type = 's'; break; case 'Q': // Query Fragment $type = 's'; break; case '~': // Like Substring case '>': // Like Prefix case '<': // Like Suffix $value = $escaper->escapeStringForLikeClause($value); switch ($type) { case '~': $value = "'%".$value."%'"; break; case '>': $value = "'".$value."%'"; break; case '<': $value = "'%".$value."'"; break; } $type = 's'; break; case 'f': // Float if ($nullable && $value === null) { $value = 'NULL'; } else { $value = (float)$value; } $type = 's'; break; case 'd': // Integer if ($nullable && $value === null) { $value = 'NULL'; } else { $value = (int)$value; } $type = 's'; break; case 'T': // Table case 'C': // Column $value = $escaper->escapeColumnName($value); $type = 's'; break; case 'K': // Komment $value = 
$escaper->escapeMultilineComment($value); $type = 's'; break; default: throw new XsprintfUnknownConversionException($type); } } if ($prefix) { $value = $prefix.$value; } $pattern[$pos] = $type; } -/** - * @group storage - */ function _qsprintf_check_type($value, $type, $query) { switch ($type) { case 'Ld': case 'Ls': case 'LC': case 'LB': if (!is_array($value)) { throw new AphrontQueryParameterException( $query, "Expected array argument for %{$type} conversion."); } if (empty($value)) { throw new AphrontQueryParameterException( $query, "Array for %{$type} conversion is empty."); } foreach ($value as $scalar) { _qsprintf_check_scalar_type($scalar, $type, $query); } break; default: _qsprintf_check_scalar_type($value, $type, $query); break; } } -/** - * @group storage - */ function _qsprintf_check_scalar_type($value, $type, $query) { switch ($type) { case 'Q': case 'LC': case 'T': case 'C': if (!is_string($value)) { throw new AphrontQueryParameterException( $query, "Expected a string for %{$type} conversion."); } break; case 'Ld': case 'd': case 'f': if (!is_null($value) && !is_numeric($value)) { throw new AphrontQueryParameterException( $query, "Expected a numeric scalar or null for %{$type} conversion."); } break; case 'Ls': case 's': case 'LB': case 'B': case '~': case '>': case '<': case 'K': if (!is_null($value) && !is_scalar($value)) { throw new AphrontQueryParameterException( $query, "Expected a scalar or null for %{$type} conversion."); } break; default: throw new XsprintfUnknownConversionException($type); } } diff --git a/src/xsprintf/queryfx.php b/src/xsprintf/queryfx.php index 27c859e..f8317ac 100644 --- a/src/xsprintf/queryfx.php +++ b/src/xsprintf/queryfx.php @@ -1,50 +1,35 @@ executeRawQuery($query); } -/** - * @group storage - */ function vqueryfx(AphrontDatabaseConnection $conn, $sql, array $argv) { array_unshift($argv, $conn, $sql); call_user_func_array('queryfx', $argv); } -/** - * @group storage - */ -function queryfx_all(AphrontDatabaseConnection 
$conn, $sql/* , ... */) { +function queryfx_all(AphrontDatabaseConnection $conn, $sql /* , ... */) { $argv = func_get_args(); call_user_func_array('queryfx', $argv); return $conn->selectAllResults(); } -/** - * @group storage - */ -function queryfx_one(AphrontDatabaseConnection $conn, $sql/* , ... */) { +function queryfx_one(AphrontDatabaseConnection $conn, $sql /* , ... */) { $argv = func_get_args(); $ret = call_user_func_array('queryfx_all', $argv); if (count($ret) > 1) { throw new AphrontQueryCountException('Query returned more than one row.'); } else if (count($ret)) { return reset($ret); } return null; } -/** - * @group storage - */ function vqueryfx_all(AphrontDatabaseConnection $conn, $sql, array $argv) { array_unshift($argv, $conn, $sql); call_user_func_array('queryfx', $argv); return $conn->selectAllResults(); } diff --git a/src/xsprintf/urisprintf.php b/src/xsprintf/urisprintf.php index 6c7e2a5..4a8d6f7 100644 --- a/src/xsprintf/urisprintf.php +++ b/src/xsprintf/urisprintf.php @@ -1,51 +1,50 @@