diff --git a/src/filesystem/filefinder/FileFinder.php b/src/filesystem/filefinder/FileFinder.php index 64f2bad..9c54536 100644 --- a/src/filesystem/filefinder/FileFinder.php +++ b/src/filesystem/filefinder/FileFinder.php @@ -1,150 +1,189 @@ withType('f') + * ->withSuffix('php') + * ->find(); + * + * @task create Creating a File Query + * @task config Configuring File Queries + * @task exec Executing the File Query + * @task internal Internal * @group filesystem */ -class FileFinder { - - protected $root; - protected $exclude = array(); - protected $paths = array(); - protected $suffix = array(); - protected $type; - protected $generateChecksums = false; - +final class FileFinder { + + private $root; + private $exclude = array(); + private $paths = array(); + private $suffix = array(); + private $type; + private $generateChecksums = false; + + /** + * Create a new FileFinder. + * + * @param string Root directory to find files beneath. + * @return this + * @task create + */ public function __construct($root) { $this->root = $root; } + /** + * @task config + */ public function excludePath($path) { $this->exclude[] = $path; return $this; } + /** + * @task config + */ public function withSuffix($suffix) { $this->suffix[] = '*.'.$suffix; return $this; } + /** + * @task config + */ public function withPath($path) { $this->paths[] = $path; return $this; } + /** + * @task config + */ public function withType($type) { $this->type = $type; return $this; } + /** + * @task config + */ public function setGenerateChecksums($generate) { $this->generateChecksums = $generate; return $this; } + /** + * @task exec + */ public function find() { $args = array(); $command = array(); $command[] = '(cd %s; '; $args[] = $this->root; $command[] = 'find .'; if ($this->exclude) { $command[] = $this->generateList('path', $this->exclude).' -prune'; $command[] = '-o'; } if ($this->type) { $command[] = '-type %s'; $args[] = $this->type; } if ($this->suffix) { $command[] = $this->generateList('name', $this->suffix); } if ($this->paths) { $command[] = $this->generateList('wholename', $this->paths); } $command[] = '-print0'; if ($this->generateChecksums) { static $md5sum_binary = null; if ($md5sum_binary == null) { $options = array( 'md5sum' => 'md5sum', 'md5' => 'md5 -r', ); foreach ($options as $bin => $choose) { list($err) = exec_manual('which %s', $bin); if ($err == 0) { $md5sum_binary = $choose; break; } } if ($md5sum_binary === null) { throw new Exception( "Unable to locate the md5/md5sum binary for this system."); } } $command[] = ' | xargs -0 -n512 '.$md5sum_binary; } $command[] = ')'; list($stdout) = call_user_func_array( 'execx', array_merge( array(implode(' ', $command)), $args)); if (!$this->generateChecksums) { return explode("\0", trim($stdout)); } else { $stdout = trim($stdout); $map = array(); foreach (explode("\n", $stdout) as $line) { $file = substr($line, 34); if ($file == '-') { continue; } // This mess is to make this class work on both mainline Linux systems // and OSX, which has subtly different 'find' semantics. $file = $this->root.ltrim($file, '.'); $map[$file] = substr($line, 0, 32); } return $map; } } - protected function generateList($flag, array $items) { + /** + * @task internal + */ + private function generateList($flag, array $items) { $items = array_map('escapeshellarg', $items); foreach ($items as $key => $item) { $items[$key] = '-'.$flag.' '.$item; } $items = implode(' -o ', $items); return '\\( '.$items.' \\)'; } } diff --git a/src/filesystem/filelist/FileList.php b/src/filesystem/filelist/FileList.php index 8690504..9ed47cf 100644 --- a/src/filesystem/filelist/FileList.php +++ b/src/filesystem/filelist/FileList.php @@ -1,91 +1,109 @@ contains($file)) { + * do_something_to_this($file); + * } + * } + * + * This sort of construction will allow the user to type "src" in order + * to indicate 'all relevant files underneath "src/"'. + * + * @task create Creating a File List + * @task test Testing File Lists * @group filesystem */ -class FileList { +final class FileList { - protected $files = array(); - protected $dirs = array(); + private $files = array(); + private $dirs = array(); /** * Build a new FileList from an array of paths, e.g. from $argv. * * @param list List of relative or absolute file paths. + * @return this + * @task create */ public function __construct($paths) { foreach ($paths as $path) { $path = Filesystem::resolvePath($path); if (is_dir($path)) { $path = rtrim($path, '/').'/'; $this->dirs[$path] = true; } $this->files[] = $path; } } /** * Determine if a path is one of the paths in the list. Note that an empty * file list is considered to contain every file. * * @param string Relative or absolute system file path. * @param bool If true, consider the path to be contained in the list if * the list contains a parent directory. If false, require * that the path be part of the list explicitly. * @return bool If true, the file is in the list. + * @task test */ public function contains($path, $allow_parent_directory = true) { if ($this->isEmpty()) { return true; } $path = Filesystem::resolvePath($path); if (is_dir($path)) { $path .= '/'; } foreach ($this->files as $file) { if ($file == $path) { return true; } if ($allow_parent_directory) { $len = strlen($file); if (isset($this->dirs[$file]) && !strncmp($file, $path, $len)) { return true; } } } return false; } /** - * Check if the file list is empty -- that is, it contains no files. + * Check if the file list is empty -- that is, it contains no files. * - * @return bool If true, the list is empty. + * @return bool If true, the list is empty. + * @task test */ public function isEmpty() { return !$this->files; } } diff --git a/src/storage/connection/mysql/PhutilMySQLDatabaseConnection.php b/src/storage/connection/mysql/PhutilMySQLDatabaseConnection.php index a2957df..12d21df 100644 --- a/src/storage/connection/mysql/PhutilMySQLDatabaseConnection.php +++ b/src/storage/connection/mysql/PhutilMySQLDatabaseConnection.php @@ -1,191 +1,191 @@ configuration = $configuration; } public function escapeString($string) { if (!$this->connection) { $this->establishConnection(); } return mysql_real_escape_string($string, $this->connection); } public function escapeColumnName($name) { return '`'.str_replace('`', '\\`', $name).'`'; } public function escapeMultilineComment($comment) { // These can either terminate a comment, confuse the hell out of the parser, // make MySQL execute the comment as a query, or, in the case of semicolon, // are quasi-dangerous because the semicolon could turn a broken query into // a working query plus an ignored query. static $map = array( '--' => '(DOUBLEDASH)', '*/' => '(STARSLASH)', - '//' => '(SLASHSLASHL)', + '//' => '(SLASHSLASH)', '#' => '(HASH)', '!' => '(BANG)', ';' => '(SEMICOLON)', ); $comment = str_replace( array_keys($map), array_values($map), $comment); // For good measure, kill anything else that isn't a nice printable // character. $comment = preg_replace('/[^\x20-\x7F]+/', ' ', $comment); return '/* '.$comment.' */'; } public function escapeStringForLikeClause($value) { $value = $this->escapeString($value); // Ideally the query shouldn't be modified after safely escaping it, // but we need to escape _ and % within LIKE terms. $value = str_replace( // Even though we've already escaped, we need to replace \ with \\ // because MYSQL unescapes twice inside a LIKE clause. See note // at mysql.com. However, if the \ is being used to escape a single // quote ('), then the \ should not be escaped. Thus, after all \ // are replaced with \\, we need to revert instances of \\' back to // \'. array('\\', '\\\\\'', '_', '%'), array('\\\\', '\\\'', '\_', '\%'), $value); return $value; } private function getConfiguration($key, $default = null) { return idx($this->configuration, $key, $default); } private function establishConnection() { $this->connection = null; $conn = @mysql_connect( $this->getConfiguration('host'), $this->getConfiguration('user'), $this->getConfiguration('pass'), $new_link = true, $flags = 0); if (!$conn) { throw new PhutilQueryConnectionException(); } $ret = @mysql_select_db($this->getConfiguration('database'), $conn); if (!$ret) { $this->throwQueryException($conn); } $this->connection = $conn; } public function getInsertID() { return mysql_insert_id($this->requireConnection()); } public function getAffectedRows() { return mysql_affected_rows($this->requireConnection()); } public function getTransactionKey() { return (int)$this->requireConnection(); } private function requireConnection() { if (!$this->connection) { throw new Exception("Connection is required."); } return $this->connection; } public function selectAllResults() { $result = array(); $res = $this->lastResult; if ($res == null) { throw new Exception('No query result to fetch from!'); } while (($row = mysql_fetch_assoc($res)) !== false) { $result[] = $row; } return $result; } public function executeRawQuery($raw_query) { $this->lastResult = null; $retries = 3; while ($retries--) { try { if (!$this->connection) { $this->establishConnection(); } $result = mysql_query($raw_query, $this->connection); if ($result) { $this->lastResult = $result; break; } $this->throwQueryException($this->connection); } catch (PhutilQueryConnectionLostException $ex) { if (!$retries) { throw $ex; } if ($this->isInsideTransaction()) { throw $ex; } $this->connection = null; } } } private function throwQueryException($connection) { $errno = mysql_errno($connection); $error = mysql_error($connection); switch ($errno) { case 2013: // Connection Dropped case 2006: // Gone Away throw new PhutilQueryConnectionLostException("#{$errno}: {$error}"); break; case 1213: // Deadlock case 1205: // Lock wait timeout exceeded throw new PhutilQueryRecoverableException("#{$errno}: {$error}"); break; default: // TODO: 1062 is syntax error, and quite terrible in production. throw new PhutilQueryException("#{$errno}: {$error}"); } } } diff --git a/src/utils/utils.php b/src/utils/utils.php index 9f2f48c..50a7eb2 100644 --- a/src/utils/utils.php +++ b/src/utils/utils.php @@ -1,380 +1,380 @@ doStuff(); * * ...but this works fine: * * id(new Thing())->doStuff(); * * @param wild Anything. * @return wild Unmodified argument. * @group util */ function id($x) { return $x; } /** * Access an array index, retrieving the value stored there if it exists or * a default if it does not. This function allows you to concisely access an * index which may or may not exist without raising a warning. * * @param array Array to access. * @param scalar Index to access in the array. * @param wild Default value to return if the key is not present in the * array. * @return wild If $array[$key] exists, that value is returned. If not, * $default is returned without raising a warning. * @group util */ function idx(array $array, $key, $default = null) { return array_key_exists($key, $array) ? $array[$key] : $default; } /** * Call a method on a list of objects. Short for "method pull", this function * works just like @{function:ipull}, except that it operates on a list of * objects instead of a list of arrays. This function simplifies a common type * of mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $key => $object) { * $names[$key] = $object->getName(); * } * * You can express this more concisely with mpull(): * * $names = mpull($objects, 'getName'); * * mpull() takes a third argument, which allows you to do the same but for * the array's keys: * * COUNTEREXAMPLE * $names = array(); * foreach ($objects as $object) { * $names[$object->getID()] = $object->getName(); * } * * This is the mpull version(): * * $names = mpull($objects, 'getName', 'getID'); * * If you pass ##null## as the second argument, the objects will be preserved: * * COUNTEREXAMPLE * $id_map = array(); * foreach ($objects as $object) { * $id_map[$object->getID()] = $object; * } * * With mpull(): * * $id_map = mpull($objects, null, 'getID'); * * See also @{function:ipull}, which works similarly but accesses array indexes * instead of calling methods. * * @param list Some list of objects. * @param string|null Determines which **values** will appear in the result * array. Use a string like 'getName' to store the * value of calling the named method in each value, or * ##null## to preserve the original objects. * @param string|null Determines how **keys** will be assigned in the result * array. Use a string like 'getID' to use the result * of calling the named method as each object's key, or * ##null## to preserve the original keys. * @return dict A dictionary with keys and values derived according * to whatever you passed as $method and $key_method. * @group util */ function mpull(array $list, $method, $key_method = null) { $result = array(); foreach ($list as $key => $object) { if ($key_method !== null) { $key = $object->$key_method(); } if ($method !== null) { $value = $object->$method(); } else { $value = $object; } $result[$key] = $value; } return $result; } /** - * Choose an index from a list of arrays. Short for "index pull", this this - * function works just like @{function:mpull}, except that it operates on a list - * of arrays and selects an index from them instead of operating on a list of + * Choose an index from a list of arrays. Short for "index pull", this function + * works just like @{function:mpull}, except that it operates on a list of + * arrays and selects an index from them instead of operating on a list of * objects and calling a method on them. * * This function simplifies a common type of mapping operation: * * COUNTEREXAMPLE * $names = array(); * foreach ($list as $key => $dict) { * $names[$key] = $dict['name']; * } * * With ipull(): * * $names = ipull($list, 'name'); * * See @{function:mpull} for more usage examples. * * @param list Some list of arrays. * @param scalar|null Determines which **values** will appear in the result * array. Use a scalar to select that index from each * array, or null to preserve the arrays unmodified as * values. * @param scalar|null Determines which **keys** will appear in the result * array. Use a scalar to select that index from each * array, or null to preserve the array keys. * @return dict A dictionary with keys and values derived according * to whatever you passed for $index and $key_index. * @group util */ function ipull(array $list, $index, $key_index = null) { $result = array(); foreach ($list as $key => $array) { if ($key_index !== null) { $key = $array[$key_index]; } if ($index !== null) { $value = $array[$index]; } else { $value = $array; } $result[$key] = $value; } return $result; } /** * Group a list of objects by the result of some method, similar to how * GROUP BY works in an SQL query. This function simplifies grouping objects * by some property: * * COUNTEREXAMPLE * $animals_by_species = array(); * foreach ($animals as $animal) { * $animals_by_species[$animal->getSpecies()][] = $animal; * } * * This can be expressed more tersely with mgroup(): * * $animals_by_species = mgroup($animals, 'getSpecies'); * * In either case, the result is a dictionary which maps species (e.g., like * "dog") to lists of animals with that property, so all the dogs are grouped * together and all the cats are grouped together, or whatever super * businessesey thing is actually happening in your problem domain. * * @param list List of objects to group by some property. * @param string Name of a method, like 'getType', to call on each object * in order to determine which group it should be placed into. * @param ... Zero or more additional method names, to subgroup the * groups. * @return dict Dictionary mapping distinct method returns to lists of * all objects which returned that value. * @group util */ function mgroup(array $list, $by /*, ... */) { $map = mpull($list, $by); $groups = array(); foreach ($map as $group) { // Can't array_fill_keys() here because 'false' gets encoded wrong. $groups[$group] = array(); } foreach ($map as $key => $group) { $groups[$group][$key] = $list[$key]; } $args = func_get_args(); $args = array_slice($args, 2); if ($args) { array_unshift($args, null); foreach ($groups as $group_key => $grouped) { $args[0] = $grouped; $groups[$group_key] = call_user_func_array('mgroup', $args); } } return $groups; } /** * Sort a list of objects by the return value of some method. In PHP, this is * often vastly more efficient than usort() and similar. * * // Sort a list of Duck objects by name. * $sorted = msort($ducks, 'getName'); * * It is usually significantly more efficient to define an ordering method * on objects and call msort() than to write a comparator. It is often more * convenient, as well. * * **NOTE:** This method does not take the list by reference; it returns a new * list. * * @param list List of objects to sort by some property. * @param string Name of a method to call on each object; the return values * will be used to sort the list. * @return list Objects ordered by the return values of the method calls. * @group util */ function msort(array $list, $method) { $surrogate = mpull($list, $method); asort($surrogate); $result = array(); foreach ($surrogate as $key => $value) { $result[$key] = $list[$key]; } return $result; } /** * Selects a list of keys from an array, returning a new array with only the * key-value pairs identified by the selected keys, in the specified order. * * Note that since this function orders keys in the result according to the * order they appear in the list of keys, there are effectively two common * uses: either reducing a large dictionary to a smaller one, or changing the * key order on an existing dictionary. * * @param dict Dictionary of key-value pairs to select from. * @param list List of keys to select. * @return dict Dictionary of only those key-value pairs where the key was * present in the list of keys to select. Ordering is * determined by the list order. * @group util */ function array_select_keys(array $dict, array $keys) { $result = array(); foreach ($keys as $key) { if (array_key_exists($key, $dict)) { $result[$key] = $dict[$key]; } } return $result; } /** * Returns the first argument which is not strictly null, or ##null## if there * are no such arguments. Identical to the MySQL function of the same name. * * @param ... Zero or more arguments of any type. * @return mixed First non-null arg, or null if no such arg exists. * @group util */ function coalesce(/* ... */) { $args = func_get_args(); foreach ($args as $arg) { if ($arg !== null) { return $arg; } } return null; } /** - * Similar to coalesce(), but less strict: returns the first non-empty() - * argument, instead of the first argument that is strictly non-null. If no - * argument is nonempty, it returns the last argument. This is useful - * idiomatically for setting defaults: + * Similar to @{function:coalesce}, but less strict: returns the first + * non-empty() argument, instead of the first argument that is strictly + * non-null. If no argument is nonempty, it returns the last argument. This is + * useful idiomatically for setting defaults: * - * $value = nonempty($get_value, 0); + * $display_name = nonempty($user_name, $full_name, "Anonymous"); * * @param ... Zero or more arguments of any type. * @return mixed First non-empty() arg, or last arg if no such arg * exists, or null if you pased in zero args. * @group util */ function nonempty(/* ... */) { $args = func_get_args(); foreach ($args as $arg) { if ($arg) { break; } } return $arg; } /** * Invokes the "new" operator with a vector of arguments. There is no way to * call_user_func_array() on a class constructor, so you can instead use this * function: * * $obj = newv($class_name, $argv); * * That is, these two statements are equivalent: * * $pancake = new Pancake('Blueberry', 'Maple Syrup', true); * $pancake = newv('Pancake', array('Blueberry', 'Maple Syrup', true)); * * DO NOT solve this problem in other, more creative ways! Three popular * alternatives are: * * - Build a fake serialized object and unserialize it. * - Invoke the constructor twice. - * -just use eval() lol + * - just use eval() lol * * These are really bad solutions to the problem because they can have side * effects (e.g., __wakeup()) and give you an object in an otherwise impossible * state. Please endeavor to keep your objects in possible states. * * If you own the classes you're doing this for, you should consider whether * or not restructuring your code (for instance, by creating static * construction methods) might make it cleaner before using newv(). Static * constructors can be invoked with call_user_func_array(), and may give your * class a cleaner and more descriptive API. * * @param string The name of a class. * @param list Array of arguments to pass to its constructor. * @return obj A new object of the specified class, constructed by passing * the argument vector to its constructor. * @group util */ function newv($class_name, array $argv) { $reflector = new ReflectionClass($class_name); return $reflector->newInstanceArgs($argv); } diff --git a/src/xsprintf/xsprintf.php b/src/xsprintf/xsprintf.php index 18a048b..5122578 100644 --- a/src/xsprintf/xsprintf.php +++ b/src/xsprintf/xsprintf.php @@ -1,74 +1,136 @@ = $argc) { throw new Exception("Too few arguments to xsprintf()."); } $callback($userdata, $pattern, $pos, $argv[$arg], $len); } } if ($c == '%') { // If we have "%%", this encodes a literal percentage symbol, so we are // no longer inside a conversion. $conv = !$conv; } } if ($arg != ($argc - 1)) { throw new Exception("Too many arguments to xsprintf()."); } $argv[0] = $pattern; return call_user_func_array('sprintf', $argv); } + + +/** + * Example @{function:xsprintf} callback. When you call xsprintf(), you + * must pass a callback like this one. xsprintf() will invoke the callback when + * it encounters a conversion (like "%Z") in the pattern string. + * + * Generally, this callback should examine ##$pattern[$pos]## (which will + * contain the conversion character, like 'Z'), escape ##$value## appropriately, + * and then replace ##$pattern[$pos]## with an 's' so sprintf() prints the + * escaped value as a string. However, more sophisticated behaviors are possible + * -- particularly, consuming multiple characters to allow for conversions like + * "%Ld". In this case, the callback needs to substr_replace() the entire + * conversion with 's' and then update ##$length##. + * + * For example implementations, see @{function:xsprintf_command}, + * @{function:xsprintf_javascript}, + * and @{function:xsprintf_query}. + * + * @param wild Arbitrary, optional userdata. This is whatever userdata + * was passed to @{function:xsprintf}. + * @param string The pattern string being parsed. + * @param int The current character position in the string. + * @param wild The value to convert. + * @param int The string length. + * + * @group util + */ +function xsprintf_callback_example( + $userdata, + &$pattern, + &$pos, + &$value, + &$length) { + throw new Exception( + "This function exists only to document the call signature for xsprintf() ". + "callbacks."); +} \ No newline at end of file