diff --git a/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php b/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php index b8a693a48..f4c96b09c 100644 --- a/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php +++ b/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php @@ -1,702 +1,707 @@ <?php /** * Run pull commands on local working copies to keep them up to date. This * daemon handles all repository types. * * By default, the daemon pulls **every** repository. If you want it to be * responsible for only some repositories, you can launch it with a list of * PHIDs or callsigns: * * ./phd launch repositorypulllocal -- X Q Z * * You can also launch a daemon which is responsible for all //but// one or * more repositories: * * ./phd launch repositorypulllocal -- --not A --not B * * If you have a very large number of repositories and some aren't being pulled * as frequently as you'd like, you can either change the pull frequency of * the less-important repositories to a larger number (so the daemon will skip * them more often) or launch one daemon for all the less-important repositories * and one for the more important repositories (or one for each more important * repository). * * @task pull Pulling Repositories * @task git Git Implementation * @task hg Mercurial Implementation */ final class PhabricatorRepositoryPullLocalDaemon extends PhabricatorDaemon { private $commitCache = array(); private $repair; private $discoveryEngines = array(); public function setRepair($repair) { $this->repair = $repair; return $this; } /* -( Pulling Repositories )----------------------------------------------- */ /** * @task pull */ public function run() { $argv = $this->getArgv(); array_unshift($argv, __CLASS__); $args = new PhutilArgumentParser($argv); $args->parse( array( array( 'name' => 'no-discovery', 'help' => 'Pull only, without discovering commits.', ), array( 'name' => 'not', 'param' => 'repository', 'repeat' => true, 'help' => 'Do not pull __repository__.', ), array( 'name' => 'repositories', 'wildcard' => true, 'help' => 'Pull specific __repositories__ instead of all.', ), )); $no_discovery = $args->getArg('no-discovery'); $repo_names = $args->getArg('repositories'); $exclude_names = $args->getArg('not'); // Each repository has an individual pull frequency; after we pull it, // wait that long to pull it again. When we start up, try to pull everything // serially. $retry_after = array(); $min_sleep = 15; while (true) { $repositories = $this->loadRepositories($repo_names); if ($exclude_names) { $exclude = $this->loadRepositories($exclude_names); $repositories = array_diff_key($repositories, $exclude); } // Shuffle the repositories, then re-key the array since shuffle() // discards keys. This is mostly for startup, we'll use soft priorities // later. shuffle($repositories); $repositories = mpull($repositories, null, 'getID'); // If any repositories were deleted, remove them from the retry timer map // so we don't end up with a retry timer that never gets updated and // causes us to sleep for the minimum amount of time. $retry_after = array_select_keys( $retry_after, array_keys($repositories)); // Assign soft priorities to repositories based on how frequently they // should pull again. asort($retry_after); $repositories = array_select_keys( $repositories, array_keys($retry_after)) + $repositories; foreach ($repositories as $id => $repository) { $after = idx($retry_after, $id, 0); if ($after > time()) { continue; } $tracked = $repository->isTracked(); if (!$tracked) { continue; } + $callsign = $repository->getCallsign(); + try { - $callsign = $repository->getCallsign(); $this->log("Updating repository '{$callsign}'."); id(new PhabricatorRepositoryPullEngine()) ->setRepository($repository) ->pullRepository(); if (!$no_discovery) { // TODO: It would be nice to discover only if we pulled something, // but this isn't totally trivial. $lock_name = get_class($this).':'.$callsign; $lock = PhabricatorGlobalLock::newLock($lock_name); $lock->lock(); try { $this->discoverRepository($repository); } catch (Exception $ex) { $lock->unlock(); throw $ex; } $lock->unlock(); } $sleep_for = $repository->getDetail('pull-frequency', $min_sleep); $retry_after[$id] = time() + $sleep_for; } catch (PhutilLockException $ex) { $retry_after[$id] = time() + $min_sleep; $this->log("Failed to acquire lock."); } catch (Exception $ex) { $retry_after[$id] = time() + $min_sleep; - phlog($ex); + + $proxy = new PhutilProxyException( + "Error while fetching changes to the '{$callsign}' repository.", + $ex); + phlog($proxy); } $this->stillWorking(); } if ($retry_after) { $sleep_until = max(min($retry_after), time() + $min_sleep); } else { $sleep_until = time() + $min_sleep; } $this->sleep($sleep_until - time()); } } /** * @task pull */ protected function loadRepositories(array $names) { if (!count($names)) { return id(new PhabricatorRepository())->loadAll(); } else { return PhabricatorRepository::loadAllByPHIDOrCallsign($names); } } public function discoverRepository(PhabricatorRepository $repository) { $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: return $this->executeGitDiscover($repository); case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $refs = $this->getDiscoveryEngine($repository) ->discoverCommits(); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return $this->executeHgDiscover($repository); default: throw new Exception("Unknown VCS '{$vcs}'!"); } foreach ($refs as $ref) { $this->recordCommit( $repository, $ref->getIdentifier(), $ref->getEpoch(), $ref->getBranch()); } return (bool)count($refs); } private function getDiscoveryEngine(PhabricatorRepository $repository) { $id = $repository->getID(); if (empty($this->discoveryEngines[$id])) { $engine = id(new PhabricatorRepositoryDiscoveryEngine()) ->setRepository($repository) ->setVerbose($this->getVerbose()) ->setRepairMode($this->repair); $this->discoveryEngines[$id] = $engine; } return $this->discoveryEngines[$id]; } private function isKnownCommit( PhabricatorRepository $repository, $target) { if ($this->getCache($repository, $target)) { return true; } if ($this->repair) { // In repair mode, rediscover the entire repository, ignoring the // database state. We can hit the local cache above, but if we miss it // stop the script from going to the database cache. return false; } $commit = id(new PhabricatorRepositoryCommit())->loadOneWhere( 'repositoryID = %d AND commitIdentifier = %s', $repository->getID(), $target); if (!$commit) { return false; } $this->setCache($repository, $target); while (count($this->commitCache) > 2048) { array_shift($this->commitCache); } return true; } private function isKnownCommitOnAnyAutocloseBranch( PhabricatorRepository $repository, $target) { $commit = id(new PhabricatorRepositoryCommit())->loadOneWhere( 'repositoryID = %d AND commitIdentifier = %s', $repository->getID(), $target); if (!$commit) { $callsign = $repository->getCallsign(); $console = PhutilConsole::getConsole(); $console->writeErr( "WARNING: Repository '%s' is missing commits ('%s' is missing from ". "history). Run '%s' to repair the repository.\n", $callsign, $target, "bin/repository discover --repair {$callsign}"); return false; } $data = $commit->loadCommitData(); if (!$data) { return false; } if ($repository->shouldAutocloseCommit($commit, $data)) { return true; } return false; } private function recordCommit( PhabricatorRepository $repository, $commit_identifier, $epoch, $branch = null) { $commit = new PhabricatorRepositoryCommit(); $commit->setRepositoryID($repository->getID()); $commit->setCommitIdentifier($commit_identifier); $commit->setEpoch($epoch); $data = new PhabricatorRepositoryCommitData(); if ($branch) { $data->setCommitDetail('seenOnBranches', array($branch)); } try { $commit->openTransaction(); $commit->save(); $data->setCommitID($commit->getID()); $data->save(); $commit->saveTransaction(); $this->insertTask($repository, $commit); queryfx( $repository->establishConnection('w'), 'INSERT INTO %T (repositoryID, size, lastCommitID, epoch) VALUES (%d, 1, %d, %d) ON DUPLICATE KEY UPDATE size = size + 1, lastCommitID = IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID), epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)', PhabricatorRepository::TABLE_SUMMARY, $repository->getID(), $commit->getID(), $epoch); if ($this->repair) { // Normally, the query should throw a duplicate key exception. If we // reach this in repair mode, we've actually performed a repair. $this->log("Repaired commit '{$commit_identifier}'."); } $this->setCache($repository, $commit_identifier); PhutilEventEngine::dispatchEvent( new PhabricatorEvent( PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT, array( 'repository' => $repository, 'commit' => $commit, ))); } catch (AphrontQueryDuplicateKeyException $ex) { $commit->killTransaction(); // Ignore. This can happen because we discover the same new commit // more than once when looking at history, or because of races or // data inconsistency or cosmic radiation; in any case, we're still // in a good state if we ignore the failure. $this->setCache($repository, $commit_identifier); } } private function updateCommit( PhabricatorRepository $repository, $commit_identifier, $branch) { $commit = id(new PhabricatorRepositoryCommit())->loadOneWhere( 'repositoryID = %d AND commitIdentifier = %s', $repository->getID(), $commit_identifier); if (!$commit) { // This can happen if the phabricator DB doesn't have the commit info, // or the commit is so big that phabricator couldn't parse it. In this // case we just ignore it. return; } $data = id(new PhabricatorRepositoryCommitData())->loadOneWhere( 'commitID = %d', $commit->getID()); if (!$data) { $data = new PhabricatorRepositoryCommitData(); $data->setCommitID($commit->getID()); } $branches = $data->getCommitDetail('seenOnBranches', array()); $branches[] = $branch; $data->setCommitDetail('seenOnBranches', $branches); $data->save(); $this->insertTask( $repository, $commit, array( 'only' => true )); } private function insertTask( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, $data = array()) { $vcs = $repository->getVersionControlSystem(); switch ($vcs) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $class = 'PhabricatorRepositoryGitCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $class = 'PhabricatorRepositorySvnCommitMessageParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker'; break; default: throw new Exception("Unknown repository type '{$vcs}'!"); } $data['commitID'] = $commit->getID(); PhabricatorWorker::scheduleTask($class, $data); } private function setCache( PhabricatorRepository $repository, $commit_identifier) { $key = $this->getCacheKey($repository, $commit_identifier); $this->commitCache[$key] = true; } private function getCache( PhabricatorRepository $repository, $commit_identifier) { $key = $this->getCacheKey($repository, $commit_identifier); return idx($this->commitCache, $key, false); } private function getCacheKey( PhabricatorRepository $repository, $commit_identifier) { return $repository->getID().':'.$commit_identifier; } /* -( Git Implementation )------------------------------------------------- */ /** * @task git */ private function executeGitDiscover( PhabricatorRepository $repository) { list($remotes) = $repository->execxLocalCommand( 'remote show -n origin'); $matches = null; if (!preg_match('/^\s*Fetch URL:\s*(.*?)\s*$/m', $remotes, $matches)) { throw new Exception( "Expected 'Fetch URL' in 'git remote show -n origin'."); } self::executeGitVerifySameOrigin( $matches[1], $repository->getRemoteURI(), $repository->getLocalPath()); list($stdout) = $repository->execxLocalCommand( 'branch -r --verbose --no-abbrev'); $branches = DiffusionGitBranch::parseRemoteBranchOutput( $stdout, $only_this_remote = DiffusionBranchInformation::DEFAULT_GIT_REMOTE); $callsign = $repository->getCallsign(); $tracked_something = false; $this->log("Discovering commits in repository '{$callsign}'..."); foreach ($branches as $name => $commit) { $this->log("Examining branch '{$name}', at {$commit}."); if (!$repository->shouldTrackBranch($name)) { $this->log("Skipping, branch is untracked."); continue; } $tracked_something = true; if ($this->isKnownCommit($repository, $commit)) { $this->log("Skipping, HEAD is known."); continue; } $this->log("Looking for new commits."); $this->executeGitDiscoverCommit($repository, $commit, $name, false); } if (!$tracked_something) { $repo_name = $repository->getName(); $repo_callsign = $repository->getCallsign(); throw new Exception( "Repository r{$repo_callsign} '{$repo_name}' has no tracked branches! ". "Verify that your branch filtering settings are correct."); } $this->log("Discovering commits on autoclose branches..."); foreach ($branches as $name => $commit) { $this->log("Examining branch '{$name}', at {$commit}'."); if (!$repository->shouldTrackBranch($name)) { $this->log("Skipping, branch is untracked."); continue; } if (!$repository->shouldAutocloseBranch($name)) { $this->log("Skipping, branch is not autoclose."); continue; } if ($this->isKnownCommitOnAnyAutocloseBranch($repository, $commit)) { $this->log("Skipping, commit is known on an autoclose branch."); continue; } $this->log("Looking for new autoclose commits."); $this->executeGitDiscoverCommit($repository, $commit, $name, true); } } /** * @task git */ private function executeGitDiscoverCommit( PhabricatorRepository $repository, $commit, $branch, $autoclose) { $discover = array($commit); $insert = array($commit); $seen_parent = array(); $stream = new PhabricatorGitGraphStream($repository, $commit); while (true) { $target = array_pop($discover); $parents = $stream->getParents($target); foreach ($parents as $parent) { if (isset($seen_parent[$parent])) { // We end up in a loop here somehow when we parse Arcanist if we // don't do this. TODO: Figure out why and draw a pretty diagram // since it's not evident how parsing a DAG with this causes the // loop to stop terminating. continue; } $seen_parent[$parent] = true; if ($autoclose) { $known = $this->isKnownCommitOnAnyAutocloseBranch( $repository, $parent); } else { $known = $this->isKnownCommit($repository, $parent); } if (!$known) { $this->log("Discovered commit '{$parent}'."); $discover[] = $parent; $insert[] = $parent; } } if (empty($discover)) { break; } } $n = count($insert); if ($autoclose) { $this->log("Found {$n} new autoclose commits on branch '{$branch}'."); } else { $this->log("Found {$n} new commits on branch '{$branch}'."); } while (true) { $target = array_pop($insert); $epoch = $stream->getCommitDate($target); $epoch = trim($epoch); if ($autoclose) { $this->updateCommit($repository, $target, $branch); } else { $this->recordCommit($repository, $target, $epoch, $branch); } if (empty($insert)) { break; } } } /** * @task git */ public static function executeGitVerifySameOrigin($remote, $expect, $where) { $remote_path = self::getPathFromGitURI($remote); $expect_path = self::getPathFromGitURI($expect); $remote_match = self::executeGitNormalizePath($remote_path); $expect_match = self::executeGitNormalizePath($expect_path); if ($remote_match != $expect_match) { throw new Exception( "Working copy at '{$where}' has a mismatched origin URL. It has ". "origin URL '{$remote}' (with remote path '{$remote_path}'), but the ". "configured URL '{$expect}' (with remote path '{$expect_path}') is ". "expected. Refusing to proceed because this may indicate that the ". "working copy is actually some other repository."); } } private static function getPathFromGitURI($raw_uri) { $uri = new PhutilURI($raw_uri); if ($uri->getProtocol()) { return $uri->getPath(); } $uri = new PhutilGitURI($raw_uri); if ($uri->getDomain()) { return $uri->getPath(); } return $raw_uri; } /** * @task git */ private static function executeGitNormalizePath($path) { // Strip away "/" and ".git", so similar paths correctly match. $path = trim($path, '/'); $path = preg_replace('/\.git$/', '', $path); return $path; } /* -( Mercurial Implementation )------------------------------------------- */ private function executeHgDiscover(PhabricatorRepository $repository) { // NOTE: "--debug" gives us 40-character hashes. list($stdout) = $repository->execxLocalCommand('--debug branches'); $branches = ArcanistMercurialParser::parseMercurialBranches($stdout); $got_something = false; foreach ($branches as $name => $branch) { $commit = $branch['rev']; if ($this->isKnownCommit($repository, $commit)) { continue; } else { $this->executeHgDiscoverCommit($repository, $commit); $got_something = true; } } return $got_something; } private function executeHgDiscoverCommit( PhabricatorRepository $repository, $commit) { $discover = array($commit); $insert = array($commit); $seen_parent = array(); $stream = new PhabricatorMercurialGraphStream($repository); // For all the new commits at the branch heads, walk backward until we // find only commits we've aleady seen. while ($discover) { $target = array_pop($discover); $parents = $stream->getParents($target); foreach ($parents as $parent) { if (isset($seen_parent[$parent])) { continue; } $seen_parent[$parent] = true; if (!$this->isKnownCommit($repository, $parent)) { $discover[] = $parent; $insert[] = $parent; } } } foreach ($insert as $target) { $epoch = $stream->getCommitDate($target); $this->recordCommit($repository, $target, $epoch); } } }