Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F106612783
PhabricatorRepositoryDiscoveryEngine.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Mar 28, 09:52
Size
12 KB
Mime Type
text/x-php
Expires
Sun, Mar 30, 10:52 (2 d)
Engine
blob
Format
Raw Data
Handle
25233688
Attached To
rPH Phabricator
PhabricatorRepositoryDiscoveryEngine.php
View Options
<?php
/**
* @task discover Discovering Repositories
* @task svn Discovering Subversion Repositories
* @task git Discovering Git Repositories
* @task hg Discovering Mercurial Repositories
* @task internal Internals
*/
final
class
PhabricatorRepositoryDiscoveryEngine
extends
PhabricatorRepositoryEngine
{
private
$repairMode
;
private
$commitCache
=
array
();
const
MAX_COMMIT_CACHE_SIZE
=
2048
;
/* -( Discovering Repositories )------------------------------------------- */
public
function
setRepairMode
(
$repair_mode
)
{
$this
->
repairMode
=
$repair_mode
;
return
$this
;
}
public
function
getRepairMode
()
{
return
$this
->
repairMode
;
}
/**
* @task discovery
*/
public
function
discoverCommits
()
{
$repository
=
$this
->
getRepository
();
$vcs
=
$repository
->
getVersionControlSystem
();
switch
(
$vcs
)
{
case
PhabricatorRepositoryType
::
REPOSITORY_TYPE_SVN
:
$refs
=
$this
->
discoverSubversionCommits
();
break
;
case
PhabricatorRepositoryType
::
REPOSITORY_TYPE_MERCURIAL
:
$refs
=
$this
->
discoverMercurialCommits
();
break
;
case
PhabricatorRepositoryType
::
REPOSITORY_TYPE_GIT
:
$refs
=
$this
->
discoverGitCommits
();
break
;
default
:
throw
new
Exception
(
"Unknown VCS '{$vcs}'!"
);
}
// Mark discovered commits in the cache.
foreach
(
$refs
as
$ref
)
{
$this
->
commitCache
[
$ref
->
getIdentifier
()]
=
true
;
}
return
$refs
;
}
/* -( Discovering Git Repositories )--------------------------------------- */
/**
* @task git
*/
private
function
discoverGitCommits
()
{
$repository
=
$this
->
getRepository
();
if
(!
$repository
->
isHosted
())
{
$this
->
verifyGitOrigin
(
$repository
);
}
$branches
=
id
(
new
DiffusionLowLevelGitRefQuery
())
->
setRepository
(
$repository
)
->
withIsOriginBranch
(
true
)
->
execute
();
if
(!
$branches
)
{
// This repository has no branches at all, so we don't need to do
// anything. Generally, this means the repository is empty.
return
array
();
}
$branches
=
$this
->
sortBranches
(
$branches
);
$branches
=
mpull
(
$branches
,
'getCommitIdentifier'
,
'getShortName'
);
$this
->
log
(
pht
(
'Discovering commits in repository %s.'
,
$repository
->
getCallsign
()));
$refs
=
array
();
foreach
(
$branches
as
$name
=>
$commit
)
{
$this
->
log
(
pht
(
'Examining branch "%s", at "%s".'
,
$name
,
$commit
));
if
(!
$repository
->
shouldTrackBranch
(
$name
))
{
$this
->
log
(
pht
(
"Skipping, branch is untracked."
));
continue
;
}
if
(
$this
->
isKnownCommit
(
$commit
))
{
$this
->
log
(
pht
(
"Skipping, HEAD is known."
));
continue
;
}
$this
->
log
(
pht
(
"Looking for new commits."
));
$refs
[]
=
$this
->
discoverStreamAncestry
(
new
PhabricatorGitGraphStream
(
$repository
,
$commit
),
$commit
,
$repository
->
shouldAutocloseBranch
(
$name
));
}
return
array_mergev
(
$refs
);
}
/**
* Verify that the "origin" remote exists, and points at the correct URI.
*
* This catches or corrects some types of misconfiguration, and also repairs
* an issue where Git 1.7.1 does not create an "origin" for `--bare` clones.
* See T4041.
*
* @param PhabricatorRepository Repository to verify.
* @return void
*/
private
function
verifyGitOrigin
(
PhabricatorRepository
$repository
)
{
list
(
$remotes
)
=
$repository
->
execxLocalCommand
(
'remote show -n origin'
);
$matches
=
null
;
if
(!
preg_match
(
'/^
\s
*Fetch URL:
\s
*(.*?)
\s
*$/m'
,
$remotes
,
$matches
))
{
throw
new
Exception
(
"Expected 'Fetch URL' in 'git remote show -n origin'."
);
}
$remote_uri
=
$matches
[
1
];
$expect_remote
=
$repository
->
getRemoteURI
();
if
(
$remote_uri
==
"origin"
)
{
// If a remote does not exist, git pretends it does and prints out a
// made up remote where the URI is the same as the remote name. This is
// definitely not correct.
// Possibly, we should use `git remote --verbose` instead, which does not
// suffer from this problem (but is a little more complicated to parse).
$valid
=
false
;
$exists
=
false
;
}
else
{
$normal_type_git
=
PhabricatorRepositoryURINormalizer
::
TYPE_GIT
;
$remote_normal
=
id
(
new
PhabricatorRepositoryURINormalizer
(
$normal_type_git
,
$remote_uri
))->
getNormalizedPath
();
$expect_normal
=
id
(
new
PhabricatorRepositoryURINormalizer
(
$normal_type_git
,
$expect_remote
))->
getNormalizedPath
();
$valid
=
(
$remote_normal
==
$expect_normal
);
$exists
=
true
;
}
if
(!
$valid
)
{
if
(!
$exists
)
{
// If there's no "origin" remote, just create it regardless of how
// strongly we own the working copy. There is almost no conceivable
// scenario in which this could do damage.
$this
->
log
(
pht
(
'Remote "origin" does not exist. Creating "origin", with '
.
'URI "%s".'
,
$expect_remote
));
$repository
->
execxLocalCommand
(
'remote add origin %P'
,
$repository
->
getRemoteURIEnvelope
());
// NOTE: This doesn't fetch the origin (it just creates it), so we won't
// know about origin branches until the next "pull" happens. That's fine
// for our purposes, but might impact things in the future.
}
else
{
if
(
$repository
->
canDestroyWorkingCopy
())
{
// Bad remote, but we can try to repair it.
$this
->
log
(
pht
(
'Remote "origin" exists, but is pointed at the wrong URI, "%s". '
.
'Resetting origin URI to "%s.'
,
$remote_uri
,
$expect_remote
));
$repository
->
execxLocalCommand
(
'remote set-url origin %P'
,
$repository
->
getRemoteURIEnvelope
());
}
else
{
// Bad remote and we aren't comfortable repairing it.
$message
=
pht
(
'Working copy at "%s" has a mismatched origin URI, "%s". '
.
'The expected origin URI is "%s". Fix your configuration, or '
.
'set the remote URI correctly. To avoid breaking anything, '
.
'Phabricator will not automatically fix this.'
,
$repository
->
getLocalPath
(),
$remote_uri
,
$expect_remote
);
throw
new
Exception
(
$message
);
}
}
}
}
/* -( Discovering Subversion Repositories )-------------------------------- */
/**
* @task svn
*/
private
function
discoverSubversionCommits
()
{
$repository
=
$this
->
getRepository
();
$upper_bound
=
null
;
$limit
=
1
;
$refs
=
array
();
do
{
// Find all the unknown commits on this path. Note that we permit
// importing an SVN subdirectory rather than the entire repository, so
// commits may be nonsequential.
if
(
$upper_bound
===
null
)
{
$at_rev
=
'HEAD'
;
}
else
{
$at_rev
=
(
$upper_bound
-
1
);
}
try
{
list
(
$xml
,
$stderr
)
=
$repository
->
execxRemoteCommand
(
'log --xml --quiet --limit %d %s'
,
$limit
,
$repository
->
getSubversionBaseURI
(
$at_rev
));
}
catch
(
CommandException
$ex
)
{
$stderr
=
$ex
->
getStdErr
();
if
(
preg_match
(
'/(path|File) not found/'
,
$stderr
))
{
// We've gone all the way back through history and this path was not
// affected by earlier commits.
break
;
}
throw
$ex
;
}
$xml
=
phutil_utf8ize
(
$xml
);
$log
=
new
SimpleXMLElement
(
$xml
);
foreach
(
$log
->
logentry
as
$entry
)
{
$identifier
=
(
int
)
$entry
[
'revision'
];
$epoch
=
(
int
)
strtotime
((
string
)
$entry
->
date
[
0
]);
$refs
[
$identifier
]
=
id
(
new
PhabricatorRepositoryCommitRef
())
->
setIdentifier
(
$identifier
)
->
setEpoch
(
$epoch
)
->
setCanCloseImmediately
(
true
);
if
(
$upper_bound
===
null
)
{
$upper_bound
=
$identifier
;
}
else
{
$upper_bound
=
min
(
$upper_bound
,
$identifier
);
}
}
// Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially
// import large repositories fairly quickly, while pulling only as much
// data as we need in the common case (when we've already imported the
// repository and are just grabbing one commit at a time).
$limit
=
min
(
$limit
*
2
,
256
);
}
while
(
$upper_bound
>
1
&&
!
$this
->
isKnownCommit
(
$upper_bound
));
krsort
(
$refs
);
while
(
$refs
&&
$this
->
isKnownCommit
(
last
(
$refs
)->
getIdentifier
()))
{
array_pop
(
$refs
);
}
$refs
=
array_reverse
(
$refs
);
return
$refs
;
}
/* -( Discovering Mercurial Repositories )--------------------------------- */
/**
* @task hg
*/
private
function
discoverMercurialCommits
()
{
$repository
=
$this
->
getRepository
();
$branches
=
id
(
new
DiffusionLowLevelMercurialBranchesQuery
())
->
setRepository
(
$repository
)
->
execute
();
$refs
=
array
();
foreach
(
$branches
as
$branch
)
{
// NOTE: Mercurial branches may have multiple heads, so the names may
// not be unique.
$name
=
$branch
->
getShortName
();
$commit
=
$branch
->
getCommitIdentifier
();
$this
->
log
(
pht
(
'Examining branch "%s" head "%s".'
,
$name
,
$commit
));
if
(!
$repository
->
shouldTrackBranch
(
$name
))
{
$this
->
log
(
pht
(
"Skipping, branch is untracked."
));
continue
;
}
if
(
$this
->
isKnownCommit
(
$commit
))
{
$this
->
log
(
pht
(
"Skipping, this head is a known commit."
));
continue
;
}
$this
->
log
(
pht
(
"Looking for new commits."
));
$refs
[]
=
$this
->
discoverStreamAncestry
(
new
PhabricatorMercurialGraphStream
(
$repository
,
$commit
),
$commit
,
$close_immediately
=
true
);
}
return
array_mergev
(
$refs
);
}
/* -( Internals )---------------------------------------------------------- */
private
function
discoverStreamAncestry
(
PhabricatorRepositoryGraphStream
$stream
,
$commit
,
$close_immediately
)
{
$discover
=
array
(
$commit
);
$graph
=
array
();
$seen
=
array
();
// Find all the reachable, undiscovered commits. Build a graph of the
// edges.
while
(
$discover
)
{
$target
=
array_pop
(
$discover
);
if
(
empty
(
$graph
[
$target
]))
{
$graph
[
$target
]
=
array
();
}
$parents
=
$stream
->
getParents
(
$target
);
foreach
(
$parents
as
$parent
)
{
if
(
$this
->
isKnownCommit
(
$parent
))
{
continue
;
}
$graph
[
$target
][
$parent
]
=
true
;
if
(
empty
(
$seen
[
$parent
]))
{
$seen
[
$parent
]
=
true
;
$discover
[]
=
$parent
;
}
}
}
// Now, sort them topographically.
$commits
=
$this
->
reduceGraph
(
$graph
);
$refs
=
array
();
foreach
(
$commits
as
$commit
)
{
$refs
[]
=
id
(
new
PhabricatorRepositoryCommitRef
())
->
setIdentifier
(
$commit
)
->
setEpoch
(
$stream
->
getCommitDate
(
$commit
))
->
setCanCloseImmediately
(
$close_immediately
);
}
return
$refs
;
}
private
function
reduceGraph
(
array
$edges
)
{
foreach
(
$edges
as
$commit
=>
$parents
)
{
$edges
[
$commit
]
=
array_keys
(
$parents
);
}
$graph
=
new
PhutilDirectedScalarGraph
();
$graph
->
addNodes
(
$edges
);
$commits
=
$graph
->
getTopographicallySortedNodes
();
// NOTE: We want the most ancestral nodes first, so we need to reverse the
// list we get out of AbstractDirectedGraph.
$commits
=
array_reverse
(
$commits
);
return
$commits
;
}
private
function
isKnownCommit
(
$identifier
)
{
if
(
isset
(
$this
->
commitCache
[
$identifier
]))
{
return
true
;
}
if
(
$this
->
repairMode
)
{
// In repair mode, rediscover the entire repository, ignoring the
// database state. We can hit the local cache above, but if we miss it
// stop the script from going to the database cache.
return
false
;
}
$commit
=
id
(
new
PhabricatorRepositoryCommit
())->
loadOneWhere
(
'repositoryID = %d AND commitIdentifier = %s'
,
$this
->
getRepository
()->
getID
(),
$identifier
);
if
(!
$commit
)
{
return
false
;
}
$this
->
commitCache
[
$identifier
]
=
true
;
while
(
count
(
$this
->
commitCache
)
>
self
::
MAX_COMMIT_CACHE_SIZE
)
{
array_shift
(
$this
->
commitCache
);
}
return
true
;
}
/**
* Sort branches so we process closeable branches first. This makes the
* whole import process a little cheaper, since we can close these commits
* the first time through rather than catching them in the refs step.
*
* @task internal
*
* @param list<DiffusionRepositoryRef> List of branch heads.
* @return list<DiffusionRepositoryRef> Sorted list of branch heads.
*/
private
function
sortBranches
(
array
$branches
)
{
$repository
=
$this
->
getRepository
();
$head_branches
=
array
();
$tail_branches
=
array
();
foreach
(
$branches
as
$branch
)
{
$name
=
$branch
->
getShortName
();
if
(
$repository
->
shouldAutocloseBranch
(
$name
))
{
$head_branches
[]
=
$branch
;
}
else
{
$tail_branches
[]
=
$branch
;
}
}
return
array_merge
(
$head_branches
,
$tail_branches
);
}
}
Event Timeline
Log In to Comment