Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92893955
PhutilArgumentSpellingCorrector.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 24, 14:15
Size
4 KB
Mime Type
text/x-php
Expires
Tue, Nov 26, 14:15 (2 d)
Engine
blob
Format
Raw Data
Handle
22535812
Attached To
rPHU libphutil
PhutilArgumentSpellingCorrector.php
View Options
<?php
/**
 * Suggests corrections for a mistyped command or flag by comparing the input
 * against a list of valid options using a weighted edit distance.
 *
 * Instances are normally obtained from @{method:newCommandCorrector} or
 * @{method:newFlagCorrector}, which configure the edit costs, the maximum
 * accepted distance, and the correction mode.
 */
final class PhutilArgumentSpellingCorrector extends Phobject {

  const MODE_COMMANDS = 'commands';
  const MODE_FLAGS = 'flags';

  private $editDistanceMatrix;
  private $maximumDistance;
  private $mode;


  /**
   * Build a new corrector with parameters for correcting commands, like
   * fixing "dfif" into "diff" in "arc diff".
   *
   * @return PhutilArgumentSpellingCorrector Configured corrector.
   */
  public static function newCommandCorrector() {
    // Command correction is relatively liberal: we are willing to select
    // alternatives which are a couple of mistakes away from the input.
    $allowed_edits = 2;

    // The edit costs configured below are scaled up, so scale the limit by
    // the same factor. This keeps "2" meaning roughly "2 edits".
    $limit = $allowed_edits * 3;

    // These costs are somewhat made up. The theory is that mis-striking a key
    // ("lans" for "land") or pressing two keys out of order ("alnd" for
    // "land") is far more likely than omitting a key or pressing extra keys.
    $costs = new PhutilEditDistanceMatrix();
    $costs = $costs
      ->setInsertCost(4)
      ->setDeleteCost(4)
      ->setReplaceCost(3)
      ->setTransposeCost(2);

    $corrector = new self();
    $corrector->setEditDistanceMatrix($costs);
    $corrector->setMode(self::MODE_COMMANDS);
    $corrector->setMaximumDistance($limit);

    return $corrector;
  }


  /**
   * Build a new corrector with parameters for correcting flags, like
   * fixing "--nolint" into "--no-lint".
   *
   * @return PhutilArgumentSpellingCorrector Configured corrector.
   */
  public static function newFlagCorrector() {
    // Flag correction is stricter than command correction: only a single
    // inserted or deleted character is tolerated. This mainly handles cases
    // like "--no-lint" versus "--nolint" or "--reviewer" versus "--reviewers".
    $limit = 1;

    // A replacement costs more than the limit, so a replaced character can
    // never produce an accepted match. No transpose cost is configured here.
    $costs = new PhutilEditDistanceMatrix();
    $costs = $costs
      ->setInsertCost(1)
      ->setDeleteCost(1)
      ->setReplaceCost(10);

    $corrector = new self();
    $corrector->setEditDistanceMatrix($costs);
    $corrector->setMode(self::MODE_FLAGS);
    $corrector->setMaximumDistance($limit);

    return $corrector;
  }


  /**
   * Select the correction mode.
   *
   * @param string One of the `MODE_*` constants.
   * @return this
   */
  public function setMode($mode) {
    $this->mode = $mode;
    return $this;
  }


  /**
   * Get the correction mode.
   *
   * @return string|null Configured mode, or null if none has been set.
   */
  public function getMode() {
    return $this->mode;
  }


  /**
   * Set the matrix used to compute edit distances between the input and each
   * option. The costs configured on the matrix determine how expensive each
   * kind of mistake is.
   *
   * @param PhutilEditDistanceMatrix Matrix to compute distances with.
   * @return this
   */
  public function setEditDistanceMatrix(PhutilEditDistanceMatrix $matrix) {
    $this->editDistanceMatrix = $matrix;
    return $this;
  }


  /**
   * Get the matrix used to compute edit distances.
   *
   * @return PhutilEditDistanceMatrix|null Configured matrix, or null if none
   *   has been set.
   */
  public function getEditDistanceMatrix() {
    return $this->editDistanceMatrix;
  }


  /**
   * Set the largest edit distance at which an option is still considered a
   * plausible correction. The value is measured in the same units as the
   * costs configured on the edit distance matrix.
   *
   * @param int Maximum accepted distance.
   * @return this
   */
  public function setMaximumDistance($maximum_distance) {
    $this->maximumDistance = $maximum_distance;
    return $this;
  }


  /**
   * Get the largest edit distance at which an option is still accepted.
   *
   * @return int|null Configured maximum, or null if none has been set.
   */
  public function getMaximumDistance() {
    return $this->maximumDistance;
  }


  /**
   * Find the options which are the most plausible corrections for an input.
   *
   * The input and every option are normalized with
   * @{method:normalizeString} before they are compared, and the returned
   * values are the normalized forms of the options.
   *
   * Only the options at the smallest distance from the input are returned,
   * and only if that distance does not exceed the configured maximum. When
   * several options are tied, all of them are returned so the caller can
   * treat the input as ambiguous.
   *
   * In `MODE_COMMANDS`, an input beginning with "-" is never corrected.
   *
   * @param string Text the user typed.
   * @param list<string> Valid alternatives to choose from.
   * @return list<string> Normalized options which plausibly correct the
   *   input. May be empty.
   * @throws PhutilInvalidStateException If no edit distance matrix or no
   *   maximum distance has been configured. Both are tested for truthiness,
   *   so a maximum distance of 0 is treated as not configured.
   */
  public function correctSpelling($input, array $options) {
    $matrix = $this->getEditDistanceMatrix();
    if (!$matrix) {
      throw new PhutilInvalidStateException('setEditDistanceMatrix');
    }

    $limit = $this->getMaximumDistance();
    if (!$limit) {
      throw new PhutilInvalidStateException('setMaximumDistance');
    }

    // If we're correcting commands, never correct an input which begins
    // with "-", since this is almost certainly intended to be a flag.
    $is_command_mode = ($this->getMode() === self::MODE_COMMANDS);
    if ($is_command_mode && preg_match('/^-/', $input)) {
      return array();
    }

    $needle = $this->normalizeString($input);

    $candidates = array();
    foreach ($options as $option) {
      $candidates[] = $this->normalizeString($option);
    }

    // Score every candidate against the input. Candidates which normalize to
    // the same string share a single entry.
    $needle_chars = phutil_utf8v($needle);
    $scores = array();
    foreach ($candidates as $candidate) {
      $candidate_chars = phutil_utf8v($candidate);
      $matrix->setSequences($candidate_chars, $needle_chars);
      $scores[$candidate] = $matrix->getEditDistance();
    }

    // Order the candidates from closest to furthest.
    asort($scores);

    // Keep only the candidates which are tied for the best score, provided
    // that score is within the configured limit.
    $cutoff = min($limit, head($scores));
    $closest = array();
    foreach ($scores as $candidate => $score) {
      if ($score > $cutoff) {
        continue;
      }
      $closest[$candidate] = $score;
    }

    // Before filtering, check if we have multiple equidistant matches and
    // return them if we do. This prevents us from, e.g., matching "alnd" with
    // both "land" and "amend", then dropping "land" for being too short, and
    // incorrectly completing to "amend".
    if (count($closest) > 1) {
      return array_keys($closest);
    }

    // Discard a candidate which has fewer characters than its distance from
    // the input.
    $corrections = array();
    foreach ($closest as $candidate => $score) {
      if (phutil_utf8_strlen($candidate) < $score) {
        continue;
      }
      $corrections[] = $candidate;
    }

    return $corrections;
  }


  /**
   * Normalize a string for comparison by passing it through
   * `phutil_utf8_strtolower()`.
   *
   * @param string String to normalize.
   * @return string Normalized string.
   */
  private function normalizeString($string) {
    return phutil_utf8_strtolower($string);
  }

}
Event Timeline
Log In to Comment