Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F93889033
PhabricatorElasticSearchEngine.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Dec 2, 07:15
Size
12 KB
Mime Type
text/x-php
Expires
Wed, Dec 4, 07:15 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22719909
Attached To
rPH Phabricator
PhabricatorElasticSearchEngine.php
View Options
<?php
final
class
PhabricatorElasticSearchEngine
extends
PhabricatorSearchEngine
{
private
$uri
;
private
$index
;
private
$timeout
;
public
function
__construct
()
{
$this
->
uri
=
PhabricatorEnv
::
getEnvConfig
(
'search.elastic.host'
);
$this
->
index
=
PhabricatorEnv
::
getEnvConfig
(
'search.elastic.namespace'
);
}
public
function
getEngineIdentifier
()
{
return
'elasticsearch'
;
}
public
function
getEnginePriority
()
{
return
10
;
}
public
function
isEnabled
()
{
return
(
bool
)
$this
->
uri
;
}
public
function
setURI
(
$uri
)
{
$this
->
uri
=
$uri
;
return
$this
;
}
public
function
setIndex
(
$index
)
{
$this
->
index
=
$index
;
return
$this
;
}
public
function
setTimeout
(
$timeout
)
{
$this
->
timeout
=
$timeout
;
return
$this
;
}
public
function
getURI
()
{
return
$this
->
uri
;
}
public
function
getIndex
()
{
return
$this
->
index
;
}
public
function
getTimeout
()
{
return
$this
->
timeout
;
}
public
function
reindexAbstractDocument
(
PhabricatorSearchAbstractDocument
$doc
)
{
$type
=
$doc
->
getDocumentType
();
$phid
=
$doc
->
getPHID
();
$handle
=
id
(
new
PhabricatorHandleQuery
())
->
setViewer
(
PhabricatorUser
::
getOmnipotentUser
())
->
withPHIDs
(
array
(
$phid
))
->
executeOne
();
// URL is not used internally but it can be useful externally.
$spec
=
array
(
'title'
=>
$doc
->
getDocumentTitle
(),
'url'
=>
PhabricatorEnv
::
getProductionURI
(
$handle
->
getURI
()),
'dateCreated'
=>
$doc
->
getDocumentCreated
(),
'_timestamp'
=>
$doc
->
getDocumentModified
(),
'field'
=>
array
(),
'relationship'
=>
array
(),
);
foreach
(
$doc
->
getFieldData
()
as
$field
)
{
$spec
[
'field'
][]
=
array_combine
(
array
(
'type'
,
'corpus'
,
'aux'
),
$field
);
}
foreach
(
$doc
->
getRelationshipData
()
as
$relationship
)
{
list
(
$rtype
,
$to_phid
,
$to_type
,
$time
)
=
$relationship
;
$spec
[
'relationship'
][
$rtype
][]
=
array
(
'phid'
=>
$to_phid
,
'phidType'
=>
$to_type
,
'when'
=>
$time
,
);
}
$this
->
executeRequest
(
"/{$type}/{$phid}/"
,
$spec
,
'PUT'
);
}
public
function
reconstructDocument
(
$phid
)
{
$type
=
phid_get_type
(
$phid
);
$response
=
$this
->
executeRequest
(
"/{$type}/{$phid}"
,
array
());
if
(
empty
(
$response
[
'exists'
]))
{
return
null
;
}
$hit
=
$response
[
'_source'
];
$doc
=
new
PhabricatorSearchAbstractDocument
();
$doc
->
setPHID
(
$phid
);
$doc
->
setDocumentType
(
$response
[
'_type'
]);
$doc
->
setDocumentTitle
(
$hit
[
'title'
]);
$doc
->
setDocumentCreated
(
$hit
[
'dateCreated'
]);
$doc
->
setDocumentModified
(
$hit
[
'_timestamp'
]);
foreach
(
$hit
[
'field'
]
as
$fdef
)
{
$doc
->
addField
(
$fdef
[
'type'
],
$fdef
[
'corpus'
],
$fdef
[
'aux'
]);
}
foreach
(
$hit
[
'relationship'
]
as
$rtype
=>
$rships
)
{
foreach
(
$rships
as
$rship
)
{
$doc
->
addRelationship
(
$rtype
,
$rship
[
'phid'
],
$rship
[
'phidType'
],
$rship
[
'when'
]);
}
}
return
$doc
;
}
private
function
buildSpec
(
PhabricatorSavedQuery
$query
)
{
$spec
=
array
();
$filter
=
array
();
$title_spec
=
array
();
if
(
strlen
(
$query
->
getParameter
(
'query'
)))
{
$spec
[]
=
array
(
'simple_query_string'
=>
array
(
'query'
=>
$query
->
getParameter
(
'query'
),
'fields'
=>
array
(
'field.corpus'
),
),
);
$title_spec
=
array
(
'simple_query_string'
=>
array
(
'query'
=>
$query
->
getParameter
(
'query'
),
'fields'
=>
array
(
'title'
),
),
);
}
$exclude
=
$query
->
getParameter
(
'exclude'
);
if
(
$exclude
)
{
$filter
[]
=
array
(
'not'
=>
array
(
'ids'
=>
array
(
'values'
=>
array
(
$exclude
),
),
),
);
}
$relationship_map
=
array
(
PhabricatorSearchRelationship
::
RELATIONSHIP_AUTHOR
=>
$query
->
getParameter
(
'authorPHIDs'
,
array
()),
PhabricatorSearchRelationship
::
RELATIONSHIP_SUBSCRIBER
=>
$query
->
getParameter
(
'subscriberPHIDs'
,
array
()),
PhabricatorSearchRelationship
::
RELATIONSHIP_PROJECT
=>
$query
->
getParameter
(
'projectPHIDs'
,
array
()),
PhabricatorSearchRelationship
::
RELATIONSHIP_REPOSITORY
=>
$query
->
getParameter
(
'repositoryPHIDs'
,
array
()),
);
$statuses
=
$query
->
getParameter
(
'statuses'
,
array
());
$statuses
=
array_fuse
(
$statuses
);
$rel_open
=
PhabricatorSearchRelationship
::
RELATIONSHIP_OPEN
;
$rel_closed
=
PhabricatorSearchRelationship
::
RELATIONSHIP_CLOSED
;
$rel_unowned
=
PhabricatorSearchRelationship
::
RELATIONSHIP_UNOWNED
;
$include_open
=
!
empty
(
$statuses
[
$rel_open
]);
$include_closed
=
!
empty
(
$statuses
[
$rel_closed
]);
if
(
$include_open
&&
!
$include_closed
)
{
$relationship_map
[
$rel_open
]
=
true
;
}
else
if
(!
$include_open
&&
$include_closed
)
{
$relationship_map
[
$rel_closed
]
=
true
;
}
if
(
$query
->
getParameter
(
'withUnowned'
))
{
$relationship_map
[
$rel_unowned
]
=
true
;
}
$rel_owner
=
PhabricatorSearchRelationship
::
RELATIONSHIP_OWNER
;
if
(
$query
->
getParameter
(
'withAnyOwner'
))
{
$relationship_map
[
$rel_owner
]
=
true
;
}
else
{
$owner_phids
=
$query
->
getParameter
(
'ownerPHIDs'
,
array
());
$relationship_map
[
$rel_owner
]
=
$owner_phids
;
}
foreach
(
$relationship_map
as
$field
=>
$param
)
{
if
(
is_array
(
$param
)
&&
$param
)
{
$should
=
array
();
foreach
(
$param
as
$val
)
{
$should
[]
=
array
(
'match'
=>
array
(
"relationship.{$field}.phid"
=>
array
(
'query'
=>
$val
,
'type'
=>
'phrase'
,
),
),
);
}
// We couldn't solve it by minimum_number_should_match because it can
// match multiple owners without matching author.
$spec
[]
=
array
(
'bool'
=>
array
(
'should'
=>
$should
));
}
else
if
(
$param
)
{
$filter
[]
=
array
(
'exists'
=>
array
(
'field'
=>
"relationship.{$field}.phid"
,
),
);
}
}
if
(
$spec
)
{
$spec
=
array
(
'query'
=>
array
(
'bool'
=>
array
(
'must'
=>
$spec
)));
if
(
$title_spec
)
{
$spec
[
'query'
][
'bool'
][
'should'
]
=
$title_spec
;
}
}
if
(
$filter
)
{
$filter
=
array
(
'filter'
=>
array
(
'and'
=>
$filter
));
if
(!
$spec
)
{
$spec
=
array
(
'query'
=>
array
(
'match_all'
=>
new
stdClass
()));
}
$spec
=
array
(
'query'
=>
array
(
'filtered'
=>
$spec
+
$filter
,
),
);
}
if
(!
$query
->
getParameter
(
'query'
))
{
$spec
[
'sort'
]
=
array
(
array
(
'dateCreated'
=>
'desc'
),
);
}
$spec
[
'from'
]
=
(
int
)
$query
->
getParameter
(
'offset'
,
0
);
$spec
[
'size'
]
=
(
int
)
$query
->
getParameter
(
'limit'
,
25
);
return
$spec
;
}
public
function
executeSearch
(
PhabricatorSavedQuery
$query
)
{
$types
=
$query
->
getParameter
(
'types'
);
if
(!
$types
)
{
$types
=
array_keys
(
PhabricatorSearchApplicationSearchEngine
::
getIndexableDocumentTypes
());
}
// Don't use '/_search' for the case that there is something
// else in the index (for example if 'phabricator' is only an alias to
// some bigger index). Use '/$types/_search' instead.
$uri
=
'/'
.
implode
(
','
,
$types
).
'/_search'
;
try
{
$response
=
$this
->
executeRequest
(
$uri
,
$this
->
buildSpec
(
$query
));
}
catch
(
HTTPFutureHTTPResponseStatus
$ex
)
{
// elasticsearch probably uses Lucene query syntax:
// http://lucene.apache.org/core/3_6_1/queryparsersyntax.html
// Try literal search if operator search fails.
if
(!
strlen
(
$query
->
getParameter
(
'query'
)))
{
throw
$ex
;
}
$query
=
clone
$query
;
$query
->
setParameter
(
'query'
,
addcslashes
(
$query
->
getParameter
(
'query'
),
'+-&|!(){}[]^"~*?:
\\
'
));
$response
=
$this
->
executeRequest
(
$uri
,
$this
->
buildSpec
(
$query
));
}
$phids
=
ipull
(
$response
[
'hits'
][
'hits'
],
'_id'
);
return
$phids
;
}
public
function
indexExists
()
{
try
{
return
(
bool
)
$this
->
executeRequest
(
'/_status/'
,
array
());
}
catch
(
HTTPFutureHTTPResponseStatus
$e
)
{
if
(
$e
->
getStatusCode
()
==
404
)
{
return
false
;
}
throw
$e
;
}
}
private
function
getIndexConfiguration
()
{
$data
=
array
();
$data
[
'settings'
]
=
array
(
'index'
=>
array
(
'auto_expand_replicas'
=>
'0-2'
,
'analysis'
=>
array
(
'filter'
=>
array
(
'trigrams_filter'
=>
array
(
'min_gram'
=>
3
,
'type'
=>
'ngram'
,
'max_gram'
=>
3
,
),
),
'analyzer'
=>
array
(
'custom_trigrams'
=>
array
(
'type'
=>
'custom'
,
'filter'
=>
array
(
'lowercase'
,
'kstem'
,
'trigrams_filter'
,
),
'tokenizer'
=>
'standard'
,
),
),
),
),
);
$types
=
array_keys
(
PhabricatorSearchApplicationSearchEngine
::
getIndexableDocumentTypes
());
foreach
(
$types
as
$type
)
{
// Use the custom trigram analyzer for the corpus of text
$data
[
'mappings'
][
$type
][
'properties'
][
'field'
][
'properties'
][
'corpus'
]
=
array
(
'type'
=>
'string'
,
'analyzer'
=>
'custom_trigrams'
);
// Ensure we have dateCreated since the default query requires it
$data
[
'mappings'
][
$type
][
'properties'
][
'dateCreated'
][
'type'
]
=
'string'
;
}
return
$data
;
}
public
function
indexIsSane
()
{
if
(!
$this
->
indexExists
())
{
return
false
;
}
$cur_mapping
=
$this
->
executeRequest
(
'/_mapping/'
,
array
());
$cur_settings
=
$this
->
executeRequest
(
'/_settings/'
,
array
());
$actual
=
array_merge
(
$cur_settings
[
$this
->
index
],
$cur_mapping
[
$this
->
index
]);
return
$this
->
check
(
$actual
,
$this
->
getIndexConfiguration
());
}
/**
* Recursively check if two Elasticsearch configuration arrays are equal
*
* @param $actual
* @param $required array
* @return bool
*/
private
function
check
(
$actual
,
$required
)
{
foreach
(
$required
as
$key
=>
$value
)
{
if
(!
array_key_exists
(
$key
,
$actual
))
{
if
(
$key
===
'_all'
)
{
// The _all field never comes back so we just have to assume it
// is set correctly.
continue
;
}
return
false
;
}
if
(
is_array
(
$value
))
{
if
(!
is_array
(
$actual
[
$key
]))
{
return
false
;
}
if
(!
$this
->
check
(
$actual
[
$key
],
$value
))
{
return
false
;
}
continue
;
}
$actual
[
$key
]
=
self
::
normalizeConfigValue
(
$actual
[
$key
]);
$value
=
self
::
normalizeConfigValue
(
$value
);
if
(
$actual
[
$key
]
!=
$value
)
{
return
false
;
}
}
return
true
;
}
/**
* Normalize a config value for comparison. Elasticsearch accepts all kinds
* of config values but it tends to throw back 'true' for true and 'false' for
* false so we normalize everything. Sometimes, oddly, it'll throw back false
* for false....
*
* @param mixed $value config value
* @return mixed value normalized
*/
private
static
function
normalizeConfigValue
(
$value
)
{
if
(
$value
===
true
)
{
return
'true'
;
}
else
if
(
$value
===
false
)
{
return
'false'
;
}
return
$value
;
}
public
function
initIndex
()
{
if
(
$this
->
indexExists
())
{
$this
->
executeRequest
(
'/'
,
array
(),
'DELETE'
);
}
$data
=
$this
->
getIndexConfiguration
();
$this
->
executeRequest
(
'/'
,
$data
,
'PUT'
);
}
private
function
executeRequest
(
$path
,
array
$data
,
$method
=
'GET'
)
{
$uri
=
new
PhutilURI
(
$this
->
uri
);
$uri
->
setPath
(
$this
->
index
);
$uri
->
appendPath
(
$path
);
$data
=
json_encode
(
$data
);
$future
=
new
HTTPSFuture
(
$uri
,
$data
);
if
(
$method
!=
'GET'
)
{
$future
->
setMethod
(
$method
);
}
if
(
$this
->
getTimeout
())
{
$future
->
setTimeout
(
$this
->
getTimeout
());
}
list
(
$body
)
=
$future
->
resolvex
();
if
(
$method
!=
'GET'
)
{
return
null
;
}
try
{
return
phutil_json_decode
(
$body
);
}
catch
(
PhutilJSONParserException
$ex
)
{
throw
new
PhutilProxyException
(
pht
(
'ElasticSearch server returned invalid JSON!'
),
$ex
);
}
}
}
Event Timeline
Log In to Comment