Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91643221
websearch_facet_builders.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Nov 13, 00:45
Size
7 KB
Mime Type
text/x-python
Expires
Fri, Nov 15, 00:45 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22279652
Attached To
R3600 invenio-infoscience
websearch_facet_builders.py
View Options
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import
os
from
operator
import
itemgetter
from
itertools
import
groupby
from
werkzeug.utils
import
cached_property
from
flask
import
g
,
url_for
,
request
,
abort
,
current_app
from
invenio.websearch_cache
import
search_results_cache
,
\
get_search_results_cache_key_from_qid
from
invenio.intbitset
import
intbitset
from
invenio.config
import
CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT
,
CFG_PYLIBDIR
from
invenio.importutils
import
autodiscover_modules
from
invenio.webuser_flask
import
current_user
from
invenio.websearch_model
import
Collection
from
invenio.search_engine
import
search_pattern
,
\
get_field_tags
,
\
get_records_that_can_be_displayed
,
\
get_most_popular_field_values
def get_current_user_records_that_can_be_displayed(qid):
    """
    Return the records of a cached search that the current user can display.

    The serialized hit set and the '::cc' entry stored next to it are read
    from `search_results_cache` and handed to
    `get_records_that_can_be_displayed` together with `current_user`.

    @param qid: query identifier
    @return: records in intbitset (empty when nothing is cached for `qid`)
    """

    # NOTE(review): `uid` is never read inside the helper; presumably it only
    # makes the memoized entry user-specific -- confirm against
    # `search_results_cache.memoize`.
    @search_results_cache.memoize(timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT)
    def get_records_for_user(qid, uid):
        cache_key = get_search_results_cache_key_from_qid(qid)
        serialized_hits = search_results_cache.get(cache_key)
        if serialized_hits is not None:
            current_collection = search_results_cache.get(cache_key + '::cc')
            return get_records_that_can_be_displayed(
                current_user,
                intbitset().fastload(serialized_hits),
                current_collection)
        # Nothing cached for this query identifier.
        return intbitset([])

    # Callers only have to supply the query identifier.
    user_id = current_user.get_id()
    return get_records_for_user(qid, user_id)
def faceted_results_filter(recids, filter_data, facets):
    """
    Returns records that match selected filter data.

    Every item of `filter_data` is indexed as ``item[0]`` (operator, '+' or
    '-'), ``item[1]`` (facet key) and ``item[2]`` (selected value).  For each
    facet present in `facets`, the records of all its '+' values are
    intersected with the result and the records of all its '-' values are
    removed from it.  Items with any other operator are ignored.

    The `recids` argument is never modified in place; when no '+' or '-'
    filter applies to a known facet, `recids` itself is returned.

    @param recids: found records (set-like, e.g. intbitset)
    @param filter_data: selected facet filters
    @param facets: mapping of facet key to an object providing
        `get_facet_recids(values)`
    @return: filtered records
    """
    ## Group filter values by operator and then by facet key, keeping the
    ## original order of the values inside each group.
    grouped = {}
    for item in filter_data:
        by_facet = grouped.setdefault(item[0], {})
        by_facet.setdefault(item[1], []).append(item[2])

    ## Intersect and diff records with selected facets.  Non-mutating set
    ## operations are used so that the caller's `recids` stays untouched.
    output = recids

    included = grouped.get('+')
    if included:
        for key, facet in facets.iteritems():
            if key in included:
                output = output.intersection(
                    facet.get_facet_recids(included[key]))

    excluded = grouped.get('-')
    if excluded:
        for key, facet in facets.iteritems():
            if key in excluded:
                output = output.difference(
                    facet.get_facet_recids(excluded[key]))

    return output
def _facet_plugin_checker(plugin_code):
    """
    Handy function to bridge importutils with (Invenio) facets.

    @param plugin_code: loaded plugin module
    @return: the module's `facet` attribute when it is a `FacetBuilder`
    @raise ValueError: when `facet` is missing or is not a `FacetBuilder`
    """
    found = None
    if 'facet' in dir(plugin_code):
        attribute = plugin_code.facet
        if isinstance(attribute, FacetBuilder):
            found = attribute

    if found is None:
        raise ValueError('%s is not a valid facet plugin' %
                         plugin_code.__name__)
    return found
class FacetLoader(object):
    """Loads facet plugins and exposes them by name and by order."""

    @cached_property
    def plugins(self):
        """Loaded facet plugins."""
        discovered = autodiscover_modules(['invenio.websearch_facets'],
                                          'facet_.+\.py')
        return [_facet_plugin_checker(module) for module in discovered]

    @cached_property
    def elements(self):
        """Dict with `FacetBuilder` instances accessible by facet name."""
        by_name = {}
        for builder in self.plugins:
            by_name[builder.name] = builder
        return by_name

    def __getitem__(self, key):
        """Returns the facet stored in `elements` under `key`."""
        facets_by_name = self.elements
        return facets_by_name[key]

    @cached_property
    def sorted_list(self):
        """List of sorted facets by their order property."""
        ordered = list(self.elements.values())
        ordered.sort(key=lambda builder: builder.order)
        return ordered

    def config(self, *args, **kwargs):
        """Returns facet config for all loaded plugins."""
        return [builder.get_conf(*args, **kwargs)
                for builder in self.sorted_list]
class FacetBuilder(object):
    """Implementation of a general facet builder using function
    `get_most_popular_field_values`."""

    def __init__(self, name, order=0):
        """
        @param name: facet name; also used as the search field and in URLs
        @param order: sort key of the facet (defaults to 0)
        """
        self.name = name
        self.order = order

    def get_title(self, **kwargs):
        """Returns the translated title 'Any <Name>'; `kwargs` are unused."""
        return g._('Any ' + self.name.capitalize())

    def get_url(self, qid=None):
        """Returns the URL of the '.facet' endpoint for this facet and `qid`."""
        return url_for('.facet', name=self.name, qid=qid)

    def get_conf(self, **kwargs):
        """
        Returns the facet configuration.

        @param kwargs: forwarded to `get_title`; 'qid' is passed to `get_url`
        @return: dict with keys 'title', 'url' and 'facet'
        """
        return dict(title=self.get_title(**kwargs),
                    url=self.get_url(kwargs.get('qid')),
                    facet=self.name)

    def get_recids_intbitset(self, qid):
        """
        Returns the records of query `qid` displayable by the current user.

        Best effort: any ordinary error while fetching them yields an empty
        intbitset instead of propagating.
        """
        try:
            return get_current_user_records_that_can_be_displayed(qid)
        except Exception:
            # Deliberately not a bare `except:` so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            return intbitset([])

    def get_recids(self, qid):
        """Same as `get_recids_intbitset`, converted with `tolist()`."""
        return self.get_recids_intbitset(qid).tolist()

    def get_facets_for_query(self, qid, limit=20, parent=None):
        """
        Returns at most `limit` facet values for the records of query `qid`.

        @param qid: query identifier
        @param limit: maximum number of returned items
        @param parent: not used by this implementation
        @return: first `limit` items returned by
            `get_most_popular_field_values` for this facet's field tags
        """
        return get_most_popular_field_values(self.get_recids(qid),
                                             get_field_tags(self.name)
                                             )[0:limit]

    #@blueprint.invenio_memoize(timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT / 2)
    def get_value_recids(self, value):
        """
        Returns the result of searching field `self.name` for `value`.

        Unicode values are encoded to UTF-8 and the value is wrapped in
        double quotes before being passed to `search_pattern`.
        """
        if isinstance(value, unicode):
            value = value.encode('utf8')
        p = '"' + str(value) + '"'
        return search_pattern(p=p, f=self.name)

    #@blueprint.invenio_memoize(timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT / 4)
    def get_facet_recids(self, values):
        """Returns the union of `get_value_recids(v)` for every `v` in
        `values` (an empty intbitset when `values` is empty)."""
        return reduce(lambda x, y: x.union(y),
                      [self.get_value_recids(v) for v in values],
                      intbitset())
class CollectionFacetBuilder(FacetBuilder):
    """Custom implementation of collection facet builder."""

    def get_title(self, **kwargs):
        """
        Returns title for collection facet.

        When a 'collection' keyword is given and its `id` is greater than 1,
        its `name_ln` is used; otherwise the generic title is returned.
        """
        collection = kwargs.get('collection')
        if collection is not None and collection.id > 1:
            return collection.name_ln
        return super(CollectionFacetBuilder, self).get_title(**kwargs)

    def get_facets_for_query(self, qid, limit=20, parent=None):
        """
        Returns child collections that contain records of query `qid`.

        The starting collection is chosen in this order: the `parent`
        argument, the 'parent' request argument, the '::cc' entry cached for
        the query, and finally the collection with id 1.

        @param qid: query identifier
        @param limit: maximum number of returned items
        @param parent: name of the collection whose children are inspected;
            when None the 'parent' request argument is used instead
        @return: list of (name, number of records, name_ln) tuples sorted by
            number of records in descending order, at most `limit` long
        """
        recIDsHitSet = self.get_recids_intbitset(qid)

        # Honour an explicitly passed parent; fall back to the request only
        # when the caller did not provide one.
        if parent is None:
            parent = request.args.get('parent', None)

        if parent is not None:
            collection = Collection.query.filter(
                Collection.name == parent).first_or_404()
        else:
            cc = search_results_cache.get(
                get_search_results_cache_key_from_qid(qid) + '::cc')
            if cc is not None:
                collection = Collection.query.filter(
                    Collection.name == cc).first_or_404()
            else:
                collection = Collection.query.get(1)

        facet = []
        for c in collection.collection_children_r:
            num_records = len(c.reclist.intersection(recIDsHitSet))
            # Children without any matching record are left out.
            if num_records:
                facet.append((c.name, num_records, c.name_ln))
        return sorted(facet, key=lambda x: x[1], reverse=True)[0:limit]
Event Timeline
Log In to Comment