diff --git a/invenio/modules/search/facet_builders.py b/invenio/modules/search/facet_builders.py index d6e908935..5fb4378c8 100644 --- a/invenio/modules/search/facet_builders.py +++ b/invenio/modules/search/facet_builders.py @@ -1,234 +1,192 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2012, 2013, 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Facet utility functions.""" from operator import itemgetter from itertools import groupby from six import iteritems -from werkzeug.utils import cached_property from flask import g, url_for, request from flask.ext.login import current_user from .cache import search_results_cache, \ get_search_results_cache_key_from_qid from .models import Collection -from .registry import facets from invenio.base.globals import cfg -try: - from intbitset import intbitset -except: - from intbitset import intbitset +from intbitset import intbitset def get_current_user_records_that_can_be_displayed(qid): """Return records that current user can display. 
:param qid: query identifier :return: records in intbitset """ CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT = cfg.get( 'CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT') @search_results_cache.memoize(timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT) def get_records_for_user(qid, uid): from invenio.legacy.search_engine import \ get_records_that_can_be_displayed key = get_search_results_cache_key_from_qid(qid) data = search_results_cache.get(key) if data is None: return intbitset([]) cc = search_results_cache.get(key + '::cc') return get_records_that_can_be_displayed(current_user, intbitset().fastload(data), cc) # Simplifies API return get_records_for_user(qid, current_user.get_id()) def faceted_results_filter(recids, filter_data, facets): """Return records that match selected filter data. :param recids: found records :param filter_date: selected facet filters :param facet_config: facet configuration :return: filtered records """ # Group filter data by operator and then by facet key. sortkeytype = itemgetter(0) sortfacet = itemgetter(1) data = sorted(filter_data, key=sortkeytype) out = {} for t, vs in groupby(data, key=sortkeytype): out[t] = {} for v, k in groupby(sorted(vs, key=sortfacet), key=sortfacet): out[t][v] = map(lambda i: i[2], k) filter_data = out # Intersect and diff records with selected facets. 
output = recids if '+' in filter_data: values = filter_data['+'] for key, facet in iteritems(facets): if key in values: output.intersection_update(facet.get_facet_recids(values[key])) if '-' in filter_data: values = filter_data['-'] for key, facet in iteritems(facets): if key in values: output.difference_update(facet.get_facet_recids(values[key])) return output -def _facet_plugin_checker(plugin_code): - """Handy function to check facet plugin.""" - if 'facet' in dir(plugin_code): - candidate = getattr(plugin_code, 'facet') - if isinstance(candidate, FacetBuilder): - return candidate - - -class FacetLoader(object): - - """Facet loader helper class.""" - - @cached_property - def plugins(self): - """Loaded facet plugins.""" - return filter(None, map(_facet_plugin_checker, facets)) - - @cached_property - def elements(self): - """Dict with `FacetBuilder` instances accesible by facet name.""" - return dict((f.name, f) for f in self.plugins) - - def __getitem__(self, key): - """Return element value.""" - return self.elements[key] - - @cached_property - def sorted_list(self): - """List of sorted facets by their order property.""" - return sorted(self.elements.values(), key=lambda x: x.order) - - def config(self, *args, **kwargs): - """Return facet config for all loaded plugins.""" - return map(lambda x: x.get_conf(*args, **kwargs), self.sorted_list) - - class FacetBuilder(object): """Facet builder helper class. Implement a general facet builder using function `get_most_popular_field_values`. 
""" - def __init__(self, name, order=0): + def __init__(self, name): """Initialize facet builder.""" self.name = name - self.order = order def get_title(self, **kwargs): """Return facet title.""" return g._('Any ' + self.name.capitalize()) def get_url(self, qid=None): """Return facet data url.""" return url_for('.facet', name=self.name, qid=qid) def get_conf(self, **kwargs): """Return facet configuration.""" return dict(title=self.get_title(**kwargs), url=self.get_url(kwargs.get('qid')), facet=self.name) def get_recids_intbitset(self, qid): """Return record ids as intbitset.""" try: return get_current_user_records_that_can_be_displayed(qid) except: return intbitset([]) def get_recids(self, qid): """Return record ids as list.""" return self.get_recids_intbitset(qid).tolist() def get_facets_for_query(self, qid, limit=20, parent=None): """Return facet data.""" from invenio.legacy.search_engine import get_most_popular_field_values,\ get_field_tags return get_most_popular_field_values(self.get_recids(qid), get_field_tags(self.name) )[0:limit] def get_value_recids(self, value): """Return record ids in intbitset for given field value.""" from invenio.legacy.search_engine import search_pattern if isinstance(value, unicode): value = value.encode('utf8') p = '"' + str(value) + '"' return search_pattern(p=p, f=self.name) def get_facet_recids(self, values): """Return record ids in intbitset for all field values.""" return reduce(lambda x, y: x.union(y), [self.get_value_recids(v) for v in values], intbitset()) class CollectionFacetBuilder(FacetBuilder): """Custom implementation of collection facet builder.""" def get_title(self, **kwargs): """Return title for collection facet.""" collection = kwargs.get('collection') if collection is not None and collection.id > 1: return collection.name_ln return super(CollectionFacetBuilder, self).get_title(**kwargs) def get_facets_for_query(self, qid, limit=20, parent=None): """Return record ids as intbitset.""" recIDsHitSet = 
self.get_recids_intbitset(qid) parent = request.args.get('parent', None) if parent is not None: collection = Collection.query.filter( Collection.name == parent).first_or_404() else: cc = search_results_cache.get( get_search_results_cache_key_from_qid(qid) + '::cc') if cc is not None: collection = Collection.query.filter( Collection.name == cc).first_or_404() else: collection = Collection.query.get(1) facet = [] for c in collection.collection_children_r: num_records = len(c.reclist.intersection(recIDsHitSet)) if num_records: facet.append((c.name, num_records, c.name_ln)) return sorted(facet, key=lambda x: x[1], reverse=True)[0:limit] diff --git a/invenio/modules/search/facets/author.py b/invenio/modules/search/facets/author.py index 6e96085ee..6e3b58847 100644 --- a/invenio/modules/search/facets/author.py +++ b/invenio/modules/search/facets/author.py @@ -1,24 +1,24 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2013, 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Author facet implementation.""" from ..facet_builders import FacetBuilder -facet = FacetBuilder('author', order=2) +facet = FacetBuilder('author') diff --git a/invenio/modules/search/facets/collection.py b/invenio/modules/search/facets/collection.py index d744fefec..9349bfd69 100644 --- a/invenio/modules/search/facets/collection.py +++ b/invenio/modules/search/facets/collection.py @@ -1,24 +1,24 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2013, 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Collection facet implementation.""" from ..facet_builders import CollectionFacetBuilder -facet = CollectionFacetBuilder('collection', order=1) +facet = CollectionFacetBuilder('collection') diff --git a/invenio/modules/search/facets/year.py b/invenio/modules/search/facets/year.py index f3f3a6c3c..a23424e11 100644 --- a/invenio/modules/search/facets/year.py +++ b/invenio/modules/search/facets/year.py @@ -1,24 +1,24 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2013, 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Year facet implementation.""" from ..facet_builders import FacetBuilder -facet = FacetBuilder('year', order=3) +facet = FacetBuilder('year') diff --git a/invenio/modules/search/models.py b/invenio/modules/search/models.py index b6933b3b0..17af25617 100644 --- a/invenio/modules/search/models.py +++ b/invenio/modules/search/models.py @@ -1,851 +1,869 @@ # -*- coding: utf-8 -*- # ## This file is part of Invenio. -## Copyright (C) 2011, 2012, 2013 CERN. +## Copyright (C) 2011, 2012, 2013, 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ WebSearch database models. """ # General imports. 
import re from flask import g, url_for from intbitset import intbitset from six import iteritems from operator import itemgetter from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.ext.orderinglist import ordering_list from sqlalchemy.orm.collections import InstrumentedList from sqlalchemy.orm.collections import attribute_mapped_collection from sqlalchemy.orm.collections import collection from invenio.base.globals import cfg from invenio.base.i18n import _, gettext_set_language from invenio.ext.sqlalchemy import db # Create your models here. from invenio.modules.accounts.models import User from invenio.modules.formatter.models import Format class IntbitsetPickle(object): def dumps(self, obj, protocol=None): if obj is not None: return obj.fastdump() return intbitset([]).fastdump() def loads(self, obj): try: return intbitset(obj) except: return intbitset() def IntbitsetCmp(x, y): if x is None or y is None: return False else: return x == y class OrderedList(InstrumentedList): def append(self, item): if self: s = sorted(self, key=lambda obj: obj.score) item.score = s[-1].score + 1 else: item.score = 1 InstrumentedList.append(self, item) def set(self, item, index=0): if self: s = sorted(self, key=lambda obj: obj.score) if index >= len(s): item.score = s[-1].score + 1 elif index < 0: item.score = s[0].score index = 0 else: item.score = s[index].score + 1 for i, it in enumerate(s[index:]): it.score = item.score + i + 1 #if s[i+1].score more then break else: item.score = index InstrumentedList.append(self, item) def pop(self, item): #FIXME if self: obj_list = sorted(self, key=lambda obj: obj.score) for i, it in enumerate(obj_list): if obj_list[i] == item: return InstrumentedList.pop(self, i) def attribute_multi_dict_collection(creator, key_attr, val_attr): class MultiMappedCollection(dict): def __init__(self, data=None): self._data = data or {} @collection.appender def _append(self, obj): l = self._data.setdefault(key_attr(obj), []) l.append(obj) 
def __setitem__(self, key, value): self._append(creator(key, value)) def __getitem__(self, key): return tuple(val_attr(obj) for obj in self._data[key]) @collection.remover def _remove(self, obj): self._data[key_attr(obj)].remove(obj) @collection.iterator def _iterator(self): for objs in self._data.itervalues(): for obj in objs: yield obj #@collection.converter #def convert(self, other): # print '===== CONVERT ====' # print other # for k, vals in iteritems(other): # for v in list(vals): # print 'converting: ', k,': ',v # yield creator(k, v) #@collection.internally_instrumented #def extend(self, items): # for k, item in items: # for v in list(item): # print 'setting: ', k,': ',v # self.__setitem__(k,v) def __repr__(self): return '%s(%r)' % (type(self).__name__, self._data) return MultiMappedCollection external_collection_mapper = attribute_multi_dict_collection( creator=lambda k, v: CollectionExternalcollection(type=k, externalcollection=v), key_attr=lambda obj: obj.type, val_attr=lambda obj: obj.externalcollection) class Collection(db.Model): """Represents a Collection record.""" def __repr__(self): return "%s(%s)" % (self.__class__.__name__, self.id) __tablename__ = 'collection' id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True) name = db.Column(db.String(255), unique=True, index=True, nullable=False) dbquery = db.Column(db.Text(20), nullable=True, index=True) nbrecs = db.Column(db.Integer(10, unsigned=True), server_default='0') #FIXME read only!!! 
reclist = db.Column(db.PickleType(pickler=IntbitsetPickle(), comparator=IntbitsetCmp)) _names = db.relationship(lambda: Collectionname, backref='collection', collection_class=attribute_mapped_collection('ln_type'), cascade="all, delete, delete-orphan") names = association_proxy('_names', 'value', creator=lambda k, v: Collectionname(ln_type=k, value=v)) _boxes = db.relationship(lambda: Collectionboxname, backref='collection', collection_class=attribute_mapped_collection('ln_type'), cascade="all, delete, delete-orphan") boxes = association_proxy('_boxes', 'value', creator=lambda k, v: Collectionboxname(ln_type=k, value=v)) _formatoptions = association_proxy('formats', 'format') #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def formatoptions(self): if len(self._formatoptions): return [dict(f) for f in self._formatoptions] else: return [{'code': u'hb', 'name': _("HTML %(format)s", format=_("brief")), 'content_type': u'text/html'}] formatoptions = property(formatoptions) _examples_example = association_proxy('_examples', 'example') + facets = db.relationship('FacetCollection', backref='collection') + @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def examples(self): return list(self._examples_example) @property def name_ln(self): from invenio.legacy.search_engine import get_coll_i18nname return get_coll_i18nname(self.name, g.ln).decode('utf-8') # Another possible implementation with cache memoize # @cache.memoize #try: # return db.object_session(self).query(Collectionname).\ # with_parent(self).filter(db.and_(Collectionname.ln==g.ln, # Collectionname.type=='ln')).first().value #except: # return self.name @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def portalboxes_ln(self): return db.object_session(self).query(CollectionPortalbox).\ with_parent(self).\ options(db.joinedload_all(CollectionPortalbox.portalbox)).\ filter(CollectionPortalbox.ln == g.ln).\ order_by(db.desc(CollectionPortalbox.score)).all() 
@property def most_specific_dad(self): return db.object_session(self).query(Collection).\ join(Collection.sons).\ filter(CollectionCollection.id_son == self.id).\ order_by(db.asc(Collection.nbrecs)).\ first() @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def is_restricted(self): from invenio.legacy.search_engine import collection_restricted_p return collection_restricted_p(self.name) @property def type(self): p = re.compile("\d+:.*") if self.dbquery is not None and \ p.match(self.dbquery.lower()): return 'r' else: return 'v' _collection_children = db.relationship(lambda: CollectionCollection, #collection_class=OrderedList, collection_class=ordering_list('score'), primaryjoin=lambda: Collection.id == CollectionCollection.id_dad, foreign_keys=lambda: CollectionCollection.id_dad, order_by=lambda: db.asc(CollectionCollection.score)) _collection_children_r = db.relationship(lambda: CollectionCollection, #collection_class=OrderedList, collection_class=ordering_list('score'), primaryjoin=lambda: db.and_( Collection.id == CollectionCollection.id_dad, CollectionCollection.type == 'r'), foreign_keys=lambda: CollectionCollection.id_dad, order_by=lambda: db.asc(CollectionCollection.score)) _collection_children_v = db.relationship(lambda: CollectionCollection, #collection_class=OrderedList, collection_class=ordering_list('score'), primaryjoin=lambda: db.and_( Collection.id == CollectionCollection.id_dad, CollectionCollection.type == 'v'), foreign_keys=lambda: CollectionCollection.id_dad, order_by=lambda: db.asc(CollectionCollection.score)) collection_parents = db.relationship(lambda: CollectionCollection, #collection_class=OrderedList, collection_class=ordering_list('score'), primaryjoin=lambda: Collection.id == CollectionCollection.id_son, foreign_keys=lambda: CollectionCollection.id_son, order_by=lambda: db.asc(CollectionCollection.score)) collection_children = association_proxy('_collection_children', 'son') collection_children_r = 
association_proxy('_collection_children_r', 'son', creator=lambda son: CollectionCollection(id_son=son.id, type='r')) collection_children_v = association_proxy('_collection_children_v', 'son', creator=lambda son: CollectionCollection(id_son=son.id, type='v')) # _externalcollections = db.relationship(lambda: CollectionExternalcollection, # backref='collection', cascade="all, delete, delete-orphan") # # externalcollections = association_proxy( # '_externalcollections', # 'externalcollection') def _externalcollections_type(type): return association_proxy( '_externalcollections_' + str(type), 'externalcollection', creator=lambda ext: CollectionExternalcollection( externalcollection=ext, type=type)) externalcollections_0 = _externalcollections_type(0) externalcollections_1 = _externalcollections_type(1) externalcollections_2 = _externalcollections_type(2) externalcollections = db.relationship(lambda: CollectionExternalcollection, #backref='collection', collection_class=external_collection_mapper, cascade="all, delete, delete-orphan") # Search options _make_field_fieldvalue = lambda type: db.relationship( lambda: CollectionFieldFieldvalue, primaryjoin=lambda: db.and_( Collection.id == CollectionFieldFieldvalue.id_collection, CollectionFieldFieldvalue.type == type), order_by=lambda: CollectionFieldFieldvalue.score) _search_within = _make_field_fieldvalue('sew') _search_options = _make_field_fieldvalue('seo') @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def search_within(self): """ Collect search within options. 
""" default = [('', g._('any field'))] found = [(o.field.code, o.field.name_ln) for o in self._search_within] if not found: found = [(f.name.replace(' ', ''), f.name_ln) for f in Field.query.filter(Field.name.in_( cfg['CFG_WEBSEARCH_SEARCH_WITHIN'])).all()] return default + sorted(found, key=itemgetter(1)) @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def search_options(self): return self._search_options @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def ancestors_ids(self): """Get list of parent collection ids.""" output = intbitset([self.id]) for c in self.dads: ancestors = c.dad.ancestors_ids if self.id in ancestors: raise output |= ancestors return output @property #@cache.memoize(make_name=lambda fname: fname + '::' + g.ln) def descendants_ids(self): """Get list of child collection ids.""" output = intbitset([self.id]) for c in self.sons: descendants = c.son.descendants_ids if self.id in descendants: raise output |= descendants return output # Gets the list of localized names as an array collection_names = db.relationship( lambda: Collectionname, primaryjoin=lambda: Collection.id == Collectionname.id_collection, foreign_keys=lambda: Collectionname.id_collection ) def translation(self, lang): """Get the translation according to the language code.""" try: return db.object_session(self).query(Collectionname).\ with_parent(self).filter(db.and_(Collectionname.ln == lang, Collectionname.type == 'ln')).first().value except: return "" def get_collectionbox_name(self, ln=None, box_type="r"): """Return collection-specific labelling subtrees. - 'Focus on': regular collection - 'Narrow by': virtual collection - 'Latest addition': boxes If translation for given language does not exist, use label for CFG_SITE_LANG. If no custom label is defined for CFG_SITE_LANG, return default label for the box. 
:param ln: the language of the label :param box_type: can be 'r' (=Narrow by), 'v' (=Focus on), 'l' (=Latest additions) """ if ln is None: ln = g.ln collectionboxnamequery = db.object_session(self).query( Collectionboxname).with_parent(self) try: collectionboxname = collectionboxnamequery.filter(db.and_( Collectionboxname.ln == ln, Collectionboxname.type == box_type, )).one() except: try: collectionboxname = collectionboxnamequery.filter(db.and_( Collectionboxname.ln == ln, Collectionboxname.type == box_type, )).one() except: collectionboxname = None if collectionboxname is None: # load the right message language _ = gettext_set_language(ln) return _(Collectionboxname.TYPES.get(box_type, '')) else: return collectionboxname.value portal_boxes_ln = db.relationship( lambda: CollectionPortalbox, #collection_class=OrderedList, collection_class=ordering_list('score'), primaryjoin=lambda: \ Collection.id == CollectionPortalbox.id_collection, foreign_keys=lambda: CollectionPortalbox.id_collection, order_by=lambda: db.asc(CollectionPortalbox.score)) #@db.hybrid_property #def externalcollections(self): # return self._externalcollections #@externalcollections.setter #def externalcollections(self, data): # if isinstance(data, dict): # for k, vals in iteritems(data): # for v in list(vals): # self._externalcollections[k] = v # else: # self._externalcollections = data def breadcrumbs(self, builder=None, ln=None): """Retunds breadcrumbs for collection.""" ln = cfg.get('CFG_SITE_LANG') if ln is None else ln breadcrumbs = [] # Get breadcrumbs for most specific dad if it exists. 
if self.most_specific_dad is not None: breadcrumbs = self.most_specific_dad.breadcrumbs(builder=builder, ln=ln) if builder is not None: crumb = builder(self) else: crumb = dict( text=self.name_ln, url=url_for('search.collection', name=self.name)) breadcrumbs.append(crumb) return breadcrumbs class Collectionname(db.Model): """Represents a Collectionname record.""" __tablename__ = 'collectionname' id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), nullable=False, primary_key=True) ln = db.Column(db.Char(5), nullable=False, primary_key=True, server_default='') type = db.Column(db.Char(3), nullable=False, primary_key=True, server_default='sn') value = db.Column(db.String(255), nullable=False) @db.hybrid_property def ln_type(self): return (self.ln, self.type) @ln_type.setter def set_ln_type(self, value): (self.ln, self.type) = value #from sqlalchemy import event #def collection_append_listener(target, value, initiator): # print "received append event for target: %s" % target.__dict__ # print value.__dict__ # print initiator.__dict__ #event.listen(Collection.names, 'append', collection_append_listener) class Collectionboxname(db.Model): """Represents a Collectionboxname record.""" __tablename__ = 'collectionboxname' TYPES = { 'v': 'Focus on:', 'r': 'Narrow by collection:', 'l': 'Latest additions:', } id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), nullable=False, primary_key=True) ln = db.Column(db.Char(5), nullable=False, primary_key=True, server_default='') type = db.Column(db.Char(3), nullable=False, primary_key=True, server_default='r') value = db.Column(db.String(255), nullable=False) @db.hybrid_property def ln_type(self): return (self.ln, self.type) @ln_type.setter def set_ln_type(self, value): (self.ln, self.type) = value class Collectiondetailedrecordpagetabs(db.Model): """Represents a Collectiondetailedrecordpagetabs record.""" __tablename__ = 'collectiondetailedrecordpagetabs' 
id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), nullable=False, primary_key=True) tabs = db.Column(db.String(255), nullable=False, server_default='') collection = db.relationship(Collection, backref='collectiondetailedrecordpagetabs') class CollectionCollection(db.Model): """Represents a CollectionCollection record.""" __tablename__ = 'collection_collection' id_dad = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), primary_key=True) id_son = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), primary_key=True) type = db.Column(db.Char(1), nullable=False, server_default='r') score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False, server_default='0') son = db.relationship(Collection, primaryjoin=id_son == Collection.id, backref='dads', #FIX collection_class=db.attribute_mapped_collection('score'), order_by=db.asc(score)) dad = db.relationship(Collection, primaryjoin=id_dad == Collection.id, backref='sons', order_by=db.asc(score)) class Example(db.Model): """Represents a Example record.""" __tablename__ = 'example' id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True, autoincrement=True) type = db.Column(db.Text, nullable=False) body = db.Column(db.Text, nullable=False) class CollectionExample(db.Model): """Represents a CollectionExample record.""" __tablename__ = 'collection_example' id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), primary_key=True) id_example = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Example.id), primary_key=True) score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False, server_default='0') collection = db.relationship(Collection, backref='_examples', order_by=score) example = db.relationship(Example, backref='collections', order_by=score) class Portalbox(db.Model): """Represents a Portalbox record.""" __tablename__ = 'portalbox' id = 
db.Column(db.MediumInteger(9, unsigned=True), autoincrement=True, primary_key=True) title = db.Column(db.Text, nullable=False) body = db.Column(db.Text, nullable=False) def get_pbx_pos(): """Returns a list of all the positions for a portalbox""" position = {} position["rt"] = "Right Top" position["lt"] = "Left Top" position["te"] = "Title Epilog" position["tp"] = "Title Prolog" position["ne"] = "Narrow by coll epilog" position["np"] = "Narrow by coll prolog" return position class CollectionPortalbox(db.Model): """Represents a CollectionPortalbox record.""" __tablename__ = 'collection_portalbox' id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), primary_key=True) id_portalbox = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Portalbox.id), primary_key=True) ln = db.Column(db.Char(5), primary_key=True, server_default='', nullable=False) position = db.Column(db.Char(3), nullable=False, server_default='top') score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False, server_default='0') collection = db.relationship(Collection, backref='portalboxes', order_by=score) portalbox = db.relationship(Portalbox, backref='collections', order_by=score) class Externalcollection(db.Model): """Represents a Externalcollection record.""" __tablename__ = 'externalcollection' id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True) name = db.Column(db.String(255), unique=True, nullable=False, server_default='') @property def engine(self): from invenio.legacy.websearch_external_collections.searcher import external_collections_dictionary if self.name in external_collections_dictionary: return external_collections_dictionary[self.name] class CollectionExternalcollection(db.Model): """Represents a CollectionExternalcollection record.""" __tablename__ = 'collection_externalcollection' id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), primary_key=True, server_default='0') 
id_externalcollection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Externalcollection.id), primary_key=True, server_default='0') type = db.Column(db.TinyInteger(4, unsigned=True), server_default='0', nullable=False) def _collection_type(type): return db.relationship(Collection, primaryjoin=lambda: db.and_( CollectionExternalcollection.id_collection == Collection.id, CollectionExternalcollection.type == type), backref='_externalcollections_' + str(type)) collection_0 = _collection_type(0) collection_1 = _collection_type(1) collection_2 = _collection_type(2) externalcollection = db.relationship(Externalcollection) class CollectionFormat(db.Model): """Represents a CollectionFormat record.""" __tablename__ = 'collection_format' id_collection = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Collection.id), primary_key=True) id_format = db.Column(db.MediumInteger(9, unsigned=True), db.ForeignKey(Format.id), primary_key=True) score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False, server_default='0') collection = db.relationship(Collection, backref='formats', order_by=db.desc(score)) format = db.relationship(Format, backref='collections', order_by=db.desc(score)) class Field(db.Model): """Represents a Field record.""" def __repr__(self): return "%s(%s)" % (self.__class__.__name__, self.id) __tablename__ = 'field' id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True) name = db.Column(db.String(255), nullable=False) code = db.Column(db.String(255), unique=True, nullable=False) #tags = db.relationship('FieldTag', # collection_class=attribute_mapped_collection('score'), # cascade="all, delete-orphan" # ) #tag_names = association_proxy("tags", "as_tag") @property def name_ln(self): from invenio.legacy.search_engine import get_field_i18nname return get_field_i18nname(self.name, g.ln) #try: # return db.object_session(self).query(Fieldname).\ # with_parent(self).filter(db.and_(Fieldname.ln==g.ln, # 
class Tag(db.Model):

    """Represents a Tag record."""

    __tablename__ = 'tag'
    id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True)
    name = db.Column(db.String(255), nullable=False)
    value = db.Column(db.Char(6), nullable=False)
    recjson_value = db.Column(db.Text)

    def __init__(self, tup=None, *args, **kwargs):
        """Create a tag, optionally from a ``(name, value)`` tuple.

        :param tup: a tuple unpacked into ``name`` and ``value``; any other
            non-None value is forwarded to the parent constructor as its
            first positional argument
        """
        positional = args
        if isinstance(tup, tuple):
            # The tuple is consumed here and never reaches the parent.
            self.name, self.value = tup
        elif tup is not None:
            positional = (tup,) + args
        super(Tag, self).__init__(*positional, **kwargs)

    @property
    def as_tag(self):
        """Return the ``(name, value)`` pair of this tag."""
        return (self.name, self.value)
class UserQuery(db.Model):

    """Represents a UserQuery record.

    Links a user to a stored ``WebQuery`` together with the hostname and
    date columns declared below.
    """

    __tablename__ = 'user_query'
    # Composite primary key: (id_user, id_query), each a foreign key.
    id_user = db.Column(db.Integer(15, unsigned=True),
                        db.ForeignKey(User.id), primary_key=True,
                        server_default='0')
    id_query = db.Column(db.Integer(15, unsigned=True),
                         db.ForeignKey(WebQuery.id), primary_key=True,
                         index=True, server_default='0')
    # Nullable, but the server fills in 'unknown host' when omitted.
    hostname = db.Column(db.String(50), nullable=True,
                         server_default='unknown host')
    date = db.Column(db.DateTime, nullable=True)
class FacetCollection(db.Model):

    """Facet configuration for collection.

    Each row assigns one facet, identified by ``facet_name``, to a
    collection; ``order`` is the column used to sort a collection's facets.
    """

    __tablename__ = 'facet_collection'

    id = db.Column(db.Integer, primary_key=True)
    id_collection = db.Column(db.Integer, db.ForeignKey(Collection.id))
    order = db.Column(db.Integer)
    facet_name = db.Column(db.String(80))

    def __repr__(self):
        """Return a debug representation showing all column values."""
        # The previous format string had no placeholders, so every row
        # printed as the same constant text.
        return ('FacetCollection <id: {0.id}, '
                'id_collection: {0.id_collection}, '
                'order: {0.order}, '
                'facet_name: {0.facet_name}>'.format(self))
"""Registries for search module.""" from flask.ext.registry import RegistryError, ModuleAutoDiscoveryRegistry, \ RegistryProxy +from werkzeug.utils import cached_property -from invenio.ext.registry import ModuleAutoDiscoverySubRegistry - +from invenio.ext.registry import DictModuleAutoDiscoverySubRegistry, \ + ModuleAutoDiscoverySubRegistry +from invenio.modules.search.models import FacetCollection +from invenio.modules.search.facet_builders import FacetBuilder +from invenio.utils.memoise import memoize searchext = RegistryProxy('searchext', ModuleAutoDiscoveryRegistry, 'searchext') -facets = RegistryProxy('facets', ModuleAutoDiscoverySubRegistry, 'facets') - class SearchServiceRegistry(ModuleAutoDiscoverySubRegistry): """Search Service Registry.""" __required_plugin_API_version__ = "Search Service Plugin API 1.0" def register(self, item): """Check plugin version and instantiate search service plugin.""" if item.__plugin_version__ != self.__required_plugin_API_version__: raise RegistryError( 'Invalid plugin version {0} required {1}'.format( item.__plugin_version__, self.__required_plugin_API_version__ )) service = getattr(item, item.__name__.split('.')[-1]) return super(SearchServiceRegistry, self).register(service()) services = RegistryProxy('searchext.services', SearchServiceRegistry, 'services', registry_namespace=searchext) + + +class FacetsRegistry(DictModuleAutoDiscoverySubRegistry): + + """Registry for facets modules. + + Serves also modules sets and their configuration + for specific collections. + """ + + def keygetter(self, key, original_value, new_value): + """ + Method used to compute the key for a value being registered. + + The key is the facet name stored in facet module. + + :param key: Key if provided by the user. Defaults to None. + :param value: Value being registered. FacetBuilder object + """ + return new_value.name + + def valuegetter(self, value): + """Return FacetBuilder from inside the module. 
@memoize
def get_facets_for_collection(self, collection_id):
    """Return facets set for a collection.

    The configured rows are read from ``FacetCollection`` sorted by their
    ``order`` column and resolved to the facets registered under the same
    name.

    :param collection_id: the collection id for requested facets set
    :return: list of registered facets, in configured order
    :raises RegistryError: when a configured facet name is not registered
    """
    facets_conf = FacetCollection.query\
        .filter(FacetCollection.id_collection == collection_id)\
        .order_by(FacetCollection.order)\
        .all()

    collection_facets = []
    for facet in facets_conf:
        if facet.facet_name not in self.keys():
            # Adjacent literals are joined at compile time, so ``%`` is
            # applied to the whole message.  With ``+`` the operator bound
            # only to the second literal and raised TypeError instead.
            raise RegistryError(
                'Facet %s is not available. '
                'Maybe it\'s on PACKAGES_FACETS_EXCLUDE config list'
                % facet.facet_name)
        collection_facets.append(self.get(facet.facet_name))

    return collection_facets
+ :param collection: Collection object facets matching which are returned + :param qid: md5 hash of search parameters generated by + get_search_query_id() from invenio.modules.search.cache + """ + if collection and self.get_facets_for_collection(collection.id): + facets_set = self.get_facets_for_collection(collection.id) + else: + facets_set = self.default_facets + + return [facet.get_conf(collection=collection, qid=qid) + for facet in facets_set] + +facets = RegistryProxy('facets', FacetsRegistry, 'facets') diff --git a/invenio/modules/search/upgrades/__init__.py b/invenio/modules/search/upgrades/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/invenio/modules/search/upgrades/search_2014_07_21_facets_per_collection.py b/invenio/modules/search/upgrades/search_2014_07_21_facets_per_collection.py new file mode 100644 index 000000000..2cc9ebc7d --- /dev/null +++ b/invenio/modules/search/upgrades/search_2014_07_21_facets_per_collection.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +## +## This file is part of Invenio. +## Copyright (C) 2014 CERN. +## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
def do_upgrade():
    """Add the table with facets configuration.

    Creates ``facet_collection`` with four NOT NULL columns, ``id`` as the
    primary key and ``id_collection`` referencing ``collection.id``.
    """
    # NOTE(review): the table is created with the MyISAM engine; MySQL is
    # generally documented as not enforcing foreign keys for MyISAM, so the
    # constraint below may be informational only - confirm.
    op.create_table(
        'facet_collection',
        db.Column('id', mysql.INTEGER(), nullable=False),
        db.Column('id_collection', mysql.INTEGER(), nullable=False),
        db.Column('order', mysql.INTEGER(), nullable=False),
        db.Column('facet_name', db.String(length=80), nullable=False),
        db.ForeignKeyConstraint(['id_collection'], ['collection.id'], ),
        db.PrimaryKeyConstraint('id'),
        mysql_charset='utf8',
        mysql_engine='MyISAM'
    )
See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ WebSearch Flask Blueprint. Template hierarchy. ------------------- - ``searchbar_frame_base.html`` - ``searchbar_frame.html`` - ``collection_base.html`` - ``collection.html`` used by ``/collection/`` - ``index_base.html`` - ``index.html`` used by ``/`` - ``search_base.html`` - ``search.html`` - ``browse_base.html`` - ``browse.html`` used by ``/browse`` - ``results_base.html`` - ``results.html`` - ``helpers_base.html`` macros - ``helpers.html`` """ import json import string import functools import cStringIO from math import ceil from flask import make_response, g, request, flash, jsonify, \ redirect, url_for, current_app, abort, session, Blueprint, \ render_template from flask.ext.login import current_user from six import iteritems from werkzeug.local import LocalProxy from .. 
def min_length(length, code=406):
    """Build a validator rejecting values shorter than ``length``.

    :param length: minimum number of characters the value must have
    :param code: HTTP status code passed to ``abort`` for short values
    :return: callable that returns the value unchanged when it is long
        enough and aborts the request otherwise
    """
    def checker(value):
        # Compare against the requested minimum; it used to be a
        # hard-coded 3 regardless of ``length``.
        if len(value) < length:
            abort(code)
        return value
    return checker
def response_formated_records(recids, collection, of, **kwargs):
    """Build a response with the records rendered in output format ``of``.

    :param recids: record identifiers handed to ``print_records``
    :param collection: collection forwarded to ``print_records``
    :param of: output format code; also determines the response mimetype
    :param kwargs: extra keyword arguments forwarded to ``print_records``
    :return: response object wrapping the rendered records
    """
    from invenio.modules.formatter import get_output_format_content_type
    from invenio.modules.formatter import print_records

    rendered = print_records(recids, collection=collection, of=of, **kwargs)
    response = make_response(rendered)
    response.mimetype = get_output_format_content_type(of)
    return response
@blueprint.route('/collection/<name>', methods=['GET', 'POST'])
def collection(name):
    """Render the collection page.

    The first existing template wins, tried in this order:
    ``collection_{id}.html``, ``collection_{slugified name}.html`` and the
    default ``collection.html``.

    :param name: collection name taken from the URL; a 404 is returned
        when no collection with that name exists
    """
    # The URL rule must declare ``<name>``; without it the view can never
    # receive its required argument.
    coll = Collection.query.filter(Collection.name == name) \
        .first_or_404()

    @register_template_context_processor
    def index_context():
        breadcrumbs = current_breadcrumbs + coll.breadcrumbs(ln=g.ln)[1:]
        return dict(
            of=request.values.get('of', coll.formatoptions[0]['code']),
            format_record=format_record,
            easy_search_form=EasySearchForm(csrf_enabled=False),
            breadcrumbs=breadcrumbs)

    return render_template(['search/collection_{0}.html'.format(coll.id),
                            'search/collection_{0}.html'.format(
                                slugify(name, '_')),
                            'search/collection.html'],
                           collection=coll)
def sort_and_rank_records(recids, so=None, rm=None, p=''):
    """Return the record ids as a list, reversed or ranked.

    :param recids: object exposing ``tolist()`` with the found record ids
    :param so: sort option; when truthy the ids are returned reversed
    :param rm: rank method; used only when ``so`` is falsy
    :param p: search pattern, split on whitespace and passed to the ranker
    :return: list of record ids
    """
    output = recids.tolist()

    # Without a rank method (or with an explicit sort option) the list is
    # simply reversed.
    if so or not rm:
        output.reverse()
        return output

    from invenio.legacy.bibrank.record_sorter import rank_records
    ranked = rank_records(rm, 0, output, p.split())
    if ranked[0]:
        output = ranked[0]
        output.reverse()
    # When ranking yields nothing the unranked list is returned as is.
    # ``output`` is already a plain list here, so the former
    # ``output.tolist()`` call raised AttributeError.
    return output
@blueprint.route('/rss', methods=['GET'])
# FIXME caching issue of response object
@wash_arguments({'p': (unicode, ''),
                 'jrec': (int, 1),
                 'so': (unicode, None),
                 'rm': (unicode, None)})
@check_collection(default_collection=True)
def rss(collection, p, jrec, so, rm):
    """Render the search results as an RSS feed (output format ``xr``).

    ``p`` and ``jrec`` are washed from the request but not used directly
    here; the search itself runs on the washed URL arguments.

    :param collection: collection supplied by ``check_collection``
    :param so: sort option; reverses the hits when truthy
    :param rm: rank method; reverses the hits when truthy
    """
    from invenio.legacy.search_engine import perform_request_search

    search_args = wash_search_urlargd(request.args)
    # The engine is asked for record ids only; formatting happens below.
    search_args['of'] = 'id'

    # update search arguments with the search user preferences
    if 'rg' not in request.values:
        preferred_rg = current_user.get('rg')
        if preferred_rg:
            search_args['rg'] = preferred_rg
    group_size = int(search_args['rg'])

    query_id = get_search_query_id(**search_args)
    hits = perform_request_search(req=request.get_legacy_request(),
                                  **search_args)
    if so or rm:
        hits.reverse()

    visible = len(get_current_user_records_that_can_be_displayed(query_id))
    return response_formated_records(hits, collection, 'xr',
                                     records=visible,
                                     qid=query_id,
                                     rg=group_size)
@blueprint.route('/facet/<name>/<qid>', methods=['GET', 'POST'])
def facet(name, qid):
    """Create list of fields specified facet.

    Responds with 406 when a ``KeyError`` is raised while looking up or
    evaluating the facet.

    :param name: facet identifier
    :param qid: query identifier
    :return: jsonified facet list for XHR requests, otherwise the list
        rendered as text with an HTML mimetype
    """
    # The URL rule must declare both variables the view expects.
    # ``type=int`` makes a user-supplied limit an integer like the default;
    # an unparsable value falls back to 20.
    limit = request.args.get('limit', 20, type=int)
    try:
        out = facets[name].get_facets_for_query(qid, limit=limit)
    except KeyError:
        abort(406)

    if request.is_xhr:
        return jsonify(facet=out)
    else:
        response = make_response('%s' % str(out))
        response.mimetype = 'text/html'
        return response
@blueprint.route('/search/dispatch', methods=['GET', 'POST'])
def dispatch():
    """Redirect request to appropriate methods from search page.

    Supported values of the ``action`` request argument:

    * ``export`` - redirect to the export view with all request values
    * ``addtobasket`` - redirect to ``/yourbaskets/add`` with the selected
      record ids

    Any other value aborts with 406.
    """
    action = request.values.get('action')

    if action == 'export':
        return redirect(url_for('.export',
                                **request.values.to_dict(flat=False)))

    if action == 'addtobasket':
        recids = request.values.getlist('recid', type=int)
        lang = (request.values.get('ln') or 'en')
        # NOTE(review): ``ln`` is inserted into the URL unescaped - confirm
        # whether it should be URL-quoted.
        new_url = '/yourbaskets/add?ln={ln}&'.format(ln=lang)
        new_url += '&'.join(['recid=' + str(r) for r in recids])
        # The URL is built by hand because the parser of GET arguments in
        # 'next' does not parse lists: only the first element of a list is
        # passed to webbasket.add.
        return redirect(new_url)

    # Both supported actions returned above, so the former trailing
    # flash/redirect could never run; unknown actions are rejected here.
    abort(406)