Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91849515
bibindex_engine_stemmer.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Nov 15, 03:09
Size
1 KB
Mime Type
text/x-python
Expires
Sun, Nov 17, 03:09 (2 d)
Engine
blob
Format
Raw Data
Handle
22336151
Attached To
R3600 invenio-infoscience
bibindex_engine_stemmer.py
View Options
## $Id$
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
from
invenio.bibindex_engine_config
import
*
def create_stemmers():
    """Create stemmers dictionary for all possible languages.

    Return a dictionary mapping a two-letter language code (e.g. 'en')
    to a ``Stemmer.Stemmer`` instance built for the corresponding
    algorithm name (e.g. 'english').  Return an empty dictionary when
    the ``Stemmer`` module (PyStemmer) cannot be imported.
    """
    languages = {
        'fr': 'french',
        'en': 'english',
        'no': 'norwegian',
        'sv': 'swedish',
        'de': 'german',
        'it': 'italian',
        'pt': 'portuguese',
    }
    # Keep only the import inside the try block, so that the handler
    # covers exactly the "module is missing" case and nothing else.
    try:
        import Stemmer
    except ImportError:
        # PyStemmer isn't available
        return {}
    stemmers_initialized = {}
    # items() rather than iteritems(): the latter exists only on
    # Python 2, whereas items() behaves the same on both major versions.
    for (key, value) in languages.items():
        stemmers_initialized[key] = Stemmer.Stemmer(value)
    return stemmers_initialized
# Module-level table of stemmers keyed by language code, built once when
# this module is imported.  It is empty when create_stemmers() could not
# import the Stemmer module.
stemmers = create_stemmers()
def is_stemmer_available_for_language(lang):
    """Return true if stemmer for language LANG is available.
    Return false otherwise.

    LANG is looked up as a key of the module-level ``stemmers``
    dictionary.
    """
    # The ``in`` operator replaces dict.has_key(), which was removed in
    # Python 3.  No ``global`` statement is needed because the name is
    # only read, never rebound.
    return lang in stemmers
def stem(word, lang=cfg_bibindex_stemmer_default_language):
    """Return WORD stemmed according to language LANG (e.g. 'en').

    WORD is returned unchanged when LANG is empty or when no stemmer
    is available for LANG.
    """
    # Guard clause: nothing to do without a usable stemmer.
    if not lang or not is_stemmer_available_for_language(lang):
        return word
    stemmer_for_lang = stemmers[lang]
    return stemmer_for_lang.stem(word)
Event Timeline
Log In to Comment