Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F85031738
bibindex_engine_stopwords.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Sep 26, 07:42
Size
1 KB
Mime Type
text/x-python
Expires
Sat, Sep 28, 07:42 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
21119687
Attached To
R3600 invenio-infoscience
bibindex_engine_stopwords.py
View Options
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibIndex engine stopwords facility."""
# Module revision marker.  NOTE(review): "$Id$" looks like an unexpanded
# version-control keyword placeholder -- confirm against the repository setup.
__revision__ = "$Id$"
from
invenio.config
import
CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE
,
\
CFG_BIBINDEX_REMOVE_STOPWORDS
def create_stopwords(filename=CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE):
    """Create stopword dictionary out of FILENAME.

    Each line of FILENAME, with trailing whitespace removed, becomes a
    key of the returned dictionary; every key maps to the value 1.

    If FILENAME cannot be opened (IOError), return an empty dictionary.
    Errors raised while reading an already opened file are propagated
    to the caller, but the file is closed first.
    """
    try:
        file_descriptor = open(filename, 'r')
    except IOError:
        return {}
    # try/finally (rather than a `with` block) keeps this compatible with
    # old Python 2 interpreters while still guaranteeing that the file is
    # closed even when readlines() fails.
    try:
        lines = file_descriptor.readlines()
    finally:
        file_descriptor.close()
    stopdict = {}
    for line in lines:
        stopdict[line.rstrip()] = 1
    return stopdict
# Module-level stopword dictionary, built once when this module is imported
# by reading the default stopwords file (CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE).
# It is the table that is_stopword() looks words up in.
stopwords = create_stopwords()
def is_stopword(word, force_check=0):
    """Return True if WORD is found among stopwords, False otherwise.

    Also, return False if BibIndex wasn't configured to use stopwords
    (CFG_BIBINDEX_REMOVE_STOPWORDS is false).  However, if FORCE_CHECK
    is set to 1, then do not pay attention to whether the admin
    disabled stopwords functionality, but look up the word anyway.
    This mode is useful for ranking.
    """
    # note: input word is assumed to be in lowercase
    if not (CFG_BIBINDEX_REMOVE_STOPWORDS or force_check):
        # Stopword removal is disabled and the caller did not force a lookup.
        return False
    # Use the `in` operator instead of dict.has_key(): has_key() is
    # deprecated and no longer exists on Python 3 dictionaries.
    return word in stopwords
Event Timeline
Log In to Comment