Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F65784641
bibindex_engine_config.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Jun 6, 05:11
Size
3 KB
Mime Type
text/x-c
Expires
Sat, Jun 8, 05:11 (2 d)
Engine
blob
Format
Raw Data
Handle
18126855
Attached To
R3600 invenio-infoscience
bibindex_engine_config.py
View Options
## $Id$
## BibIndex bibliographic data, reference and fulltext indexing utility.
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
#include "cdswmllib.wml"
## start Python:
<protect># -*- coding: utf-8 -*-</protect>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
"""
BibIndex indexing engine configuration parameters. Mostly read from WML.
"""
## version number:
## (the <VERSION> placeholders are presumably expanded when this file is
## generated from the WML sources)
bibindex_engine_version = "CDSware/<VERSION> bibindex/<VERSION>"
## programs used to convert fulltext files to text:
## (keyed by file format; each value lists the converter programs
## configured for that format)
conv_programs = {
    #"ps": ["<PSTOTEXT>","<PSTOASCII>"], # switched off at the moment, since PDF is faster
    #"ps.gz": ["<PSTOTEXT>","<PSTOASCII>"],
    "pdf": ["<PDFTOTEXT>", "<PSTOTEXT>", "<PSTOASCII>"],
    "doc": ["<ANTIWORD>", "<CATDOC>", "<WVTEXT>"],
    "ppt": ["<PPTHTML>"],
    "xls": ["<XLHTML>"]
}
## helper programs used if the above programs convert only to html or other intermediate file formats:
conv_programs_helpers = {
    "html": "<HTMLTOTEXT>",
    "gz": "<GZIP>"
}
## safety parameters concerning MySQL thread-multiplication problem:
cfg_check_mysql_threads = 0 # to check or not to check the problem?
cfg_max_mysql_threads = 50 # how many threads (connections) we consider as still safe
cfg_mysql_thread_timeout = 20 # we'll kill threads that were sleeping for more than X seconds
## do we want to fulltext-index local files only, or also remote URLs?
## (0 = remote URLs are indexed too)
cfg_fulltext_index_local_files_only = 0
## which language should we use as default?
##{'fr': 'french', 'en': 'english', 'no':'norwegian', 'se':'swedish', 'de': 'german', 'it':'italian', 'pt':'portuguese'}
## None = No stemming
cfg_use_stemmer_lang = None

##Remove stopwords? False = no stopword removal
cfg_remove_stopwords = True

## path to stopword list, used by bibrank also, should be given even if stopwordremoval is not used
cfg_path_stopwordlist = "<ETCDIR>/bibrank/stopwords.kb"
##used by get_words_from_phrase
## (regular-expression character classes; kept as raw strings so that the
## backslash escapes reach the regex engine unchanged)
cfg_chars_alphanumericseparators = r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
cfg_chars_punctuation = r"[\.\,\:\;\?\!\"]"
##Remove HTML tags from text
cfg_remove_html_code = True

##Minimum word length allowed to be added to index
cfg_min_word_length = 0
## access credentials to access restricted URLs:
## (this may be interesting if you are fulltext-indexing files located
## on a remote server and when files on that server are only available
## via username/password; but it's probably better to handle this case
## via IP or some convention; this part needs more work to be done;
## the current scheme is mostly there in order to make the
## fulltext-indexing mode non-interactive only)
## NOTE(review): these values are hard-coded in the source and look like
## placeholders -- confirm they are overridden per installation.
cfg_urlopener_username = "mysuperuser"
cfg_urlopener_password = "mysuperpass"

## FIXME: design a nice approach to fulltext-index (local/remote)
## restricted URLs
## FIXME: put these to the global WML config file one day
Event Timeline
Log In to Comment