Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F67484029
bibindex_engine_config.py.wml
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Jun 22, 16:30
Size
3 KB
Mime Type
text/x-c
Expires
Mon, Jun 24, 16:30 (2 d)
Engine
blob
Format
Raw Data
Handle
18415735
Attached To
R3600 invenio-infoscience
bibindex_engine_config.py.wml
View Options
## $Id$
## BibIndex bibliographic data, reference and fulltext indexing utility.
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
#include "cdswmllib.wml"
## start Python:
<protect># -*- coding: utf-8 -*-</protect>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
"""
BibIndex indexing engine configuration parameters. Mostly read from WML.
"""
## Version string identifying this indexing engine.
## NOTE(review): the VERSION placeholders are presumably substituted by WML
## when the Python module is generated -- confirm against config.wml.
bibindex_engine_version = "CDSware/<VERSION> bibindex/<VERSION>"
## Programs used to convert fulltext files to text, keyed by file
## extension.  Each value lists the candidate converter programs for that
## extension (presumably tried in the listed order -- confirm against the
## indexing engine).
## PostScript conversion is switched off at the moment, since PDF is
## faster.  The disabled entries were:
##   "ps":    ["<PSTOTEXT>","<PSTOASCII>"],
##   "ps.gz": ["<PSTOTEXT>","<PSTOASCII>"],
conv_programs = {
    "pdf": ["<PDFTOTEXT>", "<PSTOTEXT>", "<PSTOASCII>"],
    "doc": ["<ANTIWORD>", "<CATDOC>", "<WVTEXT>"],
    "ppt": ["<PPTHTML>"],
    "xls": ["<XLHTML>"],
}
## Helper programs, keyed by intermediate format, used when the converter
## programs above produce only HTML or another intermediate file format
## rather than plain text.
conv_programs_helpers = {
    "html": "<HTMLTOTEXT>",
    "gz": "<GZIP>",
}
## Safety parameters concerning the MySQL thread-multiplication problem.
## The code that consumes these values is not part of this file.
cfg_check_mysql_threads = 0 # whether to check for the problem at all
cfg_max_mysql_threads = 50 # how many threads (connections) we still consider safe
cfg_mysql_thread_timeout = 20 # threads that were sleeping for more than this many seconds get killed
## Do we want to fulltext-index local files only, or also remote URLs?
## NOTE(review): judging by the name, 0 presumably means remote URLs are
## indexed as well -- confirm against the indexing engine.
cfg_fulltext_index_local_files_only = 0
## Which language should we use as the default for stemming?
## Language codes and names:
##   {'fr': 'french', 'en': 'english', 'no': 'norwegian', 'se': 'swedish',
##    'de': 'german', 'it': 'italian', 'pt': 'portuguese'}
## None = no stemming.
cfg_use_stemmer_lang = None
## Remove stopwords?  False = no stopword removal.
cfg_remove_stopwords = True
## Path to the stopword list.  It is also used by bibrank, and should be
## given even if stopword removal is not used.
cfg_path_stopwordlist = "<ETCDIR>/bibrank/stopwords.kb"
## Regular-expression character classes used by get_words_from_phrase.
## The first class matches any single ASCII punctuation or symbol
## character; each one is backslash-escaped inside the brackets.
cfg_chars_alphanumericseparators = r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
## The second class matches a narrower set: period, comma, colon,
## semicolon, question mark, exclamation mark and double quote.
cfg_chars_punctuation = r"[\.\,\:\;\?\!\"]"
## Remove HTML tags from text?
cfg_remove_html_code = True
## Minimum word length allowed to be added to the index.
cfg_min_word_length = 0
## Access credentials used to access restricted URLs.
## (This may be interesting if you are fulltext-indexing files located
## on a remote server and when files on that server are only available
## via username/password; but it's probably better to handle this case
## via IP or some convention.  This part needs more work to be done;
## the current scheme is mostly there in order to make the
## fulltext-indexing mode non-interactive only.)
## NOTE(review): these look like placeholder values, and they are kept in
## plain text in this file -- replace them per installation and confirm
## they never hold real credentials under version control.
cfg_urlopener_username = "mysuperuser"
cfg_urlopener_password = "mysuperpass"
## FIXME: design a nice approach to fulltext-index (local/remote)
## restricted URLs
## FIXME: put these to the global WML config file one day
Event Timeline
Log In to Comment