Page MenuHomec4science

bibindex_engine_config.py
No OneTemporary

File Metadata

Created
Thu, Jun 6, 05:11

bibindex_engine_config.py

## $Id$
## BibIndex bibliographic data, reference and fulltext indexing utility.
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
#include "cdswmllib.wml"
## start Python:
<protect># -*- coding: utf-8 -*-</protect>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
"""
BibIndex indexing engine configuration parameters. Mostly read from WML.
"""
## version string of the indexing engine:
bibindex_engine_version = "CDSware/<VERSION> bibindex/<VERSION>"
## programs used to convert fulltext files to text, keyed by file format;
## each value lists the programs configured for that format:
conv_programs = {
    # the two PostScript entries are switched off at the moment,
    # since PDF is faster:
    # "ps": ["<PSTOTEXT>","<PSTOASCII>"],
    # "ps.gz": ["<PSTOTEXT>","<PSTOASCII>"],
    "pdf": ["<PDFTOTEXT>", "<PSTOTEXT>", "<PSTOASCII>"],
    "doc": ["<ANTIWORD>", "<CATDOC>", "<WVTEXT>"],
    "ppt": ["<PPTHTML>"],
    "xls": ["<XLHTML>"],
}
## helper programs, needed when a converter above only produces html
## or another intermediate file format:
conv_programs_helpers = {
    "html": "<HTMLTOTEXT>",
    "gz": "<GZIP>",
}
## safety parameters concerning the MySQL thread-multiplication problem.
## to check or not to check for the problem at all:
cfg_check_mysql_threads = 0
## number of threads (connections) that is still considered safe:
cfg_max_mysql_threads = 50
## threads that were sleeping for more than this many seconds get killed:
cfg_mysql_thread_timeout = 20
## fulltext-index local files only, or remote URLs as well?
cfg_fulltext_index_local_files_only = 0
## default stemming language, given as one of the codes below:
##{'fr': 'french', 'en': 'english', 'no':'norwegian', 'se':'swedish', 'de': 'german', 'it':'italian', 'pt':'portuguese'}
## None means no stemming.
cfg_use_stemmer_lang = None
## stopword removal switch; False means no stopword removal:
cfg_remove_stopwords = True
## path to the stopword list; bibrank uses it too, so it should be given
## even when stopword removal is not used:
cfg_path_stopwordlist = "<ETCDIR>/bibrank/stopwords.kb"
## character-class patterns used by get_words_from_phrase; the first one
## is written as two adjacent raw literals that Python joins into a
## single string:
cfg_chars_alphanumericseparators = (
    r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/"
    r"\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
)
cfg_chars_punctuation = r"[\.\,\:\;\?\!\"]"
## remove HTML tags from text?
cfg_remove_html_code = True
## minimum length a word must have to be added to the index:
cfg_min_word_length = 0
## credentials used to access restricted URLs.
## These may be of interest when fulltext-indexing files that live on a
## remote server and are only available via username/password.  It is
## probably better to handle that case via IP or some convention, and
## this part needs more work to be done; the current scheme is mostly
## there in order to make the fulltext-indexing mode non-interactive only.
## NOTE(review): the values below look like placeholders -- confirm
## before relying on them.
cfg_urlopener_username = "mysuperuser"
cfg_urlopener_password = "mysuperpass"
## FIXME: design a nice approach to fulltext-index (local/remote)
## restricted URLs
## FIXME: put these to the global WML config file one day

Event Timeline