Page MenuHomec4science

template_word_similarity.cfg.wml
No OneTemporary

File Metadata

Created
Fri, Jun 7, 00:56

template_word_similarity.cfg.wml

## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#include "configbis.wml"
#include "config.wml"
#Selects the ranking function for this rank method.
#NOTE(review): the value presumably names the section that holds the function's settings - confirm.
[rank_method]
function = word_similarity
#Settings for the word_similarity ranking function.
[word_similarity]
#Stemming language; None = no stemming.
#NOTE(review): the CDSLANG tag is presumably expanded by the included .wml files - confirm.
#NOTE(review): written in upper case here but in lower case in the tag entries below - confirm both expand the same way.
stemming = <CDSLANG>
#NOTE(review): presumably the database table holding the word index for this method - confirm.
table = rnkWORD01F
#Stopword list is taken from /bibindex/bibindex_engine_config
stopword = True
#NOTE(review): presumably the text printed before and after the relevance number - confirm.
relevance_number_output_prologue = (
relevance_number_output_epilogue = )
#Each tagN entry has the form: MARC tag, tag points, tag language
#NOTE(review): the percent sign in a MARC tag looks like a wildcard - confirm against the consumer.
#keyword
tag1 = 6531_a, 1, <cdslang>
#keyword
tag2 = 695__a, 1, <cdslang>
#keyword
tag3 = 6532_a, 1, <cdslang>
#title (the only entry with 10 tag points instead of 1)
tag4 = 245__%, 10, <cdslang>
#title (language fixed to fr)
#NOTE(review): this pattern has a single underscore, unlike the other entries - confirm it is intended.
tag5 = 246_%, 1, fr
#title
tag6 = 250__a, 1, <cdslang>
#title
tag7 = 711__a, 1, <cdslang>
#abbreviated
tag8 = 210__a, 1, <cdslang>
#key title
tag9 = 222__a, 1, <cdslang>
#abstract
tag10 = 520__%, 1, <cdslang>
#abstract (language fixed to fr)
tag11 = 590__%, 1, fr
#conference
tag12 = 111__a, 1, <cdslang>
#author (language none, presumably meaning no stemming - confirm)
tag13 = 100__%, 1, none
#author
tag14 = 700__%, 1, none
#author
tag15 = 721__a, 1, none
#Settings for the "find similar" feature.
[find_similar]
#A term may occur in at most this share of all documents.
#NOTE(review): the value looks like a fraction (0.05 presumably meaning 5%) - confirm.
max_word_occurence = 0.05
#A term must occur in at least this share of all documents (same scale as above).
min_word_occurence = 0.00
#A term should be at least 3 characters long
min_word_length = 3
#A term should occur in at least 3 documents
min_nr_words_docs = 3
#Do not consider more than 20 terms for "find similar"
max_nr_words_upper = 20
#If a document contains fewer than 10 terms, frequently used terms are considered too; otherwise they are ignored
max_nr_words_lower = 10
#Default minimum relevance value for "find similar"
default_min_relevance = 75

Event Timeline