#By doing this like below, characters standing alone, like c a b, are not added to the index, but when they are together with characters like c++ or c$ they are added.
write_message("Warning: Stemming not available for language '%s'."%tag[2])
tags.append(tag)
i+=1
#except Exception:
# write_message("Could not read data from configuration file, please check for errors")
# raise StandardError
returntags
def get_valid_range(rank_method_code):
    """Return the set of record IDs treated as valid for a rank method.

    The restriction to the collections the rank method is enabled for is
    currently switched off: the disabled logic looked up the collections
    attached to the method and searched for their records. As a result
    ``rank_method_code`` is not consulted and the same set is returned
    for every method.

    rank_method_code - name of the rank method (currently unused)

    Returns an intbitset created with trailing_bits=1 from which the
    value 0 has been removed.
    """
    # NOTE(review): trailing_bits=1 presumably makes the set open-ended so
    # that it covers every record ID - confirm against the intbitset docs.
    record_ids = intbitset(trailing_bits=1)
    # 0 is removed explicitly; presumably it is never a real record ID.
    record_ids.discard(0)
    return record_ids
defcheck_term(term,termlength):
"""Check if term contains not allowed characters, or for any other reasons for not using this term."""
"""Updates rnkWORDF and rnkWORDR with Gi and Nj values. For each term in rnkWORDF, a Gi value for the term is added. And for each term in each document, the Nj value for that document is added. In rnkWORDR, the Gi value for each term in each document is added. For description on how things are computed, look in the hacking docs.
table - name of forward index to update
terms - modified terms"""
stime=time.time()
Gi={}
Nj={}
N=run_sql("select count(id_bibrec) from %sR"%table[:-1])[0][0]