# With the approach below, characters standing alone (like c, a, b) are not added to the index, but when they are combined with other characters, as in c++ or c$, they are added.
write_message("Warning: Stemming not available for language '%s'."%tag[2])
tags.append(tag)
i+=1
#except Exception:
# write_message("Could not read data from configuration file, please check for errors")
# raise StandardError
returntags
def get_valid_range(rank_method_code):
    """Return the set of record IDs treated as valid for a rank method.

    A collection-based restriction (selecting the collections linked to
    the method through ``collection_rnkMETHOD`` and searching them with
    ``perform_request_search``) is not active here, so
    ``rank_method_code`` is accepted but not consulted.

    The result is an ``intbitset`` built with ``trailing_bits=1`` from
    which record ID 0 has been removed.
    """
    # NOTE(review): trailing_bits=1 presumably makes the set contain every
    # ID beyond the explicitly stored ones, i.e. "all records" -- confirm
    # against the intbitset documentation.
    all_records = intbitset(trailing_bits=1)
    # Drop ID 0 from the otherwise unrestricted set.
    all_records.discard(0)
    return all_records
defcheck_term(term,termlength):
"""Check if term contains not allowed characters, or for any other reasons for not using this term."""
"""Updates rnkWORDF and rnkWORDR with Gi and Nj values. For each term in rnkWORDF, a Gi value for the term is added. And for each term in each document, the Nj value for that document is added. In rnkWORDR, the Gi value for each term in each document is added. For description on how things are computed, look in the hacking docs.
table - name of forward index to update
terms - modified terms"""
frominvenio.configimportCFG_SITE_URL
zero_division_msg="""\
ERROR: %s captured. This might be caused by not enough balanced indexes.
Please, schedule a regular, e.g. weekly, rebalancing of the word similarity
ranking indexes, by using e.g.
"bibrank -f50000 -R -wwrd -s14d -LSunday"
as recommended in %s/help/admin/howto-run"""
stime=time.time()
Gi={}
Nj={}
N=run_sql("select count(id_bibrec) from %sR"%table[:-1])[0][0]