#"""for all the bibdoc connected to the specified recid, set
#the text_extraction_date to the task_starting_time."""
#run_sql("UPDATE bibdoc JOIN bibrec_bibdoc ON id=id_bibdoc SET text_extraction_date=%s WHERE id_bibrec BETWEEN %s AND %s", (task_get_task_param('task_starting_time'), first_recid, last_recid))
"""Load existing hitlist for the word from the database index files."""
query="SELECT hitlist FROM %s WHERE term=%%s"%self.tablename
res=run_sql(query,(word,))
ifres:
returnintbitset(res[0][0])
else:
returnNone
def merge_with_old_recIDs(self, word, set):
    """Apply the in-memory signed recIDs of WORD to the hitlist SET.

    SET is the hitlist already stored in the database index for WORD.
    It is modified in place by ``update_with_signs`` using the
    in-memory entry ``self.value[word]`` (a hash of recIDs whose value
    is +1 or -1 according to whether the recID is to be added or
    deleted).

    Return True when SET differs from what it was before the merge,
    False when the merge left it unchanged.

    NOTE: the parameter name ``set`` shadows the builtin; it is kept
    because it is part of the method's signature.
    """
    # Snapshot the hitlist first so an in-place change can be detected.
    snapshot = intbitset(set)
    pending_changes = self.value[word]
    set.update_with_signs(pending_changes)
    return set != snapshot
def put_word_into_db(self, word):
    """Flush a single word to the database and delete it from memory.

    The hitlist stored for WORD in ``self.tablename`` is loaded first:

    * if a row exists, the in-memory recIDs are merged into it and the
      row is UPDATEd only when the merge actually changed the hitlist;
    * if no row exists (``load_old_recIDs`` returned None), a new
      hitlist is built from the keys of ``self.value[word]`` and
      INSERTed.  A failing INSERT is reported through
      ``register_exception`` instead of being propagated.

    In both cases a hitlist that ends up empty is removed from the
    table, and ``self.value[word]`` is always dropped from memory.

    The exception handler uses the ``except ... as`` form, which needs
    Python 2.6 or later.
    """
    hitlist = self.load_old_recIDs(word)

    if hitlist is None:
        # The word is new: create its hitlist from the recIDs in memory.
        # NOTE(review): every key is taken regardless of its sign value;
        # confirm that a brand-new word can never carry only deletions.
        write_message("......... inserting hitlist for ``%s''" % word, verbose=9)
        hitlist = intbitset(self.value[word].keys())
        try:
            run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % self.tablename,
                    (word, hitlist.fastdump()))
        except Exception as e:
            # The admin is alerted only when we are not already
            # repairing the problem.
            register_exception(prefix="Error when putting the term '%s' into db (hitlist=%s): %s\n" % (repr(word), hitlist, e), alert_admin=(task_get_option('cmd') != 'repair'))
    elif self.merge_with_old_recIDs(word, hitlist):
        # The merge changed the stored hitlist: write it back.
        write_message("......... updating hitlist for ``%s''" % word, verbose=9)
        run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % self.tablename,
                (hitlist.fastdump(), word))
    else:
        # Nothing to update.
        write_message("......... unchanged hitlist for ``%s''" % word, verbose=9)

    if not hitlist:
        # Never store empty words.
        run_sql("DELETE from %s WHERE term=%%s" % self.tablename,
                (word,))

    del self.value[word]
def display(self):
    """Display the word table.

    Emits one ``word: value`` line per entry of ``self.value`` through
    ``write_message``, in ascending order of the words.
    """
    # sorted() replaces the keys()/sort() pair, which fails wherever
    # keys() does not return a plain list.
    for term in sorted(self.value):
        write_message("%s: %s" % (term, self.value[term]))
def count(self):
    """Return how many words are currently held in ``self.value``."""
    number_of_words = len(self.value)
    return number_of_words
def info(self):
    """Print how many words the words table contains.

    The figure comes from ``self.count()`` and is reported through
    ``write_message``.
    """
    total = self.count()
    write_message("The words table contains %d words." % total)
def lookup_words(self, word=""):
    """Look up WORD in the words table and report the outcome.

    When WORD is empty, it is asked for with an ``Enter word: ``
    prompt; an end-of-file or a keyboard interrupt at the prompt
    aborts the lookup without reporting anything.

    The outcome goes through ``write_message``: either the number of
    entries stored in ``self.value[word]``, or a notice that the word
    does not exist.  Nothing is returned.
    """
    if not word:
        # A single attempt is enough: the prompt either yields a word
        # or the lookup is abandoned.
        try:
            word = raw_input("Enter word: ")
        except (EOFError, KeyboardInterrupt):
            return

    # ``in`` replaces the deprecated has_key(); this assumes
    # self.value is a dict, as its other uses here suggest.
    if word in self.value:
        write_message("The word '%s' is found %d times."
                      % (word, len(self.value[word])))
    else:
        write_message("The word '%s' does not exist in the word file."
                      % word)
defadd_recIDs(self,recIDs,opt_flush):
"""Fetches records which id in the recIDs range list and adds
them to the wordTable. The recIDs range list is of the form:
"""Add records that were modified between DATES[0] and DATES[1].
If DATES is not set, then add records that were modified since
the last update of the index.
"""
ifnotdates:
table_id=self.tablename[-3:-1]
query="""SELECT last_updated FROM idxINDEX WHERE id=%s"""
res=run_sql(query,(table_id,))
ifnotres:
return
ifnotres[0][0]:
dates=("0000-00-00",None)
else:
dates=(res[0][0],None)
ifdates[1]isNone:
res=intbitset(run_sql("""SELECT b.id FROM bibrec AS b
WHERE b.modification_date >= %s""",
(dates[0],)))
ifself.is_fulltext_index:
res|=intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE text_extraction_date <= modification_date AND modification_date >= %s AND status<>'DELETED'""",(dates[0],)))
elifdates[0]isNone:
res=intbitset(run_sql("""SELECT b.id FROM bibrec AS b
WHERE b.modification_date <= %s""",
(dates[1],)))
ifself.is_fulltext_index:
res|=intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE text_extraction_date <= modification_date AND modification_date <= %s AND status<>'DELETED'""",(dates[1],)))
else:
res=intbitset(run_sql("""SELECT b.id FROM bibrec AS b
WHERE b.modification_date >= %s AND
b.modification_date <= %s""",
(dates[0],dates[1])))
ifself.is_fulltext_index:
res|=intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE text_extraction_date <= modification_date AND modification_date >= %s AND modification_date <= %s AND status<>'DELETED'""",(dates[0],dates[1],)))
alist=create_range_list(list(res))
ifnotalist:
write_message("No new records added. %s is up to date"%self.tablename)
else:
self.add_recIDs(alist,opt_flush)
defadd_recID_range(self,recID1,recID2):
"""Add records from RECID1 to RECID2."""
wlist={}
self.recIDs_in_mem.append([recID1,recID2])
# secondly fetch all needed tags:
ifself.fields_to_index==[CFG_JOURNAL_TAG]:
# FIXME: quick hack for the journal index; a special
# treatment where we need to associate more than one