diff --git a/modules/bibindex/lib/bibindex_engine.py b/modules/bibindex/lib/bibindex_engine.py
index 233e98638..7891ff41d 100644
--- a/modules/bibindex/lib/bibindex_engine.py
+++ b/modules/bibindex/lib/bibindex_engine.py
@@ -1,1955 +1,1984 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """
 BibIndex indexing engine implementation.  See bibindex executable for entry point.
 """
 
 __revision__ = "$Id$"
 
 import re
 import sys
 import time
 import fnmatch
 from datetime import datetime
 from time import strptime
 
 from invenio.config import CFG_SOLR_URL
 from invenio.bibindex_engine_config import CFG_MAX_MYSQL_THREADS, \
      CFG_MYSQL_THREAD_TIMEOUT, \
      CFG_CHECK_MYSQL_THREADS, \
      CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR, \
      CFG_BIBINDEX_INDEX_TABLE_TYPE, \
      CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR, \
      CFG_BIBINDEX_UPDATE_MESSAGE
 from invenio.bibauthority_config import \
      CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC, \
      CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
 from invenio.bibauthority_engine import get_index_strings_by_control_no,\
      get_control_nos_from_recID
 from invenio.bibindexadminlib import get_idx_remove_html_markup, \
                                      get_idx_remove_latex_markup, \
                                      get_idx_remove_stopwords
 from invenio.bibdocfile import BibRecDocs
 from invenio.search_engine import perform_request_search, \
      get_index_stemming_language, \
      get_synonym_terms, \
      search_pattern, \
      search_unit_in_bibrec
 from invenio.dbquery import run_sql, DatabaseError, serialize_via_marshal, \
      deserialize_via_marshal, wash_table_column_name
 from invenio.bibindex_engine_washer import wash_index_term
 from invenio.bibtask import task_init, write_message, get_datetime, \
     task_set_option, task_get_option, task_get_task_param, \
     task_update_progress, task_sleep_now_if_required
 from invenio.intbitset import intbitset
 from invenio.errorlib import register_exception
 from invenio.bibrankadminlib import get_def_name
 from invenio.solrutils_bibindex_indexer import solr_commit
 from invenio.bibindex_tokenizers.BibIndexJournalTokenizer import \
     CFG_JOURNAL_TAG, \
     CFG_JOURNAL_PUBINFO_STANDARD_FORM, \
     CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK
 from invenio.bibindex_engine_utils import load_tokenizers, \
     get_all_index_names_and_column_values, \
     get_idx_indexer, \
     get_index_tags, \
     get_field_tags, \
     get_tag_indexes, \
     get_all_indexes, \
     get_all_virtual_indexes, \
     get_index_virtual_indexes, \
     is_index_virtual, \
     get_virtual_index_building_blocks, \
     get_index_id_from_index_name, \
     get_index_name_from_index_id, \
-    run_sql_drop_silently
+    run_sql_drop_silently, \
+    get_min_last_updated, \
+    remove_inexistent_indexes
 from invenio.search_engine_utils import get_fieldvalues
 from invenio.bibfield import get_record
 from invenio.memoiseutils import Memoise
 
 
 if sys.hexversion < 0x2040000:
     # pylint: disable=W0622
     from sets import Set as set
     # pylint: enable=W0622
 
 
 ## precompile some often-used regexp for speed reasons:
 re_subfields = re.compile('\$\$\w')
 re_datetime_shift = re.compile("([-\+]{0,1})([\d]+)([dhms])")
 
 
 nb_char_in_line = 50  # for verbose pretty printing
 chunksize = 1000 # default number of records to process per chunk
 base_process_size = 4500 # process base size
 _last_word_table = None
 
 
 _TOKENIZERS = load_tokenizers()
 
 
 def list_union(list1, list2):
     "Returns union of the two lists."
     union_dict = {}
     for e in list1:
         union_dict[e] = 1
     for e in list2:
         union_dict[e] = 1
     return union_dict.keys()
 
 def list_unique(_list):
     """Returns a _list with duplicates removed."""
     _dict = {}
     for e in _list:
         _dict[e] = 1
     return _dict.keys()
 
 ## safety function for killing slow DB threads:
 def kill_sleepy_mysql_threads(max_threads=CFG_MAX_MYSQL_THREADS, thread_timeout=CFG_MYSQL_THREAD_TIMEOUT):
     """Check the number of DB threads and if there are more than
        MAX_THREADS of them, lill all threads that are in a sleeping
        state for more than THREAD_TIMEOUT seconds.  (This is useful
        for working around the the max_connection problem that appears
        during indexation in some not-yet-understood cases.)  If some
        threads are to be killed, write info into the log file.
     """
     res = run_sql("SHOW FULL PROCESSLIST")
     if len(res) > max_threads:
         for row in res:
             r_id, dummy, dummy, dummy, r_command, r_time, dummy, dummy = row
             if r_command == "Sleep" and int(r_time) > thread_timeout:
                 run_sql("KILL %s", (r_id,))
                 write_message("WARNING: too many DB threads, killing thread %s" % r_id, verbose=1)
     return
 
 def get_associated_subfield_value(recID, tag, value, associated_subfield_code):
     """Return list of ASSOCIATED_SUBFIELD_CODE, if exists, for record
     RECID and TAG of value VALUE.  Used by fulltext indexer only.
     Note: TAG must be 6 characters long (tag+ind1+ind2+sfcode),
     otherwise en empty string is returned.
     FIXME: what if many tag values have the same value but different
     associated_subfield_code?  Better use bibrecord library for this.
     """
     out = ""
     if len(tag) != 6:
         return out
     bibXXx = "bib" + tag[0] + tag[1] + "x"
     bibrec_bibXXx = "bibrec_" + bibXXx
     query = """SELECT bb.field_number, b.tag, b.value FROM %s AS b, %s AS bb
                WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id AND tag LIKE
                %%s%%""" % (bibXXx, bibrec_bibXXx)
     res = run_sql(query, (recID, tag[:-1]))
     field_number = -1
     for row in res:
         if row[1] == tag and row[2] == value:
             field_number = row[0]
     if field_number > 0:
         for row in res:
             if row[0] == field_number and row[1] == tag[:-1] + associated_subfield_code:
                 out = row[2]
                 break
     return out
 
 
 def get_author_canonical_ids_for_recid(recID):
     """
     Return list of author canonical IDs (e.g. `J.Ellis.1') for the
     given record.  Done by consulting BibAuthorID module.
     """
     from invenio.bibauthorid_dbinterface import get_persons_from_recids
     lwords = []
     res = get_persons_from_recids([recID])
     if res is None:
         ## BibAuthorID is not enabled
         return lwords
     else:
         dpersons, dpersoninfos = res
     for aid in dpersoninfos.keys():
         author_canonical_id = dpersoninfos[aid].get('canonical_id', '')
         if author_canonical_id:
             lwords.append(author_canonical_id)
     return lwords
 
 
 def swap_temporary_reindex_tables(index_id, reindex_prefix="tmp_"):
     """Atomically swap reindexed temporary table with the original one.
     Delete the now-old one."""
     is_virtual = is_index_virtual(index_id)
     if is_virtual:
         write_message("Removing %s index tables for id %s" % (reindex_prefix, index_id))
         query = """DROP TABLE IF EXISTS %%sidxWORD%02dR, %%sidxWORD%02dF,
                                         %%sidxPAIR%02dR, %%sidxPAIR%02dF,
                                         %%sidxPHRASE%02dR, %%sidxPHRASE%02dF
                 """ % ((index_id,)*6)
         query = query % ((reindex_prefix,)*6)
         run_sql(query)
     else:
         write_message("Putting new tmp index tables for id %s into production" % index_id)
         run_sql(
             "RENAME TABLE " +
             "idxWORD%02dR TO old_idxWORD%02dR," % (index_id, index_id) +
             "%sidxWORD%02dR TO idxWORD%02dR," % (reindex_prefix, index_id, index_id) +
             "idxWORD%02dF TO old_idxWORD%02dF," % (index_id, index_id) +
             "%sidxWORD%02dF TO idxWORD%02dF," % (reindex_prefix, index_id, index_id) +
             "idxPAIR%02dR TO old_idxPAIR%02dR," % (index_id, index_id) +
             "%sidxPAIR%02dR TO idxPAIR%02dR," % (reindex_prefix, index_id, index_id) +
             "idxPAIR%02dF TO old_idxPAIR%02dF," % (index_id, index_id) +
             "%sidxPAIR%02dF TO idxPAIR%02dF," % (reindex_prefix, index_id, index_id) +
             "idxPHRASE%02dR TO old_idxPHRASE%02dR," % (index_id, index_id) +
             "%sidxPHRASE%02dR TO idxPHRASE%02dR," % (reindex_prefix, index_id, index_id) +
             "idxPHRASE%02dF TO old_idxPHRASE%02dF," % (index_id, index_id) +
             "%sidxPHRASE%02dF TO idxPHRASE%02dF;" % (reindex_prefix, index_id, index_id)
         )
         write_message("Dropping old index tables for id %s" % index_id)
         run_sql_drop_silently("DROP TABLE old_idxWORD%02dR, old_idxWORD%02dF, old_idxPAIR%02dR, old_idxPAIR%02dF, old_idxPHRASE%02dR, old_idxPHRASE%02dF" % (index_id, index_id, index_id, index_id, index_id, index_id)) # kwalitee: disable=sql
 
 
 def init_temporary_reindex_tables(index_id, reindex_prefix="tmp_"):
     """Create reindexing temporary tables."""
     write_message("Creating new tmp index tables for id %s" % index_id)
     run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxWORD%02dF""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
     run_sql("""CREATE TABLE %sidxWORD%02dF (
                         id mediumint(9) unsigned NOT NULL auto_increment,
                         term varchar(50) default NULL,
                         hitlist longblob,
                         PRIMARY KEY  (id),
                         UNIQUE KEY term (term)
                         ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
 
     run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxWORD%02dR""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
     run_sql("""CREATE TABLE %sidxWORD%02dR (
                         id_bibrec mediumint(9) unsigned NOT NULL,
                         termlist longblob,
                         type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                         PRIMARY KEY (id_bibrec,type)
                         ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
 
     run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPAIR%02dF""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
     run_sql("""CREATE TABLE %sidxPAIR%02dF (
                         id mediumint(9) unsigned NOT NULL auto_increment,
                         term varchar(100) default NULL,
                         hitlist longblob,
                         PRIMARY KEY  (id),
                         UNIQUE KEY term (term)
                         ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
 
     run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPAIR%02dR""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
     run_sql("""CREATE TABLE %sidxPAIR%02dR (
                         id_bibrec mediumint(9) unsigned NOT NULL,
                         termlist longblob,
                         type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                         PRIMARY KEY (id_bibrec,type)
                         ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
 
     run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPHRASE%02dF""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
     run_sql("""CREATE TABLE %sidxPHRASE%02dF (
                         id mediumint(9) unsigned NOT NULL auto_increment,
                         term text default NULL,
                         hitlist longblob,
                         PRIMARY KEY  (id),
                         KEY term (term(50))
                         ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
 
     run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPHRASE%02dR""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
     run_sql("""CREATE TABLE %sidxPHRASE%02dR (
                         id_bibrec mediumint(9) unsigned NOT NULL default '0',
                         termlist longblob,
                         type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                         PRIMARY KEY  (id_bibrec,type)
                         ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
 
 
 def remove_subfields(s):
     "Removes subfields from string, e.g. 'foo $$c bar' becomes 'foo bar'."
     return re_subfields.sub(' ', s)
 
 
 def get_field_indexes(field):
     """Returns indexes names and ids corresponding to the given field"""
     if field[0:3].isdigit():
         #field is actually a tag
         return get_tag_indexes(field, virtual=False)
     else:
         #future implementation for fields
         return []
 
 get_field_indexes_memoised = Memoise(get_field_indexes)
 
 
 def get_all_synonym_knowledge_bases():
     """Returns a dictionary of name key and knowledge base name and match type tuple value
         information of all defined words indexes that have knowledge base information.
         Returns empty dictionary in case there are no tags indexed.
         Example: output['global'] = ('INDEX-SYNONYM-TITLE', 'exact'), output['title'] = ('INDEX-SYNONYM-TITLE', 'exact')."""
     res = get_all_index_names_and_column_values("synonym_kbrs")
     out = {}
     for row in res:
         kb_data = row[1]
         # ignore empty strings
         if len(kb_data):
             out[row[0]] = tuple(kb_data.split(CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR))
     return out
 
 
 def get_index_remove_stopwords(index_id):
     """Returns value of a remove_stopword field from idxINDEX database table
        if it's not 'No'. If it's 'No' returns False.
        Just for consistency with WordTable.
        @param index_id: id of the index
     """
     result = get_idx_remove_stopwords(index_id)
     if isinstance(result, tuple):
         return False
     if result == 'No' or result == '':
         return False
     return result
 
 
 def get_index_remove_html_markup(index_id):
     """ Gets remove_html_markup parameter from database ('Yes' or 'No') and
         changes it  to True, False.
         Just for consistency with WordTable."""
     result = get_idx_remove_html_markup(index_id)
     if result == 'Yes':
         return True
     return False
 
 
 def get_index_remove_latex_markup(index_id):
     """ Gets remove_latex_markup parameter from database ('Yes' or 'No') and
         changes it  to True, False.
         Just for consistency with WordTable."""
     result = get_idx_remove_latex_markup(index_id)
     if result == 'Yes':
         return True
     return False
 
 
 def get_index_tokenizer(index_id):
     """Returns value of a tokenizer field from idxINDEX database table
        @param index_id: id of the index
     """
     query = "SELECT tokenizer FROM idxINDEX WHERE id=%s" % index_id
     out = None
     try:
         res = run_sql(query)
         if res:
             out = _TOKENIZERS[res[0][0]]
     except DatabaseError:
         write_message("Exception caught for SQL statement: %s; column tokenizer might not exist" % query, sys.stderr)
     except KeyError:
         write_message("Exception caught: there is no such tokenizer")
         out = None
     return out
 
 
 def get_last_updated_all_indexes():
     """Returns last modification date for all defined indexes"""
     query= """SELECT name, last_updated FROM idxINDEX"""
     res = run_sql(query)
     return res
 
 
 def split_ranges(parse_string):
     """Parse a string a return the list or ranges."""
     recIDs = []
     ranges = parse_string.split(",")
     for arange in ranges:
         tmp_recIDs = arange.split("-")
 
         if len(tmp_recIDs) == 1:
             recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])])
         else:
             if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check
                 tmp = tmp_recIDs[0]
                 tmp_recIDs[0] = tmp_recIDs[1]
                 tmp_recIDs[1] = tmp
             recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])])
     return recIDs
 
 def get_word_tables(tables):
     """ Given a list of table names it return a list of tuples
     (index_id, index_name, index_tags).
     """
     wordTables = []
     if tables:
         for index in tables:
             index_id = get_index_id_from_index_name(index)
             if index_id:
                 wordTables.append((index_id, index, get_index_tags(index)))
             else:
                 write_message("Error: There is no %s words table." % index, sys.stderr)
     return wordTables
 
 def get_date_range(var):
     "Returns the two dates contained as a low,high tuple"
     limits = var.split(",")
     if len(limits) == 1:
         low = get_datetime(limits[0])
         return low, None
     if len(limits) == 2:
         low = get_datetime(limits[0])
         high = get_datetime(limits[1])
         return low, high
     return None, None
 
 def create_range_list(res):
     """Creates a range list from a recID select query result contained
     in res. The result is expected to have ascending numerical order."""
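     # Illustrative example (assumed): create_range_list([1, 2, 3, 7])
     # returns [[1, 3], [7, 7]].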
     if not res:
         return []
     row = res[0]
     if not row:
         return []
     else:
         range_list = [[row, row]]
     for row in res[1:]:
         row_id = row
         if row_id == range_list[-1][1] + 1:
             range_list[-1][1] = row_id
         else:
             range_list.append([row_id, row_id])
     return range_list
 
 def beautify_range_list(range_list):
     """Returns a non overlapping, maximal range list"""
     ret_list = []
     for new in range_list:
         found = 0
         for old in ret_list:
             if new[0] <= old[0] <= new[1] + 1 or new[0] - 1 <= old[1] <= new[1]:
                 old[0] = min(old[0], new[0])
                 old[1] = max(old[1], new[1])
                 found = 1
                 break
 
         if not found:
             ret_list.append(new)
 
     return ret_list
 
 
 def truncate_index_table(index_name):
     """Properly truncate the given index."""
     index_id = get_index_id_from_index_name(index_name)
     if index_id:
         write_message('Truncating %s index table in order to reindex.' % index_name, verbose=2)
         run_sql("UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00' WHERE id=%s", (index_id,))
         run_sql("TRUNCATE idxWORD%02dF" % index_id) # kwalitee: disable=sql
         run_sql("TRUNCATE idxWORD%02dR" % index_id) # kwalitee: disable=sql
         run_sql("TRUNCATE idxPHRASE%02dF" % index_id) # kwalitee: disable=sql
         run_sql("TRUNCATE idxPHRASE%02dR" % index_id) # kwalitee: disable=sql
 
-def update_index_last_updated(index_id, starting_time=None):
+
+def update_index_last_updated(indexes, starting_time=None):
     """Update last_updated column of the index table in the database.
-    Puts starting time there so that if the task was interrupted for record download,
-    the records will be reindexed next time."""
+       Stores the starting time so that, if the task was interrupted during
+       record download, the records will be reindexed next time.
+       @param indexes: list of index names
+    """
     if starting_time is None:
         return None
-    write_message("updating last_updated to %s..." % starting_time, verbose=9)
-    return run_sql("UPDATE idxINDEX SET last_updated=%s WHERE id=%s",
-                    (starting_time, index_id,))
+    for index_name in indexes:
+        write_message("updating last_updated to %s...for %s index" % (starting_time, index_name), verbose=9)
+        run_sql("UPDATE idxINDEX SET last_updated=%s WHERE name=%s", (starting_time, index_name,))
 
 
 def get_percentage_completed(num_done, num_total):
     """ Return a string containing the approx. percentage completed """
     percentage_completed = 100.0 * float(num_done) / float(num_total)
     if percentage_completed:
         percentage_display = "(%.1f%%)" % (percentage_completed,)
     else:
         percentage_display = ""
     return percentage_display
 
 def _fill_dict_of_indexes_with_empty_sets():
     """find_affected_records internal function.
        Creates dict: {'index_name1':set([]), ...}
     """
     index_dict = {}
     tmp_all_indexes = get_all_indexes(virtual=False)
     for index in tmp_all_indexes:
         index_dict[index] = set([])
     return index_dict
 
 def find_affected_records_for_index(indexes=[], recIDs=[], force_all_indexes=False):
     """
        Checks which records need to be changed/reindexed
        for the given index or indexes.
        Makes use of the hstRECORD table, where different revisions of a
        record are kept.
        If the force_all_indexes parameter is set, all recIDs are assigned to all indexes.
        @param indexes: list of index names to consider for reindexing
        @param recIDs: recIDs for reindexing in the form: [[range1_down, range1_up], [range2_down, range2_up], ...]
        @param force_all_indexes: should all recIDs be assigned to all indexes?
     """
 
     tmp_dates = dict(get_last_updated_all_indexes())
     modification_dates = dict([(index, tmp_dates[index] or datetime(1000, 1, 1, 1, 1, 1)) for index in tmp_dates])
     tmp_all_indexes = get_all_indexes(virtual=False)
 
+    indexes = remove_inexistent_indexes(indexes, leave_virtual=False)
     if not indexes:
-        indexes = tmp_all_indexes
-    else:
-        indexes = indexes.split(",")
+        return {}
 
     def _should_reindex_for_revision(index_name, revision_date):
         try:
             if modification_dates[index_name] < revision_date and index_name in indexes:
                 return True
             return False
         except KeyError:
             return False
 
     if force_all_indexes:
         records_for_indexes = {}
         all_recIDs = []
         for recIDs_range in recIDs:
             all_recIDs.extend(range(recIDs_range[0], recIDs_range[1]+1))
         for index in indexes:
             records_for_indexes[index] = all_recIDs
         return records_for_indexes
 
+    min_last_updated = get_min_last_updated(indexes)[0][0] or datetime(1000,1,1,1,1,1)
     indexes_to_change = _fill_dict_of_indexes_with_empty_sets()
     recIDs_info = []
     for recIDs_range in recIDs:
         query = """SELECT id_bibrec,job_date,affected_fields FROM hstRECORD WHERE
-                   id_bibrec BETWEEN %s AND %s""" % (recIDs_range[0], recIDs_range[1])
+                   id_bibrec BETWEEN %s AND %s AND job_date > '%s'""" % (recIDs_range[0], recIDs_range[1], min_last_updated)
         res = run_sql(query)
         if res:
             recIDs_info.extend(res)
 
     for recID_info in recIDs_info:
         recID, revision, affected_fields  = recID_info
         affected_fields = affected_fields.split(",")
         indexes_for_recID = set()
         for field in affected_fields:
             if field:
                 field_indexes = get_field_indexes_memoised(field) or []
                 indexes_names = set([idx[1] for idx in field_indexes])
                 indexes_for_recID |= indexes_names
             else:
                 #record was inserted, all fields were changed, no specific affected fields
                 indexes_for_recID |= set(tmp_all_indexes)
         indexes_for_recID_filtered = [ind for ind in indexes_for_recID if _should_reindex_for_revision(ind, revision)]
         for index in indexes_for_recID_filtered:
             indexes_to_change[index].add(recID)
 
     indexes_to_change = dict((k, list(sorted(v))) for k, v in indexes_to_change.iteritems() if v)
 
     return indexes_to_change
 
 
 #def update_text_extraction_date(first_recid, last_recid):
     #"""for all the bibdoc connected to the specified recid, set
     #the text_extraction_date to the task_starting_time."""
     #run_sql("UPDATE bibdoc JOIN bibrec_bibdoc ON id=id_bibdoc SET text_extraction_date=%s WHERE id_bibrec BETWEEN %s AND %s", (task_get_task_param('task_starting_time'), first_recid, last_recid))
 
 class WordTable:
     "A class to hold the words table."
 
     def __init__(self, index_name, index_id, fields_to_index, table_name_pattern, wordtable_type, tag_to_tokenizer_map, wash_index_terms=50):
         """Creates words table instance.
         @param index_name: the index name
         @param index_id: the index integer identifier
         @param fields_to_index: a list of fields to index
         @param table_name_pattern: i.e. idxWORD%02dF or idxPHRASE%02dF
         @param wordtable_type: type of the wordtable: Words, Pairs, Phrases
         @param tag_to_tokenizer_map: a mapping to specify a particular tokenizer to
             extract words from particular metadata (such as 8564_u)
         @param wash_index_terms: do we wash index terms, and if yes (when >0),
             how many characters do we keep in the index terms; see
             max_char_length parameter of wash_index_term()
         """
         self.index_name = index_name
         self.index_id = index_id
         self.tablename = table_name_pattern % index_id
         self.virtual_tablename_pattern = table_name_pattern[table_name_pattern.find('idx'):-1]
         self.humanname = get_def_name('%s' % (str(index_id),), "idxINDEX")[0][1]
         self.recIDs_in_mem = []
         self.fields_to_index = fields_to_index
         self.value = {}
         try:
             self.stemming_language = get_index_stemming_language(index_id)
         except KeyError:
             self.stemming_language = ''
         self.remove_stopwords = get_index_remove_stopwords(index_id)
         self.remove_html_markup = get_index_remove_html_markup(index_id)
         self.remove_latex_markup = get_index_remove_latex_markup(index_id)
         self.tokenizer = get_index_tokenizer(index_id)(self.stemming_language,
                                                        self.remove_stopwords,
                                                        self.remove_html_markup,
                                                        self.remove_latex_markup)
         self.default_tokenizer_function = self.tokenizer.get_tokenizing_function(wordtable_type)
         self.wash_index_terms = wash_index_terms
         self.is_virtual = is_index_virtual(self.index_id)
         self.virtual_indexes = get_index_virtual_indexes(self.index_id)
 
         # tagToTokenizer mapping. It offers an indirection level necessary for
         # indexing fulltext.
         self.tag_to_words_fnc_map = {}
         for k in tag_to_tokenizer_map.keys():
             special_tokenizer_for_tag = _TOKENIZERS[tag_to_tokenizer_map[k]](self.stemming_language,
                                                                              self.remove_stopwords,
                                                                              self.remove_html_markup,
                                                                              self.remove_latex_markup)
             special_tokenizer_function = special_tokenizer_for_tag.get_tokenizing_function(wordtable_type)
             self.tag_to_words_fnc_map[k] = special_tokenizer_function
 
         if self.stemming_language and self.tablename.startswith('idxWORD'):
             write_message('%s has stemming enabled, language %s' % (self.tablename, self.stemming_language))
 
 
     def turn_off_virtual_indexes(self):
         self.virtual_indexes = []
 
     def turn_on_virtual_indexes(self):
         self.virtual_indexes = get_index_virtual_indexes(self.index_id)
 
     def get_field(self, recID, tag):
         """Returns list of values of the MARC-21 'tag' fields for the
            record 'recID'."""
 
         out = []
         bibXXx = "bib" + tag[0] + tag[1] + "x"
         bibrec_bibXXx = "bibrec_" + bibXXx
         query = """SELECT value FROM %s AS b, %s AS bb
                 WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id
                 AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx)
         res = run_sql(query, (recID, tag))
         for row in res:
             out.append(row[0])
         return out
 
     def clean(self):
         "Cleans the words table."
         self.value = {}
 
     def put_into_db(self, mode="normal"):
         """Updates the current words table in the corresponding DB
            idxFOO table.  Mode 'normal' means normal execution;
            mode 'emergency' means reverting the words index to its old state.
            """
         write_message("%s %s wordtable flush started" % (self.tablename, mode))
         write_message('...updating %d words into %s started' % \
                 (len(self.value), self.tablename))
         task_update_progress("(%s:%s) flushed %d/%d words" % (self.tablename, self.humanname, 0, len(self.value)))
 
         self.recIDs_in_mem = beautify_range_list(self.recIDs_in_mem)
 
         all_indexes = [(self.index_id, self.humanname)]
         if self.virtual_indexes:
             all_indexes.extend(self.virtual_indexes)
         for ind_id, ind_name in all_indexes:
             tab_name = self.tablename[:-1] + "R"
             if ind_id != self.index_id:
                 tab_name = self.virtual_tablename_pattern % ind_id + "R"
             if mode == "normal":
                 for group in self.recIDs_in_mem:
                     query = """UPDATE %s SET type='TEMPORARY' WHERE id_bibrec
                     BETWEEN %%s AND %%s AND type='CURRENT'""" % tab_name
                     write_message(query % (group[0], group[1]), verbose=9)
                     run_sql(query, (group[0], group[1]))
 
             nb_words_total = len(self.value)
             nb_words_report = int(nb_words_total / 10.0)
             nb_words_done = 0
             for word in self.value.keys():
                 self.put_word_into_db(word, ind_id)
                 nb_words_done += 1
                 if nb_words_report != 0 and ((nb_words_done % nb_words_report) == 0):
                     write_message('......processed %d/%d words' % (nb_words_done, nb_words_total))
                     percentage_display = get_percentage_completed(nb_words_done, nb_words_total)
                     task_update_progress("(%s:%s) flushed %d/%d words %s" % (tab_name, ind_name, nb_words_done, nb_words_total, percentage_display))
             write_message('...updating %d words into %s ended' % \
                           (nb_words_total, tab_name))
 
             write_message('...updating reverse table %s started' % tab_name)
             if mode == "normal":
                 for group in self.recIDs_in_mem:
                     query = """UPDATE %s SET type='CURRENT' WHERE id_bibrec
                     BETWEEN %%s AND %%s AND type='FUTURE'""" % tab_name
                     write_message(query % (group[0], group[1]), verbose=9)
                     run_sql(query, (group[0], group[1]))
                     query = """DELETE FROM %s WHERE id_bibrec
                     BETWEEN %%s AND %%s AND type='TEMPORARY'""" % tab_name
                     write_message(query % (group[0], group[1]), verbose=9)
                     run_sql(query, (group[0], group[1]))
                     #if self.is_fulltext_index:
                         #update_text_extraction_date(group[0], group[1])
                 write_message('End of updating wordTable into %s' % tab_name, verbose=9)
             elif mode == "emergency":
                 for group in self.recIDs_in_mem:
                     query = """UPDATE %s SET type='CURRENT' WHERE id_bibrec
                     BETWEEN %%s AND %%s AND type='TEMPORARY'""" % tab_name
                     write_message(query % (group[0], group[1]), verbose=9)
                     run_sql(query, (group[0], group[1]))
                     query = """DELETE FROM %s WHERE id_bibrec
                     BETWEEN %%s AND %%s AND type='FUTURE'""" % tab_name
                     write_message(query % (group[0], group[1]), verbose=9)
                     run_sql(query, (group[0], group[1]))
                 write_message('End of emergency flushing wordTable into %s' % tab_name, verbose=9)
             write_message('...updating reverse table %s ended' % tab_name)
 
         self.clean()
         self.recIDs_in_mem = []
         write_message("%s %s wordtable flush ended" % (self.tablename, mode))
         task_update_progress("(%s:%s) flush ended" % (self.tablename, self.humanname))
 
     def load_old_recIDs(self, word, index_id=None):
         """Load existing hitlist for the word from the database index files."""
         tab_name = self.tablename
         if index_id != self.index_id:
             tab_name = self.virtual_tablename_pattern % index_id + "F"
         query = "SELECT hitlist FROM %s WHERE term=%%s" % tab_name
         res = run_sql(query, (word,))
         if res:
             return intbitset(res[0][0])
         else:
             return None
 
     def merge_with_old_recIDs(self, word, set):
         """Merge the system numbers stored in memory (hash of recIDs with value +1 or -1
         according to whether to add/delete them) with those stored in the database index
         and received in set universe of recIDs for the given word.
 
         Return False in case no change was done to SET, return True in case SET
         was changed.
         """
         oldset = intbitset(set)
         set.update_with_signs(self.value[word])
         return set != oldset
 
     def put_word_into_db(self, word, index_id):
         """Flush a single word to the database and delete it from memory"""
         tab_name = self.tablename
         if index_id != self.index_id:
             tab_name = self.virtual_tablename_pattern % index_id + "F"
         set = self.load_old_recIDs(word, index_id)
         if set is not None: # merge the word recIDs found in memory:
             if not self.merge_with_old_recIDs(word, set):
                 # nothing to update:
                 write_message("......... unchanged hitlist for ``%s''" % word, verbose=9)
                 pass
             else:
                 # yes there were some new words:
                 write_message("......... updating hitlist for ``%s''" % word, verbose=9)
                 run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % wash_table_column_name(tab_name), (set.fastdump(), word)) # kwalitee: disable=sql
 
         else: # the word is new, will create new set:
             write_message("......... inserting hitlist for ``%s''" % word, verbose=9)
             set = intbitset(self.value[word].keys())
             try:
                 run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % wash_table_column_name(tab_name), (word, set.fastdump())) # kwalitee: disable=sql
             except Exception, e:
                 ## We send this exception to the admin only when we are not
                 ## already repairing the problem.
                 register_exception(prefix="Error when putting the term '%s' into db (hitlist=%s): %s\n" % (repr(word), set, e), alert_admin=(task_get_option('cmd') != 'repair'))
 
         if not set: # never store empty words
             run_sql("DELETE FROM %s WHERE term=%%s" % wash_table_column_name(tab_name), (word,)) # kwalitee: disable=sql
 
 
     def display(self):
         "Displays the word table."
         keys = self.value.keys()
         keys.sort()
         for k in keys:
             write_message("%s: %s" % (k, self.value[k]))
 
     def count(self):
         "Returns the number of words in the table."
         return len(self.value)
 
     def info(self):
         "Prints some information on the words table."
         write_message("The words table contains %d words." % self.count())
 
     def lookup_words(self, word=""):
         "Lookup word from the words table."
 
         if not word:
             done = 0
             while not done:
                 try:
                     word = raw_input("Enter word: ")
                     done = 1
                 except (EOFError, KeyboardInterrupt):
                     return
 
         if self.value.has_key(word):
             write_message("The word '%s' is found %d times." \
                 % (word, len(self.value[word])))
         else:
             write_message("The word '%s' does not exist in the word file."\
                               % word)
 
     def add_recIDs(self, recIDs, opt_flush):
         """Fetches records which id in the recIDs range list and adds
         them to the wordTable.  The recIDs range list is of the form:
         [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].
         """
         if self.is_virtual:
             return
         global chunksize, _last_word_table
         flush_count = 0
         records_done = 0
         records_to_go = 0
 
         for arange in recIDs:
             records_to_go = records_to_go + arange[1] - arange[0] + 1
 
         time_started = time.time() # will measure profile time
         for arange in recIDs:
             i_low = arange[0]
             chunksize_count = 0
             while i_low <= arange[1]:
                 task_sleep_now_if_required()
                 # calculate chunk group of recIDs and treat it:
                 i_high = min(i_low + opt_flush - flush_count - 1, arange[1])
                 i_high = min(i_low + chunksize - chunksize_count - 1, i_high)
 
                 try:
                     self.chk_recID_range(i_low, i_high)
                 except StandardError:
                     if self.index_name == 'fulltext' and CFG_SOLR_URL:
                         solr_commit()
                     raise
 
                 write_message(CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR % \
                         (self.tablename, i_low, i_high))
                 if CFG_CHECK_MYSQL_THREADS:
                     kill_sleepy_mysql_threads()
                 percentage_display = get_percentage_completed(records_done, records_to_go)
                 task_update_progress("(%s:%s) adding recs %d-%d %s" % (self.tablename, self.humanname, i_low, i_high, percentage_display))
                 self.del_recID_range(i_low, i_high)
                 just_processed = self.add_recID_range(i_low, i_high)
                 flush_count = flush_count + i_high - i_low + 1
                 chunksize_count = chunksize_count + i_high - i_low + 1
                 records_done = records_done + just_processed
                 write_message(CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR % \
                         (self.tablename, i_low, i_high))
                 if chunksize_count >= chunksize:
                     chunksize_count = 0
                 # flush if necessary:
                 if flush_count >= opt_flush:
                     self.put_into_db()
                     self.clean()
                     if self.index_name == 'fulltext' and CFG_SOLR_URL:
                         solr_commit()
                     write_message("%s backing up" % (self.tablename))
                     flush_count = 0
                     self.log_progress(time_started, records_done, records_to_go)
                 # iterate:
                 i_low = i_high + 1
         if flush_count > 0:
             self.put_into_db()
             if self.index_name == 'fulltext' and CFG_SOLR_URL:
                 solr_commit()
             self.log_progress(time_started, records_done, records_to_go)
 
     def add_recID_range(self, recID1, recID2):
         """Add records from RECID1 to RECID2."""
         wlist = {}
         self.recIDs_in_mem.append([recID1, recID2])
         # special case of author indexes where we also add author
         # canonical IDs:
         if self.index_name in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'):
             for recID in range(recID1, recID2 + 1):
                 if not wlist.has_key(recID):
                     wlist[recID] = []
                 wlist[recID] = list_union(get_author_canonical_ids_for_recid(recID),
                                           wlist[recID])
 
         if len(self.fields_to_index) == 0:
             #'no tag' style of indexing - use bibfield instead of directly consulting bibrec
             tokenizing_function = self.default_tokenizer_function
             for recID in range(recID1, recID2 + 1):
                 record = get_record(recID)
                 if record:
                     new_words = tokenizing_function(record)
                     if not wlist.has_key(recID):
                         wlist[recID] = []
                     wlist[recID] = list_union(new_words, wlist[recID])
         # case of special indexes:
         elif self.index_name in ('authorcount', 'journal'):
             for tag in self.fields_to_index:
                 tokenizing_function = self.tag_to_words_fnc_map.get(tag, self.default_tokenizer_function)
                 for recID in range(recID1, recID2 + 1):
                     new_words = tokenizing_function(recID)
                     if not wlist.has_key(recID):
                         wlist[recID] = []
                     wlist[recID] = list_union(new_words, wlist[recID])
         # usual tag-by-tag indexing for the rest:
         else:
             for tag in self.fields_to_index:
                 tokenizing_function = self.tag_to_words_fnc_map.get(tag, self.default_tokenizer_function)
                 phrases = self.get_phrases_for_tokenizing(tag, recID1, recID2)
                 for row in sorted(phrases):
                     recID, phrase = row
                     if not wlist.has_key(recID):
                         wlist[recID] = []
                     new_words = tokenizing_function(phrase)
                     wlist[recID] = list_union(new_words, wlist[recID])
 
 
         # lookup index-time synonyms:
         synonym_kbrs = get_all_synonym_knowledge_bases()
         if synonym_kbrs.has_key(self.index_name):
             if len(wlist) == 0: return 0
             recIDs = wlist.keys()
             for recID in recIDs:
                 for word in wlist[recID]:
                     word_synonyms = get_synonym_terms(word,
                                                       synonym_kbrs[self.index_name][0],
                                                       synonym_kbrs[self.index_name][1],
                                                       use_memoise=True)
 
                     if word_synonyms:
                         wlist[recID] = list_union(word_synonyms, wlist[recID])
 
         # were there some words for these recIDs found?
         recIDs = wlist.keys()
         for recID in recIDs:
             # was this record marked as deleted?
             if "DELETED" in self.get_field(recID, "980__c"):
                 wlist[recID] = []
                 write_message("... record %d was declared deleted, removing its word list" % recID, verbose=9)
             write_message("... record %d, termlist: %s" % (recID, wlist[recID]), verbose=9)
 
         self.index_virtual_indexes_reversed(wlist, recID1, recID2)
 
         if len(wlist) == 0: return 0
         # put words into reverse index table with FUTURE status:
         for recID in recIDs:
             run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % wash_table_column_name(self.tablename[:-1]), (recID, serialize_via_marshal(wlist[recID]))) # kwalitee: disable=sql
             # ... and, for new records, enter the CURRENT status as empty:
             try:
                 run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % wash_table_column_name(self.tablename[:-1]), (recID, serialize_via_marshal([]))) # kwalitee: disable=sql
             except DatabaseError:
                 # okay, it's an already existing record, no problem
                 pass
 
         # put words into memory word list:
         put = self.put
         for recID in recIDs:
             for w in wlist[recID]:
                 put(recID, w, 1)
         return len(recIDs)
 
 
     def get_phrases_for_tokenizing(self, tag, first_recID, last_recID):
         """Gets phrases for later tokenization for a range of records and
            specific tag.
            @param tag: MARC tag
            @param first_recID: first recID from the range of recIDs to index
            @param last_recID: last recID from the range of recIDs to index
         """
         bibXXx = "bib" + tag[0] + tag[1] + "x"
         bibrec_bibXXx = "bibrec_" + bibXXx
         query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb
                    WHERE bb.id_bibrec BETWEEN %%s AND %%s
                    AND bb.id_bibxxx=b.id AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx)
         phrases = run_sql(query, (first_recID, last_recID, tag))
         if tag == '8564_u':
             ## FIXME: Quick hack to be sure that hidden files are
             ## actually indexed.
             phrases = set(phrases)
             for recid in xrange(int(first_recID), int(last_recID) + 1):
                 for bibdocfile in BibRecDocs(recid).list_latest_files():
                     phrases.add((recid, bibdocfile.get_url()))
         #authority records
         pattern = tag.replace('%', '*')
         matches = fnmatch.filter(CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.keys(), pattern)
         if not len(matches):
             return phrases
         phrases = set(phrases)
         for tag_match in matches:
             authority_tag = tag_match[0:3] + "__0"
             for recID in xrange(int(first_recID), int(last_recID) + 1):
                 control_nos = get_fieldvalues(recID, authority_tag)
                 for control_no in control_nos:
                     new_strings = get_index_strings_by_control_no(control_no)
                     for string_value in new_strings:
                         phrases.add((recID, string_value))
         return phrases
 
 
     def index_virtual_indexes_reversed(self, wlist, recID1, recID2):
         """Inserts indexed words into all virtual indexes connected to
            this index"""
         #first: need to take old values from given index to remove
         #them from virtual indexes
         query = """SELECT id_bibrec, termlist FROM %sR WHERE id_bibrec
                    BETWEEN %%s AND %%s""" % wash_table_column_name(self.tablename[:-1])
         old_index_values = run_sql(query, (recID1, recID2))
         if old_index_values:
             zipped = zip(*old_index_values)
             old_index_values = dict(zip(zipped[0], map(deserialize_via_marshal, zipped[1])))
         else:
             old_index_values = dict()
         recIDs = wlist.keys()
 
         for vindex_id, vindex_name in self.virtual_indexes:
             #second: need to take old values from virtual index
             #to have a list of words from which we can remove old values from given index
             tab_name =  self.virtual_tablename_pattern % vindex_id + "R"
             query = """SELECT id_bibrec, termlist FROM %s WHERE type='CURRENT' AND id_bibrec
                        BETWEEN %%s AND %%s""" % tab_name
             old_virtual_index_values = run_sql(query, (recID1, recID2))
             if old_virtual_index_values:
                 zipped = zip(*old_virtual_index_values)
                 old_virtual_index_values = dict(zip(zipped[0], map(deserialize_via_marshal, zipped[1])))
             else:
                 old_virtual_index_values = dict()
             for recID in recIDs:
                 to_serialize = list((set(old_virtual_index_values.get(recID) or []) - set(old_index_values.get(recID) or [])) | set(wlist[recID]))
                 run_sql("INSERT INTO %s (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % wash_table_column_name(tab_name), (recID, serialize_via_marshal(to_serialize))) # kwalitee: disable=sql
                 try:
                     run_sql("INSERT INTO %s (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % wash_table_column_name(tab_name), (recID, serialize_via_marshal([]))) # kwalitee: disable=sql
                 except DatabaseError:
                     pass
             if len(recIDs) != (recID2 - recID1 + 1):
                 #for records in range(recID1, recID2) which weren't updated:
                 #need to prevent them from being deleted by function: 'put_into_db'
                 #which deletes all records with 'CURRENT' status
                 query = """INSERT INTO %s (id_bibrec, termlist, type)
                            SELECT id_bibrec, termlist, 'FUTURE' FROM %s
                            WHERE id_bibrec BETWEEN %%s AND %%s
                                  AND type='CURRENT'
                                  AND id_bibrec IN (
                                         SELECT id_bibrec FROM %s
                                         WHERE id_bibrec BETWEEN %%s AND %%s
                                         GROUP BY id_bibrec HAVING COUNT(id_bibrec) = 1
                                         )
                         """ % ((wash_table_column_name(tab_name),)*3)
                 run_sql(query, (recID1, recID2, recID1, recID2))
 
 
     def log_progress(self, start, done, todo):
         """Calculate progress and store it.
         start: start time,
         done: records processed,
         todo: total number of records"""
         time_elapsed = time.time() - start
         # consistency check
         if time_elapsed == 0 or done > todo:
             return
 
         time_recs_per_min = done / (time_elapsed / 60.0)
         write_message("%d records took %.1f seconds to complete.(%1.f recs/min)"\
                 % (done, time_elapsed, time_recs_per_min))
 
         if time_recs_per_min:
             write_message("Estimated runtime: %.1f minutes" % \
                     ((todo - done) / time_recs_per_min))
 
     def put(self, recID, word, sign):
         """Adds/deletes a word to the word list."""
         try:
             if self.wash_index_terms:
                 word = wash_index_term(word, self.wash_index_terms)
             if self.value.has_key(word):
                 # the word 'word' exist already: update sign
                 self.value[word][recID] = sign
             else:
                 self.value[word] = {recID: sign}
         except:
             write_message("Error: Cannot put word %s with sign %d for recID %s." % (word, sign, recID))
 
     def del_recIDs(self, recIDs):
         """Fetches records which id in the recIDs range list and adds
         them to the wordTable.  The recIDs range list is of the form:
         [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].
         """
         count = 0
         for arange in recIDs:
             task_sleep_now_if_required()
             self.del_recID_range(arange[0], arange[1])
             count = count + arange[1] - arange[0]
         self.put_into_db()
         if self.index_name == 'fulltext' and CFG_SOLR_URL:
             solr_commit()
 
     def del_recID_range(self, low, high):
         """Deletes records with 'recID' system number between low
            and high from memory words index table."""
         write_message("%s fetching existing words for records #%d-#%d started" % \
                 (self.tablename, low, high), verbose=3)
         self.recIDs_in_mem.append([low, high])
         query = """SELECT id_bibrec,termlist FROM %sR as bb WHERE bb.id_bibrec
         BETWEEN %%s AND %%s""" % (self.tablename[:-1])
         recID_rows = run_sql(query, (low, high))
         for recID_row in recID_rows:
             recID = recID_row[0]
             wlist = deserialize_via_marshal(recID_row[1])
             for word in wlist:
                 self.put(recID, word, -1)
         write_message("%s fetching existing words for records #%d-#%d ended" % \
                 (self.tablename, low, high), verbose=3)
 
 
     def report_on_table_consistency(self):
         """Check reverse words index tables (e.g. idxWORD01R) for
         interesting states such as 'TEMPORARY' state.
         Prints small report (no of words, no of bad words).
         """
         # find number of words:
         query = """SELECT COUNT(*) FROM %s""" % (self.tablename)
         res = run_sql(query, None, 1)
         if res:
             nb_words = res[0][0]
         else:
             nb_words = 0
 
         # find number of records:
         query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1])
         res = run_sql(query, None, 1)
         if res:
             nb_records = res[0][0]
         else:
             nb_records = 0
 
         # report stats:
         write_message("%s contains %d words from %d records" % (self.tablename, nb_words, nb_records))
 
         # find possible bad states in reverse tables:
         query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1])
         res = run_sql(query)
         if res:
             nb_bad_records = res[0][0]
         else:
             nb_bad_records = 999999999
         if nb_bad_records:
             write_message("EMERGENCY: %s needs to repair %d of %d index records" % \
                 (self.tablename, nb_bad_records, nb_records))
         else:
             write_message("%s is in consistent state" % (self.tablename))
 
         return nb_bad_records
 
     def repair(self, opt_flush):
         """Repair the whole table"""
         # find possible bad states in reverse tables:
         query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1])
         res = run_sql(query, None, 1)
         if res:
             nb_bad_records = res[0][0]
         else:
             nb_bad_records = 0
 
         if nb_bad_records == 0:
             return
 
         query = """SELECT id_bibrec FROM %sR WHERE type <> 'CURRENT'""" \
                 % (self.tablename[:-1])
         res = intbitset(run_sql(query))
         recIDs = create_range_list(list(res))
 
         flush_count = 0
         records_done = 0
         records_to_go = 0
 
         for arange in recIDs:
             records_to_go = records_to_go + arange[1] - arange[0] + 1
 
         time_started = time.time() # will measure profile time
         for arange in recIDs:
 
             i_low = arange[0]
             chunksize_count = 0
             while i_low <= arange[1]:
                 task_sleep_now_if_required()
                 # calculate chunk group of recIDs and treat it:
                 i_high = min(i_low + opt_flush - flush_count - 1, arange[1])
                 i_high = min(i_low + chunksize - chunksize_count - 1, i_high)
 
                 self.fix_recID_range(i_low, i_high)
 
                 flush_count = flush_count + i_high - i_low + 1
                 chunksize_count = chunksize_count + i_high - i_low + 1
                 records_done = records_done + i_high - i_low + 1
                 if chunksize_count >= chunksize:
                     chunksize_count = 0
                 # flush if necessary:
                 if flush_count >= opt_flush:
                     self.put_into_db("emergency")
                     self.clean()
                     flush_count = 0
                     self.log_progress(time_started, records_done, records_to_go)
                 # iterate:
                 i_low = i_high + 1
         if flush_count > 0:
             self.put_into_db("emergency")
             self.log_progress(time_started, records_done, records_to_go)
         write_message("%s inconsistencies repaired." % self.tablename)
 
     def chk_recID_range(self, low, high):
         """Check if the reverse index table is in proper state"""
         ## check db
         query = """SELECT COUNT(*) FROM %sR WHERE type <> 'CURRENT'
         AND id_bibrec BETWEEN %%s AND %%s""" % self.tablename[:-1]
         res = run_sql(query, (low, high), 1)
         if res[0][0] == 0:
             write_message("%s for %d-%d is in consistent state" % (self.tablename, low, high))
             return # okay, words table is consistent
 
         ## inconsistency detected!
         write_message("EMERGENCY: %s inconsistencies detected..." % self.tablename)
         error_message = "Errors found. You should check consistency of the " \
                 "%s - %sR tables.\nRunning 'bibindex --repair' is " \
                 "recommended." % (self.tablename, self.tablename[:-1])
         write_message("EMERGENCY: " + error_message, stream=sys.stderr)
         raise StandardError(error_message)
 
     def fix_recID_range(self, low, high):
         """Try to fix reverse index database consistency (e.g. table idxWORD01R) in the low,high doc-id range.
 
         Possible states for a recID follow:
         CUR TMP FUT: very bad things have happened: warn!
         CUR TMP    : very bad things have happened: warn!
         CUR     FUT: delete FUT (crash before flushing)
         CUR        : database is ok
             TMP FUT: add TMP to memory and del FUT from memory
                      flush (revert to old state)
             TMP    : very bad things have happened: warn!
                 FUT: very bad things have happened: warn!
         """
 
         state = {}
         query = "SELECT id_bibrec,type FROM %sR WHERE id_bibrec BETWEEN %%s AND %%s"\
                 % self.tablename[:-1]
         res = run_sql(query, (low, high))
         for row in res:
             if not state.has_key(row[0]):
                 state[row[0]] = []
             state[row[0]].append(row[1])
 
         ok = 1 # will hold info on whether we will be able to repair
         for recID in state.keys():
             if not 'TEMPORARY' in state[recID]:
                 if 'FUTURE' in state[recID]:
                     if 'CURRENT' not in state[recID]:
                         write_message("EMERGENCY: Index record %d is in inconsistent state. Can't repair it." % recID)
                         ok = 0
                     else:
                         write_message("EMERGENCY: Inconsistency in index record %d detected" % recID)
                         query = """DELETE FROM %sR
                         WHERE id_bibrec=%%s""" % self.tablename[:-1]
                         run_sql(query, (recID,))
                         write_message("EMERGENCY: Inconsistency in record %d repaired." % recID)
 
             else:
                 if 'FUTURE' in state[recID] and not 'CURRENT' in state[recID]:
                     self.recIDs_in_mem.append([recID, recID])
 
                     # Get the words file
                     query = """SELECT type,termlist FROM %sR
                     WHERE id_bibrec=%%s""" % self.tablename[:-1]
                     write_message(query, verbose=9)
                     res = run_sql(query, (recID,))
                     for row in res:
                         wlist = deserialize_via_marshal(row[1])
                         write_message("Words are %s " % wlist, verbose=9)
                         if row[0] == 'TEMPORARY':
                             sign = 1
                         else:
                             sign = -1
                         for word in wlist:
                             self.put(recID, word, sign)
 
                 else:
                     write_message("EMERGENCY: %s for %d is in inconsistent "
                             "state. Couldn't repair it." % (self.tablename,
                                 recID), stream=sys.stderr)
                     ok = 0
 
         if not ok:
             error_message = "Unrepairable errors found. You should check " \
                     "consistency of the %s - %sR tables. Deleting affected " \
                     "TEMPORARY and FUTURE entries from these tables is " \
                     "recommended; see the BibIndex Admin Guide." % \
                     (self.tablename, self.tablename[:-1])
             write_message("EMERGENCY: " + error_message, stream=sys.stderr)
             raise StandardError(error_message)
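
     # A minimal sketch of the repair table documented in fix_recID_range()
     # above; the helper below is illustrative only, not part of this module:
     #
     #     def classify_recID_state(states):
     #         states = set(states)
     #         if states == set(['CURRENT']):
     #             return 'ok'                 # database is ok
     #         if states == set(['CURRENT', 'FUTURE']):
     #             return 'delete FUTURE'      # crash happened before flushing
     #         if states == set(['TEMPORARY', 'FUTURE']):
     #             return 'revert'             # replay TMP, drop FUT, flush
     #         return 'warn'                   # very bad things have happened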
 
 
     def remove_dependent_index(self, id_dependent):
         """Removes terms found in dependent index from virtual index.
            Function finds words for removal and then removes them from
            forward and reversed tables term by term.
            @param id_dependent: id of an index which we want to remove from this
                                 virtual index
         """
         if not self.is_virtual:
             write_message("Index is not virtual...")
             return
 
         global chunksize
         terms_current_counter = 0
         terms_done = 0
         terms_to_go = 0
 
         for_full_removal, for_partial_removal = self.get_words_to_remove(id_dependent, misc_lookup=False)
         query = """SELECT t.term, m.hitlist FROM %s%02dF as t INNER JOIN %s%02dF as m
                    ON t.term=m.term""" % (self.tablename[:-3], self.index_id, self.tablename[:-3], id_dependent)
         terms_and_hitlists = dict(run_sql(query))
         terms_to_go = len(for_full_removal) + len(for_partial_removal)
         task_sleep_now_if_required()
         #full removal
         for term in for_full_removal:
             terms_current_counter += 1
             hitlist = intbitset(terms_and_hitlists[term])
             for recID in hitlist:
                 self.remove_single_word_reversed_table(term, recID)
             self.remove_single_word_forward_table(term)
             if terms_current_counter % chunksize == 0:
                 terms_done += terms_current_counter
                 terms_current_counter = 0
                 write_message("removed %s/%s terms..." % (terms_done, terms_to_go))
                 task_sleep_now_if_required()
         terms_done += terms_current_counter
         terms_current_counter = 0
         #partial removal
         for term, indexes in for_partial_removal.iteritems():
             self.value = {}
             terms_current_counter += 1
             hitlist = intbitset(terms_and_hitlists[term])
             if len(indexes) > 0:
                 hitlist -= self._find_common_hitlist(term, id_dependent, indexes)
             for recID in hitlist:
                 self.remove_single_word_reversed_table(term, recID)
                 if self.value.has_key(term):
                     self.value[term][recID] = -1
                 else:
                     self.value[term] = {recID: -1}
             if self.value:
                 self.put_word_into_db(term, self.index_id)
             if terms_current_counter % chunksize == 0:
                 terms_done += terms_current_counter
                 terms_current_counter = 0
                 write_message("removed %s/%s terms..." % (terms_done, terms_to_go))
                 task_sleep_now_if_required()
 
 
     def remove_single_word_forward_table(self, word):
         """Immediately and irreversibly removes a word from forward table"""
         run_sql("""DELETE FROM %s WHERE term=%%s""" % self.tablename, (word, )) # kwalitee: disable=sql
 
     def remove_single_word_reversed_table(self, word, recID):
         """Removes single word from temlist for given recID"""
         old_set = run_sql("""SELECT termlist FROM %sR WHERE id_bibrec=%%s""" % \
                           wash_table_column_name(self.tablename[:-1]), (recID, ))
         new_set = []
         if old_set:
             new_set = deserialize_via_marshal(old_set[0][0])
             if word in new_set:
                 new_set.remove(word)
         if new_set:
             run_sql("""UPDATE %sR SET termlist=%%s
                        WHERE id_bibrec=%%s AND
                        type='CURRENT'""" %  \
                     wash_table_column_name(self.tablename[:-1]), (serialize_via_marshal(new_set), recID))
 
     def _find_common_hitlist(self, term, id_dependent, indexes):
         """Checks 'indexes' for records that have 'term' indexed
            and returns intersection between found records
            and records that have a 'term' inside index
            defined by id_dependent parameter"""
         query = """SELECT m.hitlist FROM idxWORD%02dF as t INNER JOIN idxWORD%02dF as m
                    ON t.term=m.term WHERE t.term='%s'"""
         common_hitlist = intbitset([])
         for _id in indexes:
             res = run_sql(query % (id_dependent, _id, term))
             if res:
                 common_hitlist |= intbitset(res[0][0])
         return common_hitlist
 
     def get_words_to_remove(self, id_dependent, misc_lookup=False):
         """Finds words in dependent index which should be removed from virtual index.
            Example:
            Virtual index 'A' consists of 'B' and 'C' dependent indexes and we want to
            remove 'B' from virtual index 'A'.
            First we need to check if 'B' and 'C' have common words. If they have
            we need to be careful not to remove common words from 'A', because we want
            to remove only words from 'B'.
            Then we need to check common words for 'A' and 'B'. These are potential words
            for removal. We need to subtract common words for 'B' and 'C' from common words
            for 'A' and 'B' to be sure that the correct words are removed
            (a worked illustration follows right after this class).
            @return: (list, dict), list contains terms/words for full removal, dict
                     contains words for partial removal together with ids of indexes in which
                     given term/word also exists
         """
 
         query = """SELECT t.term FROM %s%02dF as t INNER JOIN %s%02dF as m
                    ON t.term=m.term"""
         dependent_indexes = get_virtual_index_building_blocks(self.index_id)
         other_ids = list(dependent_indexes and zip(*dependent_indexes)[0] or [])
         if id_dependent in other_ids:
             other_ids.remove(id_dependent)
         if not misc_lookup:
             misc_id = get_index_id_from_index_name('miscellaneous')
             if misc_id in other_ids:
                 other_ids.remove(misc_id)
 
         #intersections between dependent indexes
         left_in_other_indexes = {}
         for _id in other_ids:
             intersection = zip(*run_sql(query % (self.tablename[:-3], id_dependent, self.tablename[:-3], _id))) # kwalitee: disable=sql
             terms = bool(intersection) and intersection[0] or []
             for term in terms:
                 if left_in_other_indexes.has_key(term):
                     left_in_other_indexes[term].append(_id)
                 else:
                     left_in_other_indexes[term] = [_id]
 
         #intersection between virtual index and index we want to remove
         main_intersection = zip(*run_sql(query % (self.tablename[:-3], self.index_id, self.tablename[:-3], id_dependent))) # kwalitee: disable=sql
         terms_main = set(bool(main_intersection) and main_intersection[0] or [])
         return list(terms_main - set(left_in_other_indexes.keys())), left_in_other_indexes
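
 # A worked illustration of the set arithmetic performed by
 # WordTable.get_words_to_remove() above (all values are made up):
 #
 #     terms_main            = set(['a', 'b', 'c'])    # shared by 'A' and 'B'
 #     left_in_other_indexes = {'b': [3], 'c': [3, 5]} # 'B' shares these with 'C'
 #     full_removal          = list(terms_main - set(left_in_other_indexes))
 #     # full_removal == ['a']; 'b' and 'c' are removed only partially,
 #     # because other building blocks still feed them into the virtual index.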
 
 
 def main():
     """Main that construct all the bibtask."""
     task_init(authorization_action='runbibindex',
             authorization_msg="BibIndex Task Submission",
             description="""Examples:
 \t%s -a -i 234-250,293,300-500 -u admin@localhost
 \t%s -a -w author,fulltext -M 8192 -v3
             \t%s -d -m +4d -A on --flush=10000\n""" % ((sys.argv[0],) * 3), help_specific_usage=""" Indexing options:
   -a, --add\t\tadd or update words for selected records
   -d, --del\t\tdelete words for selected records
   -i, --id=low[-high]\t\tselect according to doc recID
   -m, --modified=from[,to]\tselect according to modification date
   -c, --collection=c1[,c2]\tselect according to collection
   -R, --reindex\treindex the selected indexes from scratch
 
  Repairing options:
   -k, --check\t\tcheck consistency for all records in the table(s)
   -r, --repair\t\ttry to repair all records in the table(s)
 
  Specific options:
   -w, --windex=w1[,w2]\tword/phrase indexes to consider (all)
   -M, --maxmem=XXX\tmaximum memory usage in kB (no limit)
   -f, --flush=NNN\t\tfull consistent table flush after NNN records (10000)
   --force\tforce indexing of all records for provided indexes
   -Z, --remove-dependent-index=w\tname of an index to remove from the virtual index
 """,
             version=__revision__,
             specific_params=("adi:m:c:w:krRM:f:oZ:", [
                 "add",
                 "del",
                 "id=",
                 "modified=",
                 "collection=",
                 "windex=",
                 "check",
                 "repair",
                 "reindex",
                 "maxmem=",
                 "flush=",
                 "force",
                 "remove-dependent-index="
             ]),
             task_stop_helper_fnc=task_stop_table_close_fnc,
             task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
             task_run_fnc=task_run_core,
             task_submit_check_options_fnc=task_submit_check_options)
 
 def task_submit_check_options():
     """Check for options compatibility."""
     if task_get_option("reindex"):
         if task_get_option("cmd") != "add" or task_get_option('id') or task_get_option('collection'):
             print >> sys.stderr, "ERROR: You can use --reindex only when adding modified records."
             return False
     return True
 
 def task_submit_elaborate_specific_parameter(key, value, opts, args):
     """ Given the string key it checks it's meaning, eventually using the
     value. Usually it fills some key in the options dict.
     It must return True if it has elaborated the key, False, if it doesn't
     know that key.
     eg:
     if key in ['-n', '--number']:
         self.options['number'] = value
         return True
     return False
     """
     if key in ("-a", "--add"):
         task_set_option("cmd", "add")
         if ("-x", "") in opts or ("--del", "") in opts:
             raise StandardError("Can not have --add and --del at the same time!")
     elif key in ("-k", "--check"):
         task_set_option("cmd", "check")
     elif key in ("-r", "--repair"):
         task_set_option("cmd", "repair")
     elif key in ("-d", "--del"):
         task_set_option("cmd", "del")
     elif key in ("-i", "--id"):
         task_set_option('id', task_get_option('id') + split_ranges(value))
     elif key in ("-m", "--modified"):
         task_set_option("modified", get_date_range(value))
     elif key in ("-c", "--collection"):
         task_set_option("collection", value)
     elif key in ("-R", "--reindex"):
         task_set_option("reindex", True)
     elif key in ("-w", "--windex"):
         task_set_option("windex", value)
     elif key in ("-M", "--maxmem"):
         task_set_option("maxmem", int(value))
         if task_get_option("maxmem") < base_process_size + 1000:
             raise StandardError("Memory usage should be higher than %d kB" % \
                 (base_process_size + 1000))
     elif key in ("-f", "--flush"):
         task_set_option("flush", int(value))
     elif key in ("-o", "--force"):
         task_set_option("force", True)
     elif key in ("-Z", "--remove-dependent-index",):
         task_set_option("remove-dependent-index", value)
     else:
         return False
     return True
 
 def task_stop_table_close_fnc():
     """ Close tables to STOP. """
     global _last_word_table
     if _last_word_table:
         _last_word_table.put_into_db()
 
 
 def get_recIDs_by_date_bibliographic(dates, index_name, force_all=False):
     """ Finds records that were modified between DATES[0] and DATES[1]
         for given index.
         If DATES is not set, then finds records that were modified since
         the last update of the index.
         @param dates: (date_from, date_until) pair; either bound may be None
         @param index_name: name of the index to check
         @param force_all: if True, select all records (when no dates are given)
     """
     index_id = get_index_id_from_index_name(index_name)
     if not dates:
         query = """SELECT last_updated FROM idxINDEX WHERE id=%s"""
         res = run_sql(query, (index_id,))
         if not res:
             return set([])
         if not res[0][0] or force_all:
             dates = ("0000-00-00", None)
         else:
             dates = (res[0][0], None)
     if dates[1] is None:
         res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s""",
                                    (dates[0],)))
         if index_name == 'fulltext':
             res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id
                                         WHERE text_extraction_date <= modification_date AND
                                         modification_date >= %s
                                         AND status<>'DELETED'""",
                                         (dates[0],)))
     elif dates[0] is None:
         res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date <= %s""",
                                    (dates[1],)))
         if index_name == 'fulltext':
             res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id
                                         WHERE text_extraction_date <= modification_date
                                         AND modification_date <= %s
                                         AND status<>'DELETED'""",
                                         (dates[1],)))
     else:
         res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b
                                    WHERE b.modification_date >= %s AND
                                    b.modification_date <= %s""",
                                    (dates[0], dates[1])))
         if index_name == 'fulltext':
             res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id
                                         WHERE text_extraction_date <= modification_date AND
                                         modification_date >= %s AND
                                         modification_date <= %s AND
                                         status<>'DELETED'""",
                                         (dates[0], dates[1],)))
     # special case of author indexes where we need to re-index
     # those records that were affected by changed BibAuthorID attributions:
     if index_name in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'):
         from invenio.bibauthorid_personid_maintenance import get_recids_affected_since
         # dates[1] is ignored, since BibAuthorID API does not offer upper limit search
         rec_list_author = intbitset(get_recids_affected_since(dates[0]))
         res = res | rec_list_author
     return set(res)
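
 # How the `dates` argument of get_recIDs_by_date_bibliographic() maps onto
 # the SQL above (dates are examples only):
 #
 #     not dates                             -> start from idxINDEX.last_updated
 #     dates == ('2013-01-01', None)         -> modification_date >= '2013-01-01'
 #     dates == (None, '2013-01-31')         -> modification_date <= '2013-01-31'
 #     dates == ('2013-01-01', '2013-01-31') -> both bounds applied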
 
 
 def get_recIDs_by_date_authority(dates, index_name, force_all=False):
     """ Finds records that were modified between DATES[0] and DATES[1]
         for given index.
         If DATES is not set, then finds records that were modified since
         the last update of the index.
         Searches for bibliographic records connected to authority records
         that have been changed.
     """
     index_id = get_index_id_from_index_name(index_name)
     index_tags = get_index_tags(index_name)
     if not dates:
         query = """SELECT last_updated FROM idxINDEX WHERE id=%s"""
         res = run_sql(query, (index_id,))
         if not res:
             return set([])
         if not res[0][0] or force_all:
             dates = ("0000-00-00", None)
         else:
             dates = (res[0][0], None)
     res = intbitset()
     for tag in index_tags:
         pattern = tag.replace('%', '*')
         matches = fnmatch.filter(CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.keys(), pattern)
         if not len(matches):
             continue
         for tag_match in matches:
             # get the type of authority record associated with this field
             auth_type = CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.get(tag_match)
             # find updated authority records of this type
             # dates[1] is ignored, needs dates[0] to find res
             now = datetime.now()
             auth_recIDs = search_pattern(p='980__a:' + auth_type) \
                 & search_unit_in_bibrec(str(dates[0]), str(now), type='m')
             # now find dependent bibliographic records
             for auth_recID in auth_recIDs:
                 # get the fix authority identifier of this authority record
                 control_nos = get_control_nos_from_recID(auth_recID)
                 # there may be multiple control number entries! (the '035' field is repeatable!)
                 for control_no in control_nos:
                     # get the bibrec IDs that refer to AUTHORITY_ID in TAG
                     tag_0 = tag_match[:5] + '0' # possibly do the same for '4' subfields ?
                     fieldvalue = '"' + control_no + '"'
                     res |= search_pattern(p=tag_0 + ':' + fieldvalue)
     return set(res)
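
 # A sketch of the lookup chain in get_recIDs_by_date_authority() above, for
 # a single tag (the control number is hypothetical): tag_match '100__a'
 # yields tag_0 '100__0'; if an updated authority record carries control
 # number 'AUTHOR|(XYZ)abc123', its dependent bibliographic records are
 # found via:
 #
 #     search_pattern(p='100__0:"AUTHOR|(XYZ)abc123"')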
 
 
 def get_not_updated_recIDs(modified_dates, indexes, force_all=False):
     """Finds not updated recIDs in database for indexes.
        @param modified_dates: between this dates we should look for modified records
        @type modified_dates: [date_old, date_new]
        @param indexes: list of indexes
        @type indexes: string separated by coma
        @param force_all: if True all records will be taken
     """
     found_recIDs = set()
     write_message(CFG_BIBINDEX_UPDATE_MESSAGE)
     for index in indexes:
         found_recIDs |= get_recIDs_by_date_bibliographic(modified_dates, index, force_all)
         found_recIDs |= get_recIDs_by_date_authority(modified_dates, index, force_all)
     return list(sorted(found_recIDs))
 
 
-def get_recIDs_from_cli():
+def get_recIDs_from_cli(indexes=[]):
     """
         Gets recID ranges from the CLI for indexing when the
         user specified the 'id' or 'collection' option, or
-        search for modified recIDs when they're not specified.
+        searches for modified recIDs for the provided indexes
+        when recIDs are not specified.
+        @param indexes: list of index names, as obtained from
+            the get_indexes_from_cli() function
+        @type indexes: list of strings
     """
-    indexes = task_get_option("windex")
-    if not indexes:
-        indexes = get_all_indexes()
-    else:
-        indexes = indexes.split(",")
     # need to first update idxINDEX table to find proper recIDs for reindexing
     if task_get_option("reindex"):
         for index_name in indexes:
             run_sql("""UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00'
                        WHERE name=%s""", (index_name,))
 
     if task_get_option("id"):
         return task_get_option("id")
     elif task_get_option("collection"):
         l_of_colls = task_get_option("collection").split(",")
         recIDs = perform_request_search(c=l_of_colls)
         recIDs_range = []
         for recID in recIDs:
             recIDs_range.append([recID, recID])
         return recIDs_range
     elif task_get_option("cmd") == "add":
         recs = get_not_updated_recIDs(task_get_option("modified"),
                                       indexes,
                                       task_get_option("force"))
         recIDs_range = beautify_range_list(create_range_list(recs))
         return recIDs_range
     return []
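
 # The ranges returned above are inclusive [low, high] pairs; e.g. for
 # `--id 10-20,25` one would get [[10, 20], [25, 25]] (illustrative values).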
 
 
+def get_indexes_from_cli():
+    """
+        Gets indexes from the CLI and checks that they
+        are valid. If no indexes were specified, returns
+        all known indexes.
+    """
+    indexes = task_get_option("windex")
+    if not indexes:
+        indexes = get_all_indexes()
+    else:
+        indexes = indexes.split(",")
+        indexes = remove_inexistent_indexes(indexes, leave_virtual=True)
+    return indexes
+
+
 def remove_dependent_index(virtual_indexes, dependent_index):
     """
         Removes dependent index from virtual indexes.
-        @param virtual_indexes: names of virtual_indexes separated by comma
-        @type virtual_indexes: string
+        @param virtual_indexes: names of virtual_indexes
+        @type virtual_indexes: list of strings
         @param dependent_index: name of dependent index
         @type dependent_index: string
     """
     if not virtual_indexes:
         write_message("You should specify a name of a virtual index...")
-    else:
-        virtual_indexes = virtual_indexes.split(",")
+        return
+
     id_dependent = get_index_id_from_index_name(dependent_index)
     wordTables = get_word_tables(virtual_indexes)
     for index_id, index_name, index_tags in wordTables:
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern='idxWORD%02dF',
                               wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=50)
         wordTable.remove_dependent_index(id_dependent)
 
         wordTable.report_on_table_consistency()
         task_sleep_now_if_required()
 
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern='idxPAIR%02dF',
                               wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Pairs"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=50)
         wordTable.remove_dependent_index(id_dependent)
 
         wordTable.report_on_table_consistency()
         task_sleep_now_if_required()
 
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern='idxPHRASE%02dF',
                               wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Phrases"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=50)
         wordTable.remove_dependent_index(id_dependent)
 
         wordTable.report_on_table_consistency()
 
         query = """DELETE FROM idxINDEX_idxINDEX WHERE id_virtual=%s AND id_normal=%s"""
         run_sql(query, (index_id, id_dependent))
 
 
 def task_run_core():
-    """Runs the task by fetching arguments from the BibSched task queue.  This is
-    what BibSched will be invoking via daemon call.
-    The task prints Fibonacci numbers for up to NUM on the stdout, and some
-    messages on stderr.
-    Return 1 in case of success and 0 in case of failure."""
+    """Runs the task by fetching arguments from the BibSched task queue.
+       This is what BibSched will be invoking via daemon call.
+    """
     global _last_word_table
 
+    indexes = get_indexes_from_cli()
+    if len(indexes) == 0:
+        write_message("Specified indexes can't be found.")
+        return True
+
+    # check tables consistency
     if task_get_option("cmd") == "check":
-        indexes = task_get_option("windex") and task_get_option("windex").split(",") or get_all_indexes()
         wordTables = get_word_tables(indexes)
         for index_id, index_name, index_tags in wordTables:
             wordTable = WordTable(index_name=index_name,
                                   index_id=index_id,
                                   fields_to_index=index_tags,
                                   table_name_pattern='idxWORD%02dF',
                                   wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                                   tag_to_tokenizer_map={'8564_u': "BibIndexFulltextTokenizer"},
                                   wash_index_terms=50)
             _last_word_table = wordTable
             wordTable.report_on_table_consistency()
             task_sleep_now_if_required(can_stop_too=True)
 
 
             wordTable = WordTable(index_name=index_name,
                                   index_id=index_id,
                                   fields_to_index=index_tags,
                                   table_name_pattern='idxPAIR%02dF',
                                   wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Pairs"],
                                   tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                   wash_index_terms=100)
             _last_word_table = wordTable
             wordTable.report_on_table_consistency()
             task_sleep_now_if_required(can_stop_too=True)
 
 
             wordTable = WordTable(index_name=index_name,
                                   index_id=index_id,
                                   fields_to_index=index_tags,
                                   table_name_pattern='idxPHRASE%02dF',
                                   wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Phrases"],
                                   tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                   wash_index_terms=0)
             _last_word_table = wordTable
             wordTable.report_on_table_consistency()
             task_sleep_now_if_required(can_stop_too=True)
         _last_word_table = None
         return True
+
     #virtual index: remove dependent index
     if task_get_option("remove-dependent-index"):
-        remove_dependent_index(task_get_option("windex"),
+        remove_dependent_index(indexes,
                                task_get_option("remove-dependent-index"))
         return True
 
     #initialization for Words,Pairs,Phrases
-    recIDs_range = get_recIDs_from_cli()
-    recIDs_for_index = find_affected_records_for_index(task_get_option("windex"),
+    recIDs_range = get_recIDs_from_cli(indexes)
+    recIDs_for_index = find_affected_records_for_index(indexes,
                                                        recIDs_range,
                                                        (task_get_option("force") or \
                                                        task_get_option("reindex") or \
                                                        task_get_option("cmd") == "del"))
 
     wordTables = get_word_tables(recIDs_for_index.keys())
     if not wordTables:
         write_message("Selected indexes/recIDs are up to date.")
 
     # Let's work on single words!
     for index_id, index_name, index_tags in wordTables:
         reindex_prefix = ""
         if task_get_option("reindex"):
             reindex_prefix = "tmp_"
             init_temporary_reindex_tables(index_id, reindex_prefix)
 
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern=reindex_prefix + 'idxWORD%02dF',
                               wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexFulltextTokenizer"},
                               wash_index_terms=50)
         _last_word_table = wordTable
         wordTable.report_on_table_consistency()
         try:
             if task_get_option("cmd") == "del":
                 if task_get_option("id") or task_get_option("collection"):
                     wordTable.del_recIDs(recIDs_range)
                     task_sleep_now_if_required(can_stop_too=True)
                 else:
                     error_message = "Missing IDs of records to delete from " \
                             "index %s." % wordTable.tablename
                     write_message(error_message, stream=sys.stderr)
                     raise StandardError(error_message)
             elif task_get_option("cmd") == "add":
                 final_recIDs = beautify_range_list(create_range_list(recIDs_for_index[index_name]))
                 wordTable.add_recIDs(final_recIDs, task_get_option("flush"))
                 task_sleep_now_if_required(can_stop_too=True)
             elif task_get_option("cmd") == "repair":
                 wordTable.repair(task_get_option("flush"))
                 task_sleep_now_if_required(can_stop_too=True)
             else:
                 error_message = "Invalid command found processing %s" % \
                     wordTable.tablename
                 write_message(error_message, stream=sys.stderr)
                 raise StandardError(error_message)
         except StandardError, e:
             write_message("Exception caught: %s" % e, sys.stderr)
             register_exception(alert_admin=True)
             if _last_word_table:
                 _last_word_table.put_into_db()
             raise
 
         wordTable.report_on_table_consistency()
         task_sleep_now_if_required(can_stop_too=True)
 
         # Let's work on pairs now
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern=reindex_prefix + 'idxPAIR%02dF',
                               wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Pairs"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=100)
         _last_word_table = wordTable
         wordTable.report_on_table_consistency()
         try:
             if task_get_option("cmd") == "del":
                 if task_get_option("id") or task_get_option("collection"):
                     wordTable.del_recIDs(recIDs_range)
                     task_sleep_now_if_required(can_stop_too=True)
                 else:
                     error_message = "Missing IDs of records to delete from " \
                             "index %s." % wordTable.tablename
                     write_message(error_message, stream=sys.stderr)
                     raise StandardError(error_message)
             elif task_get_option("cmd") == "add":
                 final_recIDs = beautify_range_list(create_range_list(recIDs_for_index[index_name]))
                 wordTable.add_recIDs(final_recIDs, task_get_option("flush"))
                 task_sleep_now_if_required(can_stop_too=True)
             elif task_get_option("cmd") == "repair":
                 wordTable.repair(task_get_option("flush"))
                 task_sleep_now_if_required(can_stop_too=True)
             else:
                 error_message = "Invalid command found processing %s" % \
                         wordTable.tablename
                 write_message(error_message, stream=sys.stderr)
                 raise StandardError(error_message)
         except StandardError, e:
             write_message("Exception caught: %s" % e, sys.stderr)
             register_exception()
             if _last_word_table:
                 _last_word_table.put_into_db()
             raise
 
         wordTable.report_on_table_consistency()
         task_sleep_now_if_required(can_stop_too=True)
 
         # Let's work on phrases now
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern=reindex_prefix + 'idxPHRASE%02dF',
                               wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Phrases"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=0)
         _last_word_table = wordTable
         wordTable.report_on_table_consistency()
         try:
             if task_get_option("cmd") == "del":
                 if task_get_option("id") or task_get_option("collection"):
                     wordTable.del_recIDs(recIDs_range)
                     task_sleep_now_if_required(can_stop_too=True)
                 else:
                     error_message = "Missing IDs of records to delete from " \
                             "index %s." % wordTable.tablename
                     write_message(error_message, stream=sys.stderr)
                     raise StandardError(error_message)
             elif task_get_option("cmd") == "add":
                 final_recIDs = beautify_range_list(create_range_list(recIDs_for_index[index_name]))
                 wordTable.add_recIDs(final_recIDs, task_get_option("flush"))
                 if not task_get_option("id") and not task_get_option("collection"):
-                    # let us update last_updated timestamp info, if run via automatic mode:
-                    update_index_last_updated(index_id, task_get_task_param('task_starting_time'))
+                    update_index_last_updated([index_name], task_get_task_param('task_starting_time'))
                 task_sleep_now_if_required(can_stop_too=True)
             elif task_get_option("cmd") == "repair":
                 wordTable.repair(task_get_option("flush"))
                 task_sleep_now_if_required(can_stop_too=True)
             else:
                 error_message = "Invalid command found processing %s" % \
                         wordTable.tablename
                 write_message(error_message, stream=sys.stderr)
                 raise StandardError(error_message)
         except StandardError, e:
             write_message("Exception caught: %s" % e, sys.stderr)
             register_exception()
             if _last_word_table:
                 _last_word_table.put_into_db()
             raise
 
         wordTable.report_on_table_consistency()
         task_sleep_now_if_required(can_stop_too=True)
 
         if task_get_option("reindex"):
             swap_temporary_reindex_tables(index_id, reindex_prefix)
-            update_index_last_updated(index_id, task_get_task_param('task_starting_time'))
+            update_index_last_updated([index_name], task_get_task_param('task_starting_time'))
         task_sleep_now_if_required(can_stop_too=True)
 
+    # update modification date also for indexes that were up to date
+    if not task_get_option("id") and not task_get_option("collection") and \
+       task_get_option("cmd") == "add":
+        up_to_date = set(indexes) - set(recIDs_for_index.keys())
+        update_index_last_updated(list(up_to_date), task_get_task_param('task_starting_time'))
+
+
     _last_word_table = None
     return True
 
 
 ### okay, here we go:
 if __name__ == '__main__':
     main()
diff --git a/modules/bibindex/lib/bibindex_engine_utils.py b/modules/bibindex/lib/bibindex_engine_utils.py
index 20028e522..c6dfb0400 100644
--- a/modules/bibindex/lib/bibindex_engine_utils.py
+++ b/modules/bibindex/lib/bibindex_engine_utils.py
@@ -1,307 +1,331 @@
 # -*- coding:utf-8 -*-
 ##
 ## This file is part of Invenio.
-## Copyright (C) 2010, 2011, 2012 CERN.
+## Copyright (C) 2010, 2011, 2012, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 """bibindex_engine_utils: here are some useful regular experssions for tokenizers
    and several helper functions.
 """
 
 
 import re
 import sys
 import os
 
 from invenio.dbquery import run_sql, \
     DatabaseError
 from invenio.bibtask import write_message
 from invenio.search_engine_utils import get_fieldvalues
 from invenio.config import \
      CFG_BIBINDEX_CHARS_PUNCTUATION, \
      CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS
 from invenio.pluginutils import PluginContainer
 from invenio.bibindex_engine_config import CFG_BIBINDEX_TOKENIZERS_PATH
 
 
 latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]')
 phrase_delimiter_re = re.compile(r'[\.:;\?\!]')
 space_cleaner_re = re.compile(r'\s+')
 re_block_punctuation_begin = re.compile(r"^" + CFG_BIBINDEX_CHARS_PUNCTUATION + "+")
 re_block_punctuation_end = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION + "+$")
 re_punctuation = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION)
 re_separators = re.compile(CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS)
 re_arxiv = re.compile(r'^arxiv:\d\d\d\d\.\d\d\d\d')
 
 re_pattern_fuzzy_author_trigger = re.compile(r'[\s\,\.]')
 # FIXME: re_pattern_fuzzy_author_trigger could be removed and an
 # BibAuthorID API function could be called instead after we
 # double-check that there are no circular imports.
 
 
 
 def load_tokenizers():
     """
     Loads all the bibindex tokenizers and returns them.
     """
     return PluginContainer(os.path.join(CFG_BIBINDEX_TOKENIZERS_PATH, 'BibIndex*.py'))
 
 
 
 def get_all_index_names_and_column_values(column_name):
     """Returns a list of tuples of name and another column of all defined words indexes.
        Returns empty list in case there are no tags indexed in this index or in case
        the column name does not exist.
        Example: output=[('global', something), ('title', something)]."""
     out = []
     query = """SELECT name, %s FROM idxINDEX""" % column_name
     try:
         res = run_sql(query)
         for row in res:
             out.append((row[0], row[1]))
     except DatabaseError:
         write_message("Exception caught for SQL statement: %s; column %s might not exist" % (query, column_name), sys.stderr)
     return out
 
 
 
 def author_name_requires_phrase_search(p):
     """
     Detect whether author query pattern p requires phrase search.
     Notably, look for presence of spaces and commas.
     """
     if re_pattern_fuzzy_author_trigger.search(p):
         return True
     return False
 
 
 def get_field_count(recID, tags):
     """
     Return number of field instances having TAGS in record RECID.
 
     @param recID: record ID
     @type recID: int
     @param tags: list of tags to count, e.g. ['100__a', '700__a']
     @type tags: list
     @return: number of tags present in record
     @rtype: int
     @note: Works internally via getting field values, which may not be
            very efficient.  Could use counts only, or else retrieve stored
            recstruct format of the record and walk through it.
     """
     out = 0
     for tag in tags:
         out += len(get_fieldvalues(recID, tag))
     return out
 
 
 def run_sql_drop_silently(query):
     """
         An SQL DROP statement with an IF EXISTS clause generates a
         warning if the table does not exist. To mute the warning
         we remove IF EXISTS and catch the SQL exception telling
         us that the table does not exist.
     """
     try:
         query = query.replace(" IF EXISTS", "")
         run_sql(query)
     except Exception, e:
         if "Unknown table" not in str(e):
             raise
 
 
 def get_idx_indexer(name):
     """Returns the indexer field value"""
     try:
         return run_sql("SELECT indexer FROM idxINDEX WHERE NAME=%s", (name, ))[0][0]
     except StandardError, e:
         return (0, e)
 
 
 def get_all_indexes(virtual=True, with_ids=False):
     """Returns the list of the names of all defined words indexes.
        Returns empty list in case there are no tags indexed in this index.
        @param virtual: if True function will return also virtual indexes
        @param with_ids: if True function will return also IDs of found indexes
        Example: output=['global', 'author']."""
     out = []
     if virtual:
         query = """SELECT %s name FROM idxINDEX"""
         query = query % (with_ids and "id," or "")
     else:
         query = """SELECT %s w.name FROM idxINDEX AS w
                    WHERE w.id NOT IN (SELECT DISTINCT id_virtual FROM idxINDEX_idxINDEX)"""
         query = query % (with_ids and "w.id," or "")
     res = run_sql(query)
     if with_ids:
         out = [row for row in res]
     else:
         out = [row[0] for row in res]
     return out
 
 
 def get_all_virtual_indexes():
     """ Returns all defined 'virtual' indexes. """
     query = """SELECT DISTINCT v.id_virtual, w.name FROM idxINDEX_idxINDEX AS v,
                                                          idxINDEX AS w
                WHERE v.id_virtual=w.id"""
     res = run_sql(query)
     return res
 
 
 def get_index_virtual_indexes(index_id):
     """Returns 'virtual' indexes that should be indexed together with
        given index."""
     query = """SELECT v.id_virtual, w.name  FROM idxINDEX_idxINDEX AS v,
                                                  idxINDEX AS w
                WHERE v.id_virtual=w.id AND
                      v.id_normal=%s"""
     res = run_sql(query, (index_id,))
     return res
 
 
 def is_index_virtual(index_id):
     """Checks if index is virtual"""
     query = """SELECT id_virtual FROM idxINDEX_idxINDEX
                WHERE id_virtual=%s"""
     res = run_sql(query, (index_id,))
     if res:
         return True
     return False
 
 
 def get_virtual_index_building_blocks(index_id):
     """Returns indexes that made up virtual index of given index_id.
        If index_id is an id of normal index (not virtual) returns
        empty tuple.
        """
     query = """SELECT v.id_normal, w.name FROM idxINDEX_idxINDEX AS v,
                                                idxINDEX AS w
                WHERE v.id_normal=w.id AND
                      v.id_virtual=%s"""
     res = run_sql(query, (index_id,))
     return res
 
 
 def get_index_id_from_index_name(index_name):
     """Returns the words/phrase index id for INDEXNAME.
        Returns 0 in case there is no index with this name.
        Example: index_name='author', output=4."""
     out = 0
     query = """SELECT w.id FROM idxINDEX AS w
                 WHERE w.name=%s LIMIT 1"""
     res = run_sql(query, (index_name,), 1)
     if res:
         out = res[0][0]
     return out
 
 
 def get_index_name_from_index_id(index_id):
     """Returns the words/phrase index name for INDEXID.
        Returns '' in case there is no index with this id.
        Example: index_id=9, output='fulltext'."""
     res = run_sql("SELECT name FROM idxINDEX WHERE id=%s", (index_id,))
     if res:
         return res[0][0]
     return ''
 
 
 def get_field_tags(field):
     """Returns a list of MARC tags for the field code 'field'.
        Returns empty list in case of error.
        Example: field='author', output=['100__%','700__%']."""
     out = []
     query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
                 WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
                 ORDER BY ft.score DESC"""
     res = run_sql(query, (field,))
     return [row[0] for row in res]
 
 
 def get_tag_indexes(tag, virtual=True):
     """Returns indexes names and ids corresponding to the given tag
        @param tag: MARC tag in one of the forms:
             'xx%', 'xxx', 'xxx__a', 'xxx__%'
        @param virtual: if True function will also return virtual indexes"""
     tag2 = tag[0:2] + "%" #for tags in the form: 10%
     tag3 = tag[:-1] + "%" #for tags in the form: 100__%
     query = """SELECT DISTINCT w.id,w.name FROM idxINDEX AS w,
                                                 idxINDEX_field AS wf,
                                                 field_tag AS ft,
                                                 tag as t
                WHERE (t.value=%%s OR
                       t.value=%%s OR
                       %s) AND
                      t.id=ft.id_tag AND
                      ft.id_field=wf.id_field AND
                      wf.id_idxINDEX=w.id"""
     if tag[-1] == "%":
         missing_piece = "t.value LIKE %s"
     elif tag[-1] != "%" and len(tag) == 3:
         missing_piece = "t.value LIKE %s"
         tag3 = tag + "%" #for all tags which start from 'tag'
     else:
         missing_piece = "t.value=%s"
     query = query % missing_piece
     res = run_sql(query, (tag, tag2, tag3))
     if res:
         if virtual:
             response = list(res)
             index_ids = map(str, zip(*res)[0])
             query = """SELECT DISTINCT v.id_virtual,w.name FROM idxINDEX_idxINDEX AS v,
                                                                 idxINDEX as w
                        WHERE v.id_virtual=w.id AND
                              v.id_normal IN ("""
             query = query + ", ".join(index_ids) + ")"
             response.extend(run_sql(query))
             return tuple(response)
         return res
     return None
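
 # Example of the tag variants built in get_tag_indexes() above: for
 # tag='100__a' the queried variants are tag='100__a', tag2='10%' and
 # tag3='100__%', so the index is found whether the tag table stores
 # '100__a', '10%' or '100__%'.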
 
 
 def get_index_tags(indexname, virtual=True):
     """Returns the list of tags that are indexed inside INDEXNAME.
        Returns empty list in case there are no tags indexed in this index.
        Note: uses get_field_tags() defined before.
        Example: indexname='author', output=['100__%', '700__%']."""
     out = []
     query = """SELECT f.code FROM idxINDEX AS w, idxINDEX_field AS wf,
     field AS f WHERE w.name=%s AND w.id=wf.id_idxINDEX
     AND f.id=wf.id_field"""
     res = run_sql(query, (indexname,))
     for row in res:
         out.extend(get_field_tags(row[0]))
     if not out and virtual:
         index_id = get_index_id_from_index_name(indexname)
         try:
             dependent_indexes = map(str, zip(*get_virtual_index_building_blocks(index_id))[0])
         except IndexError:
             return out
         tags = set()
         query = """SELECT DISTINCT f.code FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f
                    WHERE w.id=wf.id_idxINDEX AND
                          f.id=wf.id_field AND
                          w.id IN ("""
         query = query + ", ".join(dependent_indexes) + ")"
         res = run_sql(query)
         for row in res:
             tags |= set(get_field_tags(row[0]))
         return list(tags)
     return out
 
 
+def get_min_last_updated(indexes):
+    """Returns the minimum modification date, min(last_updated),
+       for the given indexes.
+       @param indexes: list of index names
+    """
+    placeholders = ", ".join(["%s"] * len(indexes))
+    query = """SELECT min(last_updated) FROM idxINDEX WHERE name IN (%s)""" % placeholders
+    res = run_sql(query, tuple(indexes))
+    return res
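+
+# Usage sketch for get_min_last_updated() (hypothetical index names and date):
+#
+#     get_min_last_updated(['title', 'author'])
+#     # executes: SELECT min(last_updated) FROM idxINDEX WHERE name IN (%s, %s)
+#     # and returns e.g. ((datetime.datetime(2013, 1, 31, 0, 0),),)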
+
+
+def remove_inexistent_indexes(indexes, leave_virtual=False):
+    """Removes indexes that do not exist from the given list of indexes.
+       @param indexes: list of indexes
+       @param leave_virtual: should we leave virtual indexes in the list?
+    """
+    correct_indexes = get_all_indexes(leave_virtual)
+    return [index for index in indexes if index in correct_indexes]
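+
+# Usage sketch for remove_inexistent_indexes() (hypothetical names): if the
+# defined indexes are ['global', 'title'], then
+#     remove_inexistent_indexes(['title', 'typo']) == ['title']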
diff --git a/modules/bibindex/lib/bibindex_regression_tests.py b/modules/bibindex/lib/bibindex_regression_tests.py
index a2c99549f..f1a4072c2 100644
--- a/modules/bibindex/lib/bibindex_regression_tests.py
+++ b/modules/bibindex/lib/bibindex_regression_tests.py
@@ -1,1418 +1,1450 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2006, 2007, 2008, 2010, 2011, 2013 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 """BibIndex Regression Test Suite."""
 
 __revision__ = "$Id$"
 
 import unittest
 import os
 import re
 from datetime import timedelta
 
 from invenio.bibindex_engine import WordTable, \
     get_word_tables, \
     find_affected_records_for_index, \
     get_recIDs_by_date_authority, \
     get_recIDs_by_date_bibliographic, \
     create_range_list, \
     beautify_range_list, \
     get_last_updated_all_indexes
 from invenio.bibindex_engine_utils import get_index_id_from_index_name, \
     get_index_tags, \
     get_tag_indexes, \
     get_all_indexes
 from invenio.bibindex_engine_config import CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR, \
     CFG_BIBINDEX_INDEX_TABLE_TYPE, \
     CFG_BIBINDEX_UPDATE_MESSAGE
 from invenio.bibtask import task_low_level_submission
 from invenio.config import CFG_BINDIR, CFG_LOGDIR
 from invenio.testutils import make_test_suite, run_test_suite, nottest
 from invenio.dbquery import run_sql, deserialize_via_marshal
 from invenio.intbitset import intbitset
 from invenio.search_engine import get_record
 from invenio.search_engine_utils import get_fieldvalues
 from invenio.bibauthority_engine import get_index_strings_by_control_no, get_control_nos_from_recID
 from invenio.bibindex_engine_utils import run_sql_drop_silently
 
 from invenio.bibupload import bibupload, \
     xml_marc_to_records
 from invenio.bibupload_regression_tests import wipe_out_record_from_all_tables
 from invenio.bibrecord import record_get_field_value, \
     record_xml_output
 from invenio.bibsort_engine import get_max_recid
 
 
-def reindex_for_type_with_bibsched(index_name, force_all=False):
+def reindex_for_type_with_bibsched(index_name, force_all=False, *other_options):
     """Runs bibindex for the specified index and returns the task_id.
        @param index_name: name of the index to reindex
        @param force_all: if True, the function will reindex all records,
        not just the affected ones
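+       @param other_options: any additional command line arguments to pass
+       on to the bibindex task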
     """
     program = os.path.join(CFG_BINDIR, 'bibindex')
     args = ['bibindex', 'bibindex_regression_tests', '-w', index_name, '-u', 'admin']
+    args.extend(other_options)
     if force_all:
         args.append("--force")
     task_id = task_low_level_submission(*args)
     COMMAND = "%s %s > /dev/null 2> /dev/null" % (program, str(task_id))
     os.system(COMMAND)
     return task_id
 
 
 def prepare_for_index_update(index_id, parameters={}):
     """ Prepares SQL query for an update of an index in the idxINDEX table.
         Takes into account remove_stopwords, remove_html_markup, remove_latex_markup,
         tokenizer and last_updated as parameters to change.
         remove_html_markup and remove_latex_markup accepts these values:
                                         '' to leave it unchanged
                                         'Yes' to change it to 'Yes'
                                         'No' to change it to 'No'.
         For remove_stopwords instead of 'Yes' one must give the name of the file (for example: 'stopwords.kb')
         from CFG_ETCDIR/bibrank/ directory pointing at stopwords knowledge base.
         For tokenizer please specify the name of the tokenizer.
         For last_updated provide a date in format: '2013-01-31 00:00:00'
         @param index_id: id of the index to change
         @param parameters: dict with names of parameters and their new values
     """
     if len(parameters) == 0:
         return ''
 
     parameter_set = False
     query_update = "UPDATE idxINDEX SET "
     for key in parameters:
         if parameters[key]:
             query_update += parameter_set and ", " or ""
             query_update += "%s='%s'" % (key, parameters[key])
             parameter_set = True
     query_update += " WHERE id=%s" % index_id
     return query_update
 
 
 @nottest
 def reindex_word_tables_into_testtables(index_name, recids = None, prefix = 'test', parameters={}, turn_off_virtual_indexes=True):
     """Function for setting up a test enviroment. Reindexes an index with a given name to a
        new temporary table with a given prefix. During the reindexing it changes some parameters
        of chosen index. It's useful for conducting tests concerning the reindexing process.
        Reindexes only idxWORDxxx tables.
        @param index_name: name of the index we want to reindex
        @param recids: None means reindexing all records; pass record ids to update only part of them
        @param prefix: prefix for the new tables; if set to a false value the function will reindex into the original table
        @param parameters: dict with parameters and their new values; for a more specific
        description take a look at the 'prepare_for_index_update' function.
        @param turn_off_virtual_indexes: if True only the specified index will be reindexed,
        without the connected virtual indexes
     """
     index_id = get_index_id_from_index_name(index_name)
     query_update = prepare_for_index_update(index_id, parameters)
     last_updated = run_sql("""SELECT last_updated FROM idxINDEX WHERE id=%s""" % index_id)[0][0]
 
     test_tablename = "%s_idxWORD%02d" % (prefix, index_id)
     query_drop_forward_index_table = """DROP TABLE IF EXISTS %sF""" % test_tablename
     query_drop_reversed_index_table = """DROP TABLE IF EXISTS %sR""" % test_tablename
 
     query_create_forward_index_table = """CREATE TABLE %sF (
                                           id mediumint(9) unsigned NOT NULL auto_increment,
                                           term varchar(50) default NULL,
                                           hitlist longblob,
                                           PRIMARY KEY  (id),
                                           UNIQUE KEY term (term)
                                           ) ENGINE=MyISAM""" % test_tablename
     query_create_reversed_index_table = """CREATE TABLE %sR (
                                            id_bibrec mediumint(9) unsigned NOT NULL,
                                            termlist longblob,
                                            type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                                            PRIMARY KEY (id_bibrec,type)
                                            ) ENGINE=MyISAM""" % test_tablename
 
     run_sql_drop_silently(query_drop_forward_index_table)
     run_sql_drop_silently(query_drop_reversed_index_table)
     run_sql(query_create_forward_index_table)
     run_sql(query_create_reversed_index_table)
     if query_update:
         run_sql(query_update)
 
     pattern = 'idxWORD'
     if prefix:
         pattern = '%s_idxWORD' % prefix
     wordTable = WordTable(index_name=index_name,
                           index_id=index_id,
                           fields_to_index=get_index_tags(index_name),
                           table_name_pattern= pattern + '%02dF',
                           wordtable_type = CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                           tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                           wash_index_terms=50)
     if turn_off_virtual_indexes:
         wordTable.turn_off_virtual_indexes()
     if recids:
         wordTable.add_recIDs(recids, 10000)
     else:
-        recIDs_for_index = find_affected_records_for_index(index_name,
-                                               [[1, get_max_recid()]],
-                                                                 True)
+        recIDs_for_index = find_affected_records_for_index([index_name],
+                                                 [[1, get_max_recid()]],
+                                                                   True)
         bib_recIDs = get_recIDs_by_date_bibliographic([], index_name)
         auth_recIDs = get_recIDs_by_date_authority([], index_name)
         final_recIDs = bib_recIDs | auth_recIDs
         final_recIDs = set(final_recIDs) & set(recIDs_for_index[index_name])
         final_recIDs = beautify_range_list(create_range_list(list(final_recIDs)))
         wordTable.add_recIDs(final_recIDs, 10000)
     return last_updated
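+
+# Typical setUp usage in the test classes below (see e.g.
+# BibIndexRemoveStopwordsTest): reindex 'title' into test_idxWORD08F/R with
+# stopword removal turned on, keeping the previous last_updated value so the
+# change can be reverted in tearDown:
+#
+#     last_updated = reindex_word_tables_into_testtables(
+#         'title',
+#         parameters={'remove_stopwords': 'stopwords.kb',
+#                     'last_updated': '0000-00-00 00:00:00'})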
 
 
 @nottest
 def remove_reindexed_word_testtables(index_name, prefix = 'test'):
     """
         Removes prefix_idxWORDxxx tables created during tests.
         @param index_name: name of the index
         @param prefix: prefix for the tables
     """
     index_id = get_index_id_from_index_name(index_name)
     test_tablename = "%s_idxWORD%02d" % (prefix, index_id)
     query_drop_forward_index_table = """DROP TABLE IF EXISTS %sF""" % test_tablename
     query_drop_reversed_index_table = """DROP TABLE IF EXISTS %sR""" % test_tablename
     run_sql(query_drop_forward_index_table)
     run_sql(query_drop_reversed_index_table)
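+
+# The assertions below decode the two index storage formats by hand.  As a
+# minimal sketch (hypothetical helpers, not part of this module):
+#
+#     def get_hitlist(table, term):
+#         """Forward table: 'hitlist' is an intbitset blob of recIDs."""
+#         res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % table, (term,))
+#         return res and intbitset(res[0][0]).tolist() or []
+#
+#     def get_termlist(table, recid):
+#         """Reversed table: 'termlist' is a marshal-serialized list of terms."""
+#         res = run_sql("SELECT termlist FROM %s WHERE id_bibrec=%%s" % table,
+#                       (recid,))
+#         return res and deserialize_via_marshal(res[0][0]) or []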
 
 
 class BibIndexRemoveStopwordsTest(unittest.TestCase):
     """Tests remove_stopwords parameter of an index. Changes it in the database
        and reindexes from scratch into a new table to see the diffrence which is brought
        by change. Uses 'title' index.
     """
 
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'title',
                 parameters = {'remove_stopwords':'stopwords.kb',
                               'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 4:
             remove_reindexed_word_testtables('title')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('title'),
                 parameters = {'remove_stopwords':'No',
                               'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
    def test_check_occurrences_of_stopwords_in_testtable_word_of(self):
        """Tests that the stopword 'of' does not appear in the new reindexed table"""
 
         query = "SELECT hitlist FROM test_idxWORD08F WHERE term='of'"
         res = run_sql(query)
         self.assertEqual(0, len(res))
 
    def test_check_occurrences_of_stopwords_in_testtable_word_everything(self):
        """Tests that the stopword 'everything' does not appear in the new reindexed table"""
 
         query = "SELECT hitlist FROM test_idxWORD08F WHERE term='everything'"
         res = run_sql(query)
         self.assertEqual(0, len(res))
 
    def test_compare_non_stopwords_occurrences_in_original_and_test_tables_word_theory(self):
        """Checks that stopword removal has no influence on the indexing of the word 'theory' """

        word = "theori" # 'theori', not 'theory', because of the default stemming for the title index
        query = "SELECT hitlist FROM test_idxWORD08F WHERE term='%s'" % word
        iset_removed = "iset_removed"   # sentinel defaults of different lengths, so the
        iset_original = "iset_original" # assertion fails when the terms are missing
         res = run_sql(query)
         if res:
             iset_removed = intbitset(res[0][0])
         query = "SELECT hitlist FROM idxWORD08F WHERE term='%s'" % word
         res = run_sql(query)
         if res:
             iset_original = intbitset(res[0][0])
         self.assertEqual(len(iset_removed), len(iset_original))
 
     def test_compare_non_stopwords_occurrences_in_original_and_test_tables_word_on(self):
         """Checks if stopwords removing has no influence on indexation of word 'o(n)' """
 
         word = "o(n)"
         query = "SELECT hitlist FROM test_idxWORD08F WHERE term='%s'" % word
         iset_removed = "iset_removed"
         iset_original = "iset_original"
         res = run_sql(query)
         if res:
             iset_removed = intbitset(res[0][0])
         query = "SELECT hitlist FROM idxWORD08F WHERE term='%s'" % word
         res = run_sql(query)
         if res:
             iset_original = intbitset(res[0][0])
         self.assertEqual(len(iset_removed), len(iset_original))
 
 
 class BibIndexRemoveLatexTest(unittest.TestCase):
     """Tests remove_latex_markup parameter of an index. Changes it in the database
        and reindexes from scratch into a new table to see the diffrence which is brought
        by change. Uses 'abstract' index.
     """
 
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'abstract',
                 parameters = {'remove_latex_markup':'Yes',
                               'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 4:
             remove_reindexed_word_testtables('abstract')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('abstract'),
                 parameters = {'remove_latex_markup':'No',
                               'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
 
     def test_check_occurrences_after_latex_removal_word_u1(self):
         """Tests how many times experssion 'u(1)' occures"""
 
         word = "u(1)"
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         iset = "iset_change"
         if res:
             iset = intbitset(res[0][0])
         self.assertEqual(3, len(iset))
 
     def test_check_exact_occurrences_after_latex_removal_word_theta(self):
         """Tests where experssion 'theta' occures"""
 
         word = "theta"
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([12], ilist)
 
     def test_compare_occurrences_after_and_before_latex_removal_math_expression(self):
         """Checks if latex removal has no influence on indexation of expression 's(u(n_1)*u(n_2))' """
 
         word = 's(u(n_1)*u(n_2))'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist_test = []
         if res:
             iset = intbitset(res[0][0])
             ilist_test = iset.tolist()
         query = "SELECT hitlist FROM idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist = ["default_not_equal"]
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual(ilist, ilist_test)
 
     def test_check_occurrences_latex_expression_with_u1(self):
         """Tests influence of latex removal on record 80"""
 
         word = '%over u(1)%'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term LIKE '%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([80], ilist)
 
 
 class BibIndexRemoveHtmlTest(unittest.TestCase):
     """Tests remove_html_markup parameter of an index. Changes it in the database
        and reindexes from scratch into a new table to see the diffrence which is brought
        by change. Uses 'abstract' index.
     """
 
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'abstract',
                 parameters = {'remove_html_markup':'Yes',
                               'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 2:
             remove_reindexed_word_testtables('abstract')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('abstract'),
                 parameters = {'remove_html_markup':'No',
                               'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
 
     def test_check_occurrences_after_html_removal_tag_p(self):
         """Tests if expression 'water-hog</p>' is not indexed after html markup removal"""
 
         word = 'water-hog</p>'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual(0, len(ilist))
 
 
     def test_check_occurrences_after_and_before_html_removal_word_style(self):
         """Tests html markup removal influence on expression 'style="width' """
 
         word = 'style="width'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist_test = []
         if res:
             iset = intbitset(res[0][0])
             ilist_test = iset.tolist()
         query = "SELECT hitlist FROM idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('abstract'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertNotEqual(ilist, ilist_test)
 
 
 class BibIndexYearIndexTest(unittest.TestCase):
     """
        Checks the year index. Tests are different from those inside the WebSearch module because
        they only test content and reindexing, not the search itself.
     """
 
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'year',
                 parameters = {'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 3:
             remove_reindexed_word_testtables('year')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('year'),
                 parameters = {'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
 
     def test_occurrences_in_year_index_1973(self):
         """checks content of year index for year 1973"""
         word = '1973'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('year'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([34], ilist)
 
 
     def test_occurrences_in_year_index_2001(self):
         """checks content of year index for year 2001"""
         word = '2001'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('year'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([2, 11, 12, 15], ilist)
 
 
     def test_comparison_for_number_of_items(self):
         """checks the reindexation of year index"""
         query_test = "SELECT count(*) FROM test_idxWORD%02dF" % get_index_id_from_index_name('year')
         query_orig = "SELECT count(*) FROM idxWORD%02dF" % get_index_id_from_index_name('year')
        num_orig = 0  # different defaults, so the assertion
        num_test = 1  # fails if both queries return nothing
         res = run_sql(query_test)
         if res:
             num_test = res[0][0]
         res = run_sql(query_orig)
         if res:
             num_orig = res[0][0]
         self.assertEqual(num_orig, num_test)
 
 
 
 class BibIndexAuthorCountIndexTest(unittest.TestCase):
     """
       Checks the author count index. Tests are different from those inside the WebSearch module because
       they only test content and reindexing, not the search itself.
     """
 
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'authorcount',
                 parameters = {'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 2:
             remove_reindexed_word_testtables('authorcount')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('authorcount'),
                 parameters = {'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
 
     def test_occurrences_in_authorcount_index(self):
         """checks content of authorcount index for papers with 4 authors"""
         word = '4'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('authorcount'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([51, 54, 59, 66, 92, 96], ilist)
 
 
     def test_comparison_for_number_of_items(self):
         """checks the reindexation of authorcount index"""
         query_test = "SELECT count(*) FROM test_idxWORD%02dF" % get_index_id_from_index_name('authorcount')
         query_orig = "SELECT count(*) FROM idxWORD%02dF" % get_index_id_from_index_name('authorcount')
         num_orig = 0
         num_test = 1
         res = run_sql(query_test)
         if res:
             num_test = res[0][0]
         res = run_sql(query_orig)
         if res:
             num_orig = res[0][0]
         self.assertEqual(num_orig, num_test)
 
 
 class BibIndexItemCountIndexTest(unittest.TestCase):
     """
        Checks item count index. Checks a number of copies of books for records
        as well as occurrences of particular number of copies in test data.
     """
 
     def test_occurrences_in_itemcount_index_two_copies(self):
         """checks content of itemcount index for records with two copies of a book"""
         word = '2'
         query = "SELECT hitlist FROM idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('itemcount'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([31, 34], ilist)
 
     def test_records_for_number_of_copies_record1(self):
         """checks content of itemcount index for record: 1"""
         query = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=1" \
                  % get_index_id_from_index_name('itemcount')
         res = run_sql(query)
         self.assertEqual(deserialize_via_marshal(res[0][0]),['0'])
 
     def test_records_for_number_of_copies_record30(self):
         """checks content of itemcount index for record: 30"""
         query = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=30" \
                  % get_index_id_from_index_name('itemcount')
         res = run_sql(query)
         self.assertEqual(deserialize_via_marshal(res[0][0]),['1'])
 
     def test_records_for_number_of_copies_record32(self):
         """checks content of itemcount index for record: 32"""
         query = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=32" \
                  % get_index_id_from_index_name('itemcount')
         res = run_sql(query)
         self.assertEqual(deserialize_via_marshal(res[0][0]),['3'])
 
 
 class BibIndexFiletypeIndexTest(unittest.TestCase):
     """
       Checks the filetype index. Tests are different from those inside the WebSearch module because
       they only test content and indexing, not the search itself.
     """
 
    def test_occurrences_of_tif_filetype(self):
        """tests which records have a file with the 'tif' extension"""
         query = "SELECT hitlist FROM idxWORD%02dF where term='tif'" \
                 % get_index_id_from_index_name('filetype')
         res = run_sql(query)
         value = []
         if res:
             iset = intbitset(res[0][0])
             value = iset.tolist()
         self.assertEqual(sorted(value), [66, 71])
 
     def test_filetypes_of_records(self):
         """tests files extensions of record 1 and 77"""
         query1 = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=1" \
                  % get_index_id_from_index_name('filetype')
         query2 = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=77" \
                  % get_index_id_from_index_name('filetype')
         res1 = run_sql(query1)
         res2 = run_sql(query2)
         set1 = deserialize_via_marshal(res1[0][0])
         set2 = deserialize_via_marshal(res2[0][0])
         self.assertEqual(set1, ['gif', 'jpg'])
         self.assertEqual(set2, ['pdf', 'ps.gz'])
 
 
 class BibIndexJournalIndexTest(unittest.TestCase):
     """
        Checks the journal index. Tests are different from those inside the WebSearch module because
        they only test content and reindexing, not the search itself.
     """
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'journal',
                 parameters = {'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 2:
             remove_reindexed_word_testtables('journal')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('journal'),
                 parameters = {'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
 
     def test_occurrences_in_journal_index(self):
         """checks content of journal index for phrase: 'prog. theor. phys.' """
         word = 'prog. theor. phys.'
         query = "SELECT hitlist FROM test_idxWORD%02dF WHERE term='%s'" % (get_index_id_from_index_name('journal'), word)
         res = run_sql(query)
         ilist = []
         if res:
             iset = intbitset(res[0][0])
             ilist = iset.tolist()
         self.assertEqual([86], ilist)
 
 
     def test_comparison_for_number_of_items(self):
         """checks the reindexation of journal index"""
         query_test = "SELECT count(*) FROM test_idxWORD%02dF" % get_index_id_from_index_name('journal')
         query_orig = "SELECT count(*) FROM idxWORD%02dF" % get_index_id_from_index_name('journal')
         num_orig = 0
         num_test = 1
         res = run_sql(query_test)
         if res:
             num_test = res[0][0]
         res = run_sql(query_orig)
         if res:
             num_orig = res[0][0]
         self.assertEqual(num_orig, num_test)
 
 
 class BibIndexCJKTokenizerTitleIndexTest(unittest.TestCase):
     """
        Checks CJK tokenization on title index.
     """
     test_counter = 0
     reindexed = False
 
     @classmethod
     def setUp(self):
         """reindexation to new table"""
         if not self.reindexed:
             self.last_updated = reindex_word_tables_into_testtables(
                 'title',
                 parameters = {'tokenizer':'BibIndexCJKTokenizer',
                               'last_updated':'0000-00-00 00:00:00'})
             self.reindexed = True
 
     @classmethod
     def tearDown(self):
         """cleaning up"""
         self.test_counter += 1
         if self.test_counter == 2:
             remove_reindexed_word_testtables('title')
             reverse_changes = prepare_for_index_update(
                 get_index_id_from_index_name('title'),
                 parameters = {'tokenizer':'BibIndexDefaultTokenizer',
                               'last_updated':self.last_updated})
             run_sql(reverse_changes)
 
 
    def test_splitting_and_indexing_CJK_characters_forward_table(self):
         """CJK Tokenizer - searching for a CJK term in title index, forward table"""
         query = "SELECT * from test_idxWORD%02dF where term='\xe6\x95\xac'" % get_index_id_from_index_name('title')
         res = run_sql(query)
         iset = []
         if res:
             iset = intbitset(res[0][2])
             iset = iset.tolist()
         self.assertEqual(iset, [104])
 
    def test_splitting_and_indexing_CJK_characters_reversed_table(self):
        """CJK Tokenizer - comparing terms for the record with Chinese poetry in title index, reversed table"""
         query = "SELECT * from test_idxWORD%02dR where id_bibrec='104'" % get_index_id_from_index_name('title')
         res = run_sql(query)
         iset = []
         if res:
             iset = deserialize_via_marshal(res[0][1])
         self.assertEqual(iset, ['\xe6\x95\xac', '\xe7\x8d\xa8', '\xe4\xba\xad', '\xe5\x9d\x90'])
 
 
 class BibIndexAuthorityRecordTest(unittest.TestCase):
     """Test if BibIndex correctly knows when to update the index for a
     bibliographic record if it is dependent upon an authority record changed
     within the given date range"""
 
     def test_authority_record_recently_updated(self):
         """bibindex - reindexing after recently changed authority record"""
 
         authRecID = 118
         bibRecID = 9
         index_name = 'author'
         table = "idxWORD%02dF" % get_index_id_from_index_name(index_name)
         reindex_for_type_with_bibsched(index_name)
         run_sql("UPDATE bibrec SET modification_date = now() WHERE id = %s", (authRecID,))
         # run bibindex again
         task_id = reindex_for_type_with_bibsched(index_name, force_all=True)
 
         filename = os.path.join(CFG_LOGDIR, 'bibsched_task_' + str(task_id) + '.log')
         _file = open(filename)
         text = _file.read() # small file
         _file.close()
         self.assertTrue(text.find(CFG_BIBINDEX_UPDATE_MESSAGE) >= 0)
         self.assertTrue(text.find(CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR % (table, 1, get_max_recid())) >= 0)
 
     def test_authority_record_enriched_index(self):
         """bibindex - test whether reverse index for bibliographic record
         contains words from referenced authority records"""
         bibRecID = 9
         authority_string = 'jonathan'
         index_name = 'author'
         table = "idxWORD%02dR" % get_index_id_from_index_name(index_name)
 
         reindex_for_type_with_bibsched(index_name, force_all=True)
         self.assertTrue(
             authority_string in deserialize_via_marshal(
                 run_sql("SELECT termlist FROM %s WHERE id_bibrec = %s" % (table, bibRecID))[0][0]
             )
         )
 
     def test_indexing_of_deleted_authority_record(self):
         """bibindex - no info for indexing from deleted authority record"""
         recID = 119 # deleted record
         control_nos = get_control_nos_from_recID(recID)
         info = get_index_strings_by_control_no(control_nos[0])
         self.assertEqual([], info)
 
     def test_authority_record_get_values_by_bibrecID_from_tag(self):
         """bibindex - find authors in authority records for given bibrecID"""
         tags = ['100__a']
         bibRecID = 9
         values = []
         for tag in tags:
             authority_tag = tag[0:3] + "__0"
             control_nos = get_fieldvalues(bibRecID, authority_tag)
             for control_no in control_nos:
                 new_strings = get_index_strings_by_control_no(control_no)
                 values.extend(new_strings)
         self.assertTrue('Ellis, Jonathan Richard' in values)
 
 
 def insert_record_one_and_second_revision():
     """Inserts test record no. 1 and a second revision for that record"""
 
     rev1 = """<record>
               <controlfield tag="001">123456789</controlfield>
               <controlfield tag="005">20110101000000.0</controlfield>
               <datafield tag ="100" ind1=" " ind2=" ">
                 <subfield code="a">Close, John</subfield>
                 <subfield code="u">DESY</subfield>
               </datafield>
               <datafield tag="245" ind1=" " ind2=" ">
                 <subfield code="a">Particles world</subfield>
               </datafield>
             </record>"""
     rev1_final = rev1.replace('<controlfield tag="001">123456789</controlfield>','')
     rev1_final = rev1_final.replace('<controlfield tag="005">20110101000000.0</controlfield>','')
 
     rev2 = rev1.replace('<subfield code="a">Close, John</subfield>', '<subfield code="a">Dawkins, Richard</subfield>')
     rev2 = rev2.replace('Particles world', 'Particles universe')
 
     rec1 = xml_marc_to_records(rev1_final)
     res = bibupload(rec1[0], opt_mode='insert')
     _id = res[1]
     rec = get_record(_id)
     _rev = record_get_field_value(rec, '005', '', '')
 
     #need to index for the first time
     indexes = get_all_indexes(virtual=False)
     wtabs = get_word_tables(indexes)
     for index_id, index_name, index_tags in wtabs:
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern='idxWORD%02dF',
                               wordtable_type = CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=50)
         wordTable.add_recIDs([[_id, _id]], 10000)
 
     #upload the second revision, but don't index
     rev2_final = rev2.replace('123456789', str(_id))
     rev2_final = rev2_final.replace('20110101000000.0', _rev)
     rec2 = xml_marc_to_records(rev2_final)
     res = bibupload(rec2[0], opt_mode='correct')
 
     return _id
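+
+# Note on the revision trick used above and below: bibupload(..., opt_mode='insert')
+# returns the assigned record id in res[1]; re-uploading the XML with the 001
+# controlfield set to that id and the 005 controlfield set to the stored
+# revision datetime, using opt_mode='correct', registers a second revision.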
 
 
 def insert_record_two_and_second_revision():
     """Inserts test record no. 2 and a revision for that record"""
 
     rev1 = """<record>
               <controlfield tag="001">123456789</controlfield>
               <controlfield tag="005">20110101000000.0</controlfield>
               <datafield tag ="100" ind1=" " ind2=" ">
                 <subfield code="a">Locke, John</subfield>
                 <subfield code="u">UNITRA</subfield>
               </datafield>
               <datafield tag="245" ind1=" " ind2=" ">
                 <subfield code="a">Collision course</subfield>
               </datafield>
             </record>"""
     rev1_final = rev1.replace('<controlfield tag="001">123456789</controlfield>','')
     rev1_final = rev1_final.replace('<controlfield tag="005">20110101000000.0</controlfield>','')
 
     rev2 = rev1.replace('Collision course', 'Course of collision')
 
     rec1 = xml_marc_to_records(rev1_final)
     res = bibupload(rec1[0], opt_mode='insert')
     id_bibrec = res[1]
     rec = get_record(id_bibrec)
     _rev = record_get_field_value(rec, '005', '', '')
 
     #need to index for the first time
     indexes = get_all_indexes(virtual=False)
     wtabs = get_word_tables(indexes)
     for index_id, index_name, index_tags in wtabs:
         wordTable = WordTable(index_name=index_name,
                               index_id=index_id,
                               fields_to_index=index_tags,
                               table_name_pattern='idxWORD%02dF',
                               wordtable_type = CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                               tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                               wash_index_terms=50)
         wordTable.add_recIDs([[id_bibrec, id_bibrec]], 10000)
 
     #upload the second revision, but don't index
     rev2_final = rev2.replace('123456789', str(id_bibrec))
     rev2_final = rev2_final.replace('20110101000000.0', _rev)
     rec2 = xml_marc_to_records(rev2_final)
     res = bibupload(rec2[0], opt_mode='correct')
 
     return id_bibrec
 
 
 def create_index_tables(index_id):
     query_create = """CREATE TABLE IF NOT EXISTS idxWORD%02dF (
                       id mediumint(9) unsigned NOT NULL auto_increment,
                       term varchar(50) default NULL,
                       hitlist longblob,
                       PRIMARY KEY  (id),
                       UNIQUE KEY term (term)
                     ) ENGINE=MyISAM"""
 
     query_create_r = """CREATE TABLE IF NOT EXISTS idxWORD%02dR (
                         id_bibrec mediumint(9) unsigned NOT NULL,
                         termlist longblob,
                         type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                         PRIMARY KEY (id_bibrec,type)
                       ) ENGINE=MyISAM"""
     run_sql(query_create % index_id)
     run_sql(query_create_r % index_id)
 
 
 def drop_index_tables(index_id):
     query_drop = """DROP TABLE IF EXISTS idxWORD%02d%s"""
     run_sql(query_drop % (index_id, "F"))
     run_sql(query_drop % (index_id, "R"))
 
 
 def create_virtual_index(index_id, dependent_indexes):
     """creates new virtual index and binds it to specific dependent indexes"""
     query = """INSERT INTO idxINDEX (id, name, tokenizer) VALUES (%s, 'testindex', 'BibIndexDefaultTokenizer')"""
     run_sql(query % index_id)
     query = """INSERT INTO idxINDEX_idxINDEX VALUES (%s, %s)"""
     for index in dependent_indexes:
         run_sql(query % (index_id, get_index_id_from_index_name(index)))
     create_index_tables(index_id)
 
 
 def remove_virtual_index(index_id):
     """removes tables and other traces after virtual index"""
     drop_index_tables(index_id)
     query = """DELETE FROM idxINDEX WHERE id=%s""" % index_id
     run_sql(query)
     query = """DELETE FROM idxINDEX_idxINDEX WHERE id_virtual=%s"""
     run_sql(query % index_id)
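+
+# Usage sketch mirroring the virtual-index tests below: create a virtual index
+# under a free id, bind it to its dependent indexes, then clean up:
+#
+#     create_virtual_index(40, ["authorcount", "journal", "year"])
+#     # ... exercise idxWORD40F / idxWORD40R ...
+#     remove_virtual_index(40)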
 
 
 class BibIndexFindingAffectedIndexes(unittest.TestCase):
     """
     Checks if function 'find_affected_records_for_index'
     works correctly.
     """
 
     counter = 0
     indexes = ['global', 'fulltext', 'caption', 'journal', 'miscellaneous', 'reportnumber', 'year']
 
     @classmethod
     def setUp(self):
         if self.counter == 0:
             self.last_updated = dict(get_last_updated_all_indexes())
             res = run_sql("SELECT job_date FROM hstRECORD WHERE id_bibrec=10 AND affected_fields<>''")
             self.hst_date = res[0][0]
             date_to_set = self.hst_date - timedelta(seconds=1)
             for index in self.indexes:
                 run_sql("""UPDATE idxINDEX SET last_updated=%s
                            WHERE name=%s""", (str(date_to_set), index))
 
     @classmethod
     def tearDown(self):
         self.counter += 1
         if self.counter >= 8:
             for index in self.indexes:
                 run_sql("""UPDATE idxINDEX SET last_updated=%s
                            WHERE name=%s""", (self.last_updated[index], index))
 
     def test_find_proper_indexes(self):
         """bibindex - checks if affected indexes are found correctly"""
-        records_for_indexes = find_affected_records_for_index([], [[1,20]])
+        records_for_indexes = find_affected_records_for_index(get_all_indexes(virtual=False),
+                                                              [[1,20]])
         self.assertEqual(sorted(['miscellaneous', 'fulltext', 'caption', 'journal', 'reportnumber', 'year']),
                          sorted(records_for_indexes.keys()))
 
    def test_find_proper_records_for_miscellaneous_index(self):
         """bibindex - checks if affected recids are found correctly for miscellaneous index"""
-        records_for_indexes = find_affected_records_for_index([], [[1,20]])
+        records_for_indexes = find_affected_records_for_index(get_all_indexes(virtual=False),
+                                                              [[1,20]])
        self.assertEqual(records_for_indexes['miscellaneous'], [10, 12])
 
     def test_find_proper_records_for_year_index(self):
         """bibindex - checks if affected recids are found correctly for year index"""
-        records_for_indexes = find_affected_records_for_index("", [[1,20]])
+        records_for_indexes = find_affected_records_for_index(get_all_indexes(virtual=False),
+                                                              [[1,20]])
        self.assertEqual(records_for_indexes['year'], [10, 12])
 
     def test_find_proper_records_for_caption_index(self):
         """bibindex - checks if affected recids are found correctly for caption index"""
-        records_for_indexes = find_affected_records_for_index("", [[1,100]])
+        records_for_indexes = find_affected_records_for_index(get_all_indexes(virtual=False),
+                                                              [[1,100]])
        self.assertEqual(records_for_indexes['caption'], [10, 12, 55, 98])
 
     def test_find_proper_records_for_journal_index(self):
         """bibindex - checks if affected recids are found correctly for journal index"""
-        records_for_indexes = find_affected_records_for_index("", [[1,100]])
+        records_for_indexes = find_affected_records_for_index(get_all_indexes(virtual=False),
+                                                              [[1,100]])
         self.assertEqual(records_for_indexes['journal'], [10])
 
     def test_find_proper_records_specified_only_year(self):
         """bibindex - checks if affected recids are found correctly for year index if we specify only year index as input"""
-        records_for_indexes = find_affected_records_for_index("year", [[1, 100]])
+        records_for_indexes = find_affected_records_for_index(["year"], [[1, 100]])
         self.assertEqual(records_for_indexes["year"], [10, 12, 55])
 
     def test_find_proper_records_force_all(self):
         """bibindex - checks if all recids will be assigned to all specified indexes"""
-        records_for_indexes = find_affected_records_for_index("year,title", [[10, 15]], True)
+        records_for_indexes = find_affected_records_for_index(["year", "title"], [[10, 15]], True)
         self.assertEqual(records_for_indexes["year"], records_for_indexes["title"])
         self.assertEqual(records_for_indexes["year"], [10, 11, 12, 13, 14, 15])
 
     def test_find_proper_records_nothing_for_title_index(self):
         """bibindex - checks if nothing was found for title index in range of records: 1 - 20"""
-        records_for_indexes = find_affected_records_for_index("title", [[1, 20]])
+        records_for_indexes = find_affected_records_for_index(["title"], [[1, 20]])
        self.assertRaises(KeyError, lambda: records_for_indexes["title"])
 
 
 
 
 class BibIndexIndexingAffectedIndexes(unittest.TestCase):
 
     started = False
     records = []
     counter = 0
 
     @classmethod
     def setUp(self):
         self.counter += 1
         if not self.started:
             self.records.append(insert_record_one_and_second_revision())
             self.records.append(insert_record_two_and_second_revision())
-            records_for_indexes = find_affected_records_for_index([], [self.records])
+            records_for_indexes = find_affected_records_for_index(get_all_indexes(virtual=False),
+                                                                  [self.records])
             wtabs = get_word_tables(records_for_indexes.keys())
             for index_id, index_name, index_tags in wtabs:
                 wordTable = WordTable(index_name=index_name,
                                       index_id=index_id,
                                       fields_to_index=index_tags,
                                       table_name_pattern='idxWORD%02dF',
                                       wordtable_type = CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                                       tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                       wash_index_terms=50)
                 wordTable.add_recIDs([self.records], 10000)
             self.started = True
 
     @classmethod
     def tearDown(self):
         if self.counter == 3:
             for rec in self.records:
                 wipe_out_record_from_all_tables(rec)
             indexes = get_all_indexes(virtual=False)
             wtabs = get_word_tables(indexes)
             for index_id, index_name, index_tags in wtabs:
                 wordTable = WordTable(index_name=index_name,
                                       index_id=index_id,
                                       fields_to_index=index_tags,
                                       table_name_pattern='idxWORD%02dF',
                                       wordtable_type = CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                                       tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                       wash_index_terms=50)
                 wordTable.del_recIDs([self.records])
 
 
     def test_proper_content_in_title_index(self):
         """bibindex - checks reindexation of title index for test records.."""
         index_id = get_index_id_from_index_name('title')
         query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec IN (""" % (index_id,)
         query = query + ", ".join(map(str, self.records)) + ")"
         resp = run_sql(query)
        title_rec1 = deserialize_via_marshal(resp[0][0])
        title_rec2 = deserialize_via_marshal(resp[1][0])
        self.assertEqual(['univers', 'particl'], title_rec1)
        self.assertEqual(['of', 'cours', 'collis'], title_rec2)
 
 
     def test_proper_content_in_author_index(self):
         """bibindex - checks reindexation of author index for test records.."""
         index_id = get_index_id_from_index_name('author')
         query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec IN (""" % (index_id,)
         query = query + ", ".join(map(str, self.records)) + ")"
         resp = run_sql(query)
         author_rec1 = deserialize_via_marshal(resp[0][0])
         author_rec2 = deserialize_via_marshal(resp[1][0])
        self.assertEqual(['dawkins', 'richard'], author_rec1)
         self.assertEqual(['john', 'locke'], author_rec2)
 
 
     def test_proper_content_in_global_index(self):
         """bibindex - checks reindexation of global index for test records.."""
         index_id = get_index_id_from_index_name('global')
         query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec IN (""" % (index_id,)
         query = query + ", ".join(map(str, self.records)) + ")"
         resp = run_sql(query)
         global_rec1 = deserialize_via_marshal(resp[0][0])
         global_rec2 = deserialize_via_marshal(resp[1][0])
        self.assertTrue('dawkin' in global_rec1)
        self.assertFalse('close' in global_rec1)
        self.assertTrue('univers' in global_rec1)
        self.assertTrue('john' in global_rec2)
        self.assertFalse('john' in global_rec1)
 
 
 class BibIndexFindingIndexesForTags(unittest.TestCase):
     """ Tests function 'get_tag_indexes' """
 
     def test_fulltext_tag_virtual_indexes_on(self):
         """bibindex - checks if 'get_tag_indexes' for tag 8564_u will find only 'fulltext' index"""
         self.assertEqual(('fulltext',), zip(*get_tag_indexes('8564_u'))[1])
 
     def test_title_tag_virtual_indexes_on(self):
         """bibindex - checks if 'get_tag_indexes' for tag 245__% will find also 'global' index"""
         self.assertEqual(('title', 'exacttitle', 'global'), zip(*get_tag_indexes('245__%'))[1])
 
     def test_title_tag_virtual_indexes_off(self):
         """bibindex - checks if 'get_tag_indexes' for tag 245__% wont find 'global' index (with virtual=False)"""
         self.assertEqual(('title', 'exacttitle'), zip(*get_tag_indexes('245__%', virtual=False))[1])
 
     def test_author_tag_virtual_indexes_on(self):
         """bibindex - checks 'get_tag_indexes' for tag '100'"""
         self.assertEqual(('author', 'affiliation', 'exactauthor', 'firstauthor',
                           'exactfirstauthor', 'authorcount', 'authorityauthor',
                           'miscellaneous', 'global'),
                          zip(*get_tag_indexes('100'))[1])
 
     def test_author_exact_tag_virtual_indexes_off(self):
         """bibindex - checks 'get_tag_indexes' for tag '100__a'"""
         self.assertEqual(('author', 'exactauthor', 'firstauthor',
                           'exactfirstauthor', 'authorcount',
                           'authorityauthor', 'miscellaneous'),
                          zip(*get_tag_indexes('100__a', virtual=False))[1])
 
     def test_wide_tag_virtual_indexes_off(self):
         """bibindex - checks 'get_tag_indexes' for tag like '86%'"""
         self.assertEqual(('miscellaneous',), zip(*get_tag_indexes('86%', virtual=False))[1])
 
     def test_909_tags_in_misc_index(self):
         """bibindex - checks connection between misc index and tags: 909C1%, 909C4%"""
         self.assertEqual(('miscellaneous',), zip(*get_tag_indexes('909C1%', virtual=False))[1])
        self.assertFalse('miscellaneous' in zip(*get_tag_indexes('909C4%', virtual=False))[1])
 
     def test_year_tag_virtual_indexes_on(self):
         """bibindex - checks 'get_tag_indexes' for tag 909C0y"""
         self.assertEqual(('year', 'global'), zip(*get_tag_indexes('909C0y'))[1])
 
     def test_wide_tag_authority_index_virtual_indexes_off(self):
         """bibindex - checks 'get_tag_indexes' for tag like '15%'"""
         self.assertEqual(('authoritysubject', 'miscellaneous'), zip(*get_tag_indexes('15%',virtual=False))[1])
 
 
 class BibIndexFindingTagsForIndexes(unittest.TestCase):
     """ Tests function 'get_index_tags' """
 
 
     def test_tags_for_author_index(self):
         """bibindex - checks if 'get_index_tags' find proper tags for 'author' index """
         self.assertEqual(get_index_tags('author'), ['100__a', '700__a'])
 
     def test_tags_for_global_index_virtual_indexes_off(self):
         """bibindex - checks if 'get_index_tags' find proper tags for 'global' index """
         self.assertEqual(get_index_tags('global', virtual=False),[])
 
     def test_tags_for_global_index_virtual_indexes_on(self):
         """bibindex - checks if 'get_index_tags' find proper tags for 'global' index """
         tags = get_index_tags('global')
         self.assertEqual('86%' in tags, True)
         self.assertEqual('100__a' in tags, True)
         self.assertEqual('245__%' in tags, True)
 
 
 class BibIndexGlobalIndexContentTest(unittest.TestCase):
     """ Tests if virtual global index is correctly indexed"""
 
     def is_part_of(self, container, content):
         """checks if content is a part of container"""
         ctr = set(container)
         cont = set(content)
         return cont.issubset(ctr)
 
     def test_title_index_compatibility_reversed_table(self):
         """bibindex - checks if the same words are in title and global index, reversed table"""
         global_id = get_index_id_from_index_name('global')
         title_id = get_index_id_from_index_name('title')
         for rec in range(1, 4):
             query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s""" % (title_id, rec)
             res = run_sql(query)
             termlist_title = deserialize_via_marshal(res[0][0])
             query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s""" % (global_id, rec)
             glob = run_sql(query)
             termlist_global = deserialize_via_marshal(glob[0][0])
            self.assertTrue(self.is_part_of(termlist_global, termlist_title))
 
     def test_abstract_index_compatibility_reversed_table(self):
         """bibindex - checks if the same words are in abstract and global index, reversed table"""
         global_id = get_index_id_from_index_name('global')
         abstract_id = get_index_id_from_index_name('abstract')
         for rec in range(6, 9):
             query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s""" % (abstract_id, rec)
             res = run_sql(query)
             termlist_abstract = deserialize_via_marshal(res[0][0])
             query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s""" % (global_id, rec)
             glob = run_sql(query)
             termlist_global = deserialize_via_marshal(glob[0][0])
            self.assertTrue(self.is_part_of(termlist_global, termlist_abstract))
 
     def test_misc_index_compatibility_reversed_table(self):
         """bibindex - checks if the same words are in misc and global index, reversed table"""
         global_id = get_index_id_from_index_name('global')
         misc_id = get_index_id_from_index_name('miscellaneous')
         for rec in range(10, 14):
             query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s""" % (misc_id, rec)
             res = run_sql(query)
             termlist_misc = deserialize_via_marshal(res[0][0])
             query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s""" % (global_id, rec)
             glob = run_sql(query)
             termlist_global = deserialize_via_marshal(glob[0][0])
            self.assertTrue(self.is_part_of(termlist_global, termlist_misc))
 
     def test_journal_index_compatibility_forward_table(self):
         """bibindex - checks if the same words are in journal and global index, forward table"""
         global_id = get_index_id_from_index_name('global')
         journal_id = get_index_id_from_index_name('journal')
         query = """SELECT term FROM idxWORD%02dF""" % journal_id
         res = zip(*run_sql(query))[0]
         query = """SELECT term FROM idxWORD%02dF""" % global_id
         glob = zip(*run_sql(query))[0]
        self.assertTrue(self.is_part_of(glob, res))
 
     def test_keyword_index_compatibility_forward_table(self):
         """bibindex - checks if the same pairs are in keyword and global index, forward table"""
         global_id = get_index_id_from_index_name('global')
         keyword_id = get_index_id_from_index_name('keyword')
         query = """SELECT term FROM idxPAIR%02dF""" % keyword_id
         res = zip(*run_sql(query))[0]
         query = """SELECT term FROM idxPAIR%02dF""" % global_id
         glob = zip(*run_sql(query))[0]
        self.assertTrue(self.is_part_of(glob, res))
 
     def test_affiliation_index_compatibility_forward_table(self):
         """bibindex - checks if the same phrases are in affiliation and global index, forward table"""
         global_id = get_index_id_from_index_name('global')
         affiliation_id = get_index_id_from_index_name('affiliation')
         query = """SELECT term FROM idxPHRASE%02dF""" % affiliation_id
         res = zip(*run_sql(query))[0]
         query = """SELECT term FROM idxPHRASE%02dF""" % global_id
         glob = zip(*run_sql(query))[0]
        self.assertTrue(self.is_part_of(glob, res))
 
 
 class BibIndexVirtualIndexAlsoChangesTest(unittest.TestCase):
     """ Tests if virtual index changes after changes in dependent index"""
 
     counter = 0
     indexes = ["title"]
     _id = 39
 
     @classmethod
     def prepare_virtual_index(self):
         """creates new virtual index and binds it to specific normal index"""
         create_virtual_index(self._id, self.indexes)
         wtabs = get_word_tables(self.indexes)
         for index_id, index_name, index_tags in wtabs:
             wordTable = WordTable(index_name=index_name,
                                   index_id=index_id,
                                   fields_to_index=index_tags,
                                   table_name_pattern='idxWORD%02dF',
                                   wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                                   tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                   wash_index_terms=50)
             wordTable.add_recIDs([[1, 10]], 1000)
 
     @classmethod
     def reindex_virtual_index(self, special_tokenizer=False):
         """reindexes virtual and dependent indexes with different tokenizer"""
         def tokenize_for_words(phrase):
             return phrase.split(" ")
 
         wtabs = get_word_tables(self.indexes)
         for index_id, index_name, index_tags in wtabs:
             wordTable = WordTable(index_name=index_name,
                                   index_id=index_id,
                                   fields_to_index=index_tags,
                                   table_name_pattern='idxWORD%02dF',
                                   wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                                   tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                   wash_index_terms=50)
            if special_tokenizer:
                 wordTable.default_tokenizer_function = tokenize_for_words
             wordTable.add_recIDs([[1, 10]], 1000)
 
     @classmethod
     def setUp(self):
         self.counter += 1
         if self.counter == 1:
             self.prepare_virtual_index()
         elif self.counter == 2:
             self.reindex_virtual_index(special_tokenizer=True)
 
     @classmethod
     def tearDown(self):
         if self.counter == 3:
             self.reindex_virtual_index()
         elif self.counter == 4:
             remove_virtual_index(self._id)
 
     def test_virtual_index_1_has_10_records(self):
         """bibindex - checks if virtual index was filled with only ten records from title index"""
         query = "SELECT count(*) FROM idxWORD%02dR" % self._id
         self.assertEqual(10, run_sql(query)[0][0])
 
     def test_virtual_index_2_correct_content_record_1(self):
         """bibindex - after reindexing with different tokenizer virtual index also changes - record 1"""
         query = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s" % (self._id, 1)
         self.assertEqual('Higgs' in deserialize_via_marshal(run_sql(query)[0][0]), True)
 
     def test_virtual_index_3_correct_content_record_3(self):
         """bibindex - after reindexing with different tokenizer virtual index also changes - record 3"""
         query = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s" % (self._id, 3)
         self.assertEqual(['Conference', 'Biology', 'Molecular', 'European'],
                          deserialize_via_marshal(run_sql(query)[0][0]))
 
     def test_virtual_index_4_cleaned_up(self):
         """bibindex - after reindexing with normal title tokenizer everything is back to normal"""
        # this is the version of the test for installations with the PyStemmer package;
        # without this package the word 'biology' is stemmed differently
         query = "SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=%s" % (self._id, 3)
         self.assertEqual(['biolog', 'molecular', 'confer', 'european'],
                          deserialize_via_marshal(run_sql(query)[0][0]))
 
 
 class BibIndexVirtualIndexRemovalTest(unittest.TestCase):
 
     counter = 0
     indexes = ["authorcount", "journal", "year"]
     _id = 40
 
     @classmethod
     def setUp(self):
         self.counter += 1
         if self.counter == 1:
             create_virtual_index(self._id, self.indexes)
             wtabs = get_word_tables(self.indexes)
             for index_id, index_name, index_tags in wtabs:
                 wordTable = WordTable(index_name=index_name,
                                       index_id=index_id,
                                       fields_to_index=index_tags,
                                       table_name_pattern='idxWORD%02dF',
                                       wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
                                       tag_to_tokenizer_map={'8564_u': "BibIndexFulltextTokenizer"},
                                       wash_index_terms=50)
                 wordTable.add_recIDs([[1, 113]], 1000)
             #removal part
             w = WordTable("testindex", self._id, [], "idxWORD%02dF", CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"], {}, 50)
             w.remove_dependent_index(int(get_index_id_from_index_name("authorcount")))
 
 
     @classmethod
     def tearDown(self):
         if self.counter == 9:
             remove_virtual_index(self._id)
 
 
     def test_authorcount_removal_number_of_items(self):
         """bibindex - checks virtual index after authorcount index removal - number of items"""
         query = """SELECT count(*) FROM idxWORD%02dF"""
         res = run_sql(query % self._id)
         self.assertEqual(157, res[0][0])
 
     def test_authorcount_removal_common_terms_intact(self):
         """bibindex - checks virtual index after authorcount index removal - common terms"""
         query = """SELECT term FROM idxWORD%02dF WHERE term IN ('10', '2', '4', '7')"""
         res = run_sql(query % self._id)
         self.assertEqual(4, len(res))
 
     def test_authorcount_removal_no_315_term(self):
         """bibindex - checks virtual index after authorcount index removal - no '315' term in virtual index"""
         query = """SELECT term FROM idxWORD%02dF WHERE term='315'"""
         res = run_sql(query % self._id)
         self.assertEqual(0, len(res))
 
     def test_authorcount_removal_term_10_hitlist(self):
         """bibindex - checks virtual index after authorcount index removal - hitlist for '10' term"""
         query = """SELECT hitlist FROM idxWORD%02dF WHERE term='10'"""
         res = run_sql(query % self._id)
         self.assertEqual([80, 92], intbitset(res[0][0]).tolist())
 
     def test_authorcount_removal_term_1985_hitlist(self):
         """bibindex - checks virtual index after authorcount index removal - hitlist for '1985' term"""
         query = """SELECT hitlist FROM idxWORD%02dF WHERE term='1985'"""
         res = run_sql(query % self._id)
         self.assertEqual([16, 18], intbitset(res[0][0]).tolist())
 
     def test_authorcount_removal_record_16_termlist(self):
         """bibindex - checks virtual index after authorcount index removal - termlist for record 16"""
         query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=16"""
         res = run_sql(query % self._id)
         self.assertEqual(['1985'], deserialize_via_marshal(res[0][0]))
 
     def test_authorcount_removal_record_10_termlist(self):
         """bibindex - checks virtual index after authorcount index removal - termlist for record 10"""
         query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=10"""
         res = run_sql(query % self._id)
         self.assertEqual(['2002', 'Eur. Phys. J., C'], deserialize_via_marshal(res[0][0]))
 
     def test_year_removal_number_of_items(self):
         """bibindex - checks virtual index after year removal - number of items"""
         # must run after the test_authorcount_removal_* tests above
         # (unittest executes test methods in alphabetical order)
         w = WordTable("testindex", self._id, [], "idxWORD%02dF",
                       CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"], {}, 50)
         w.remove_dependent_index(int(get_index_id_from_index_name("year")))
         query = """SELECT count(*) FROM idxWORD%02dF"""
         res = run_sql(query % self._id)
         self.assertEqual(134, res[0][0])
 
     def test_year_removal_record_18_termlist(self):
         """bibindex - checks virtual index after year removal - termlist for record 18"""
         # must run after test_year_removal_number_of_items, which performs the year removal
         query = """SELECT termlist FROM idxWORD%02dR WHERE id_bibrec=18"""
         res = run_sql(query % self._id)
         self.assertEqual(['151', '357', '1985', 'Phys. Lett., B 151 (1985) 357', 'Phys. Lett., B'],
                          deserialize_via_marshal(res[0][0]))
 
+
+class BibIndexCLICallTest(unittest.TestCase):
+    """Tests whether bibindex tasks run from the CLI (via the bibsched daemon) behave correctly"""
+
+    def test_correct_message_for_wrong_index_names(self):
+        """bibindex - checks that the proper error message appears for a non-existent index name"""
+        index_name = "titlexrg"  # deliberately non-existent index name
+        task_id = reindex_for_type_with_bibsched(index_name, force_all=True)
+        filename = os.path.join(CFG_LOGDIR, 'bibsched_task_' + str(task_id) + '.log')
+        with open(filename) as fl:
+            text = fl.read()  # the task log is small
+        self.assertTrue("Specified indexes can't be found." in text)
+
+    def test_correct_message_for_up_to_date_indexes(self):
+        """bibindex - checks that the proper message appears when the index is already up to date"""
+        index_name = "abstract"
+        task_id = reindex_for_type_with_bibsched(index_name)
+        filename = os.path.join(CFG_LOGDIR, 'bibsched_task_' + str(task_id) + '.log')
+        with open(filename) as fl:
+            text = fl.read()  # the task log is small
+        self.assertTrue("Selected indexes/recIDs are up to date." in text)
+
 
 TEST_SUITE = make_test_suite(BibIndexRemoveStopwordsTest,
                              BibIndexRemoveLatexTest,
                              BibIndexRemoveHtmlTest,
                              BibIndexYearIndexTest,
                              BibIndexAuthorCountIndexTest,
                              BibIndexItemCountIndexTest,
                              BibIndexFiletypeIndexTest,
                              BibIndexJournalIndexTest,
                              BibIndexCJKTokenizerTitleIndexTest,
                              BibIndexAuthorityRecordTest,
                              BibIndexFindingAffectedIndexes,
                              BibIndexIndexingAffectedIndexes,
                              BibIndexFindingIndexesForTags,
                              BibIndexFindingTagsForIndexes,
                              BibIndexGlobalIndexContentTest,
                              BibIndexVirtualIndexAlsoChangesTest,
-                             BibIndexVirtualIndexRemovalTest)
+                             BibIndexVirtualIndexRemovalTest,
+                             BibIndexCLICallTest)
 
 if __name__ == "__main__":
     run_test_suite(TEST_SUITE, warn_user=True)