diff --git a/modules/bibauthorid/lib/bibauthorid_dbinterface.py b/modules/bibauthorid/lib/bibauthorid_dbinterface.py
index f4fdcf4a7..114ff7aa4 100644
--- a/modules/bibauthorid/lib/bibauthorid_dbinterface.py
+++ b/modules/bibauthorid/lib/bibauthorid_dbinterface.py
@@ -1,2243 +1,2243 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2011, 2012 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 '''
     bibauthorid_bdinterface
     This is the only file in bibauthorid which should
     use the data base. It should have an interface for
     all other files in the module.
 '''
 import bibauthorid_config as bconfig
 import sys
 import numpy
 import cPickle
 import zlib
 
 from itertools import groupby, count, ifilter, chain, imap
 from operator import itemgetter
 from invenio.access_control_engine import acc_authorize_action
 
 from bibauthorid_name_utils import split_name_parts
 from bibauthorid_name_utils import create_canonical_name
 from bibauthorid_name_utils import create_normalized_name
 from bibauthorid_general_utils import bibauthor_print
 from bibauthorid_general_utils import update_status \
                                     , update_status_final
 from dbquery import run_sql \
                     , OperationalError \
                     , ProgrammingError
 
 
 def get_sql_time():
     '''
     Returns the time acoarding to the database. The type is datetime.datetime.
     '''
     return run_sql("select now()")[0][0]
 
 
 def set_personid_row(person_id, tag, value, opt1=0, opt2=0, opt3=""):
     '''
     Inserts data and the additional options of a person by a given personid and tag.
     '''
     run_sql("INSERT INTO aidPERSONIDDATA "
             "(`personid`, `tag`, `data`, `opt1`, `opt2`, `opt3`) "
             "VALUES (%s, %s, %s, %s, %s, %s)",
             (person_id, tag, value, opt1, opt2, opt3))
 
 
 def get_personid_row(person_id, tag):
     '''
     Returns all the records associated to a person and a tag.
 
     @param person_id: id of the person to read the attribute from
     @type person_id: int
     @param tag: the tag to read.
     @type tag: string
 
     @return: the data associated with a virtual author
     @rtype: tuple of tuples
     '''
     return run_sql("SELECT data, opt1, opt2, opt3 "
                    "data FROM aidPERSONIDDATA "
                    "WHERE personid = %s AND tag = %s",
                    (person_id, tag))
 
 
 def del_personid_row(tag, person_id=None, value=None):
     '''
     Change the value associated to the given tag for a certain person.
     @param person_id: ID of the person
     @type person_id: int
     @param tag: tag to be updated
     @type tag: string
     @param value: value to be written for the tag
     @type value: string
     '''
     if person_id:
         if value:
             run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s and data=%s", (person_id, tag, value,))
         else:
             run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s", (person_id, tag,))
     else:
         if value:
             run_sql("delete from aidPERSONIDDATA where tag=%s and data=%s", (tag, value,))
         else:
             run_sql("delete from aidPERSONIDDATA where tag=%s", (tag,))
 
 
 def get_all_papers_of_pids(personid_list):
     '''
     Get all papers of authors in a given list and sorts the results
     by bibrefrec.
     @param personid_list: list with the authors.
     @type personid_list: iteratable of integers.
     '''
     if personid_list:
         plist = list_2_SQL_str(personid_list, lambda x: str(x))
         paps = run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
                        "from aidPERSONIDPAPERS "
                        "where personid in %s "
                        % plist)
 
         inner = set(row[1:4] for row in paps if row[4] > -2)
 
         return (x for x in paps if x[1:4] in inner)
 
     return ()
 
 
 def del_person_not_manually_claimed_papers(pid):
     '''
     Deletes papers from a person which have not been manually claimed.
     '''
     run_sql("delete from aidPERSONIDPAPERS "
             "where and (flag <> '-2' and flag <> '2') and personid=%s", (pid,))
 
 def get_personid_from_uid(uid):
     '''
     Returns the personID associated with the provided ui.
     If the personID is already associated with the person the secon parameter is True, false otherwise.
     @param uid: userID
     @type uid: ((int,),)
     '''
     pid = run_sql("select personid from aidPERSONIDDATA where tag=%s and data=%s", ('uid', str(uid[0][0])))
     if len(pid) == 1:
         return (pid[0], True)
     else:
         return  ([-1], False)
 
 def get_uid_from_personid(pid):
     uid = run_sql("select data from aidPERSONIDDATA where tag='uid' and personid = %s", (pid,))
     if uid:
         return uid[0][0]
     else:
         return None
 
 
 def get_new_personid():
     pids = (run_sql("select max(personid) from aidPERSONIDDATA")[0][0],
             run_sql("select max(personid) from aidPERSONIDPAPERS")[0][0])
 
     pids = tuple(int(p) for p in pids if p != None)
 
     if len(pids) == 2:
         return max(*pids) + 1
     elif len(pids) == 1:
         return pids[0] + 1
     else:
         return 0
 
 
 def get_existing_personids():
     try:
         pids_data = set(zip(*run_sql("select distinct personid from aidPERSONIDDATA"))[0])
     except IndexError:
         pids_data = set()
 
     try:
         pids_pap = set(zip(*run_sql("select distinct personid from aidPERSONIDPAPERS"))[0])
     except IndexError:
         pids_pap = set()
 
     return pids_data | pids_pap
 
 
 def get_existing_result_clusters():
     return run_sql("select distinct personid from aidRESULTS")
 
 
 def create_new_person(uid= -1, uid_is_owner=False):
     '''
     Create a new person. Set the uid as owner if requested.
     '''
     pid = get_new_personid()
     if uid_is_owner:
         set_personid_row(pid, 'uid', str(uid))
     else:
         set_personid_row(pid, 'user-created', str(uid))
     return pid
 
 def create_new_person_from_uid(uid):
     return create_new_person(uid, uid_is_owner=True)
 
 def new_person_from_signature(sig, name=None):
     '''
     Creates a new person from a signature.
     '''
     pid = get_new_personid()
     add_signature(sig, name, pid)
     return pid
 
 
 def add_signature(sig, name, pid):
     '''
     Inserts a signature in personid.
     '''
     if not name:
         name = get_name_by_bibrecref(sig)
         name = create_normalized_name(split_name_parts(name))
 
     run_sql("INSERT INTO aidPERSONIDPAPERS "
             "(personid, bibref_table, bibref_value, bibrec, name) "
             "VALUES (%s, %s, %s, %s, %s)"
             , (pid, str(sig[0]), sig[1], sig[2], name))
 
 def move_signature(sig, pid):
     '''
     Inserts a signature in personid.
     '''
     run_sql("update aidPERSONIDPAPERS set personid=%s "
             "where bibref_table=%s and  bibref_value=%s "
             "and bibrec=%s and flag <> 2 and flag <> -2",
              (pid,) + sig)
 
 def find_conflicts(sig, pid):
     """
     """
     return run_sql("select bibref_table, bibref_value, bibrec, flag "
                    "from aidPERSONIDPAPERS where "
                    "personid = %s and "
                    "bibrec = %s and "
                    "flag <> -2"
                    , (pid, sig[2]))
 
 def update_request_ticket(person_id, tag_data_tuple, ticket_id=None):
     '''
     Creates / updates a request ticket for a personID
     @param: personid int
     @param: tag_data_tuples 'image' of the ticket: (('paper', '700:316,10'), ('owner', 'admin'), ('external_id', 'ticket_18'))
     @return: ticketid
     '''
     #tags: rt_owner (the owner of the ticket, associating the rt_number to the transaction)
     #      rt_external_id
     #      rt_paper_cornfirm, rt_paper_reject, rt_paper_forget, rt_name, rt_email, rt_whatever
     #flag: rt_number
     if not ticket_id:
         last_id = run_sql("select max(opt1) from aidPERSONIDDATA where personid=%s and tag like %s", (str(person_id), 'rt_%'))[0][0]
 
         if last_id:
             ticket_id = last_id + 1
         else:
             ticket_id = 1
     else:
         delete_request_ticket(person_id, ticket_id)
 
     for d in tag_data_tuple:
         run_sql("insert into aidPERSONIDDATA (personid, tag, data, opt1) "
                 "values (%s,%s,%s,%s)",
                  (str(person_id), 'rt_' + str(d[0]), str(d[1]), str(ticket_id)))
 
     return ticket_id
 
 
 def delete_request_ticket(person_id, ticket_id=None):
     '''
     Removes a ticket from a person_id.
     If ticket_id is not provider removes all the tickets pending on a person.
     '''
     if ticket_id:
         run_sql("delete from aidPERSONIDDATA where personid=%s and tag like %s and opt1 =%s", (str(person_id), 'rt_%', str(ticket_id)))
     else:
         run_sql("delete from aidPERSONIDDATA where personid=%s and tag like %s", (str(person_id), 'rt_%'))
 
 
 def get_all_personids_by_name(regexpr):
     return run_sql("select personid, name "
                    "from aidPERSONIDPAPERS "
                    "where name like %s",
                    (regexpr,))
 
 def get_personids_by_canonical_name(target):
     pid = run_sql("select personid from aidPERSONIDDATA where "
                   "tag='canonical_name' and data like %s", (target,))
     if pid:
         return run_sql("select personid, name from aidPERSONIDPAPERS "
                        "where personid=%s", (pid[0][0],))
     else:
         return []
 
 def get_bibref_modification_status(bibref):
     '''
     Determines if a record attached to a person has been touched by a human
     by checking the flag.
 
     @param pid: The Person ID of the person to check the assignment from
     @type pid: int
     @param bibref: The paper identifier to be checked (e.g. "100:12,144")
     @type bibref: string
 
     returns [bool:human_modified, int:lcul]
     '''
     if not bibref:
         raise ValueError("A bibref is expected!")
 
     head, rec = bibref.split(',')
     table, ref = head.split(':')
     flags = run_sql("SELECT flag, lcul FROM aidPERSONIDPAPERS WHERE "
                     "bibref_table = %s and bibref_value = %s and bibrec = %s"
                     , (table, ref, rec))
     if flags:
         return flags[0]
     else:
         return (False, 0)
 
 
 def get_canonical_id_from_personid(pid):
     '''
     Finds the person id canonical name (e.g. Ellis_J_R_1)
 
     @param pid
     @type int
 
     @return: sql result of the request
     @rtype: tuple of tuple
     '''
     return run_sql("SELECT data FROM aidPERSONIDDATA WHERE "
                    "tag = %s AND personid = %s", ('canonical_name', str(pid)))
 
 
 def get_papers_status(paper):
     '''
     Gets the personID and flag assiciated to papers
     @param papers: list of papers
     @type papers: '100:7531,9024'
     @return: (('data','personID','flag',),)
     @rtype: tuple of tuples
     '''
     head, bibrec = paper.split(',')
     _table, bibref = head.split(':')
 
     rets = run_sql("select PersonID, flag "
                              "from aidPERSONIDPAPERS "
                              "where bibref_table = %s "
                              "and bibref_value = %s "
                              "and bibrec = %s"
                              % (head, bibrec, bibref))
     return [[paper] + list(x) for x in rets]
 
 
 def get_persons_from_recids(recids, return_alt_names=False,
                             return_all_person_papers=False):
     rec_2_pid = dict()
     pid_2_data = dict()
     all_pids = set()
 
     def get_canonical_name(pid):
         return run_sql("SELECT data "
                             "FROM aidPERSONIDDATA "
                             "WHERE tag = %s "
                             "AND personid = %s",
                             ('canonical_name', pid))
 
     for rec in recids:
         pids = run_sql("SELECT personid "
                        "FROM aidPERSONIDPAPERS "
                        "WHERE bibrec = %s "
                        " and flag > -2 ",
                        (rec,))
 
         # for some reason python's set is faster than a mysql distinct
         pids = set(p[0] for p in pids)
         all_pids |= pids
         rec_2_pid[rec] = list(pids)
 
     for pid in all_pids:
         pid_data = {}
 
         canonical = get_canonical_name(pid)
         #We can supposed that this person didn't have a chance to get a canonical name yet
         #because it was not fully processed by it's creator. Anyway it's safe to try to create one
         #before failing miserably
         if not canonical:
             update_personID_canonical_names([pid])
         canonical = get_canonical_name(pid)
 
         #assert len(canonical) == 1
         #This condition cannot hold in case claims or update daemons are run in parallel
         #with this, as it can happen that a person with papers exists for wich a canonical name
         #has not been computed yet. Hence, it will be indexed next time, so it learns.
         #Each person should have at most one canonical name, so:
         assert len(canonical) <= 1
 
         if len(canonical) == 1:
             pid_data = {'canonical_id' : canonical[0][0]}
 
         if return_alt_names:
             names = run_sql("SELECT name "
                             "FROM aidPERSONIDPAPERS "
                             "WHERE personid = %s "
                             " and flag > -2 ",
                             (pid,))
             names = set(n[0] for n in names)
 
             pid_data['alternatative_names'] = list(names)
 
         if return_all_person_papers:
             recs = run_sql("SELECT bibrec "
                            "FROM aidPERSONIDPAPERS "
                            "WHERE personid = %s "
                            " and flag > -2 ",
                            (pid,))
             recs = set(r[0] for r in recs)
 
             pid_data['person_records'] = list(recs)
 
         pid_2_data[pid] = pid_data
 
     return (rec_2_pid, pid_2_data)
 
 
 def get_person_db_names_count(pid, sort_by_count=True):
     '''
     Returns the set of name strings and count associated to a person id.
     The name strings are as found in the database.
     @param pid: ID of the person
     @type pid: ('2',)
     '''
 
     id_2_count = run_sql("select bibref_table, bibref_value "
                          "from aidPERSONIDPAPERS "
                          "where personid = %s "
                          "and flag > -2", (pid,))
 
     ref100 = [refid[1] for refid in id_2_count if refid[0] == '100']
     ref700 = [refid[1] for refid in id_2_count if refid[0] == '700']
 
     ref100_count = dict((key, len(list(data))) for key, data in groupby(sorted(ref100)))
     ref700_count = dict((key, len(list(data))) for key, data in groupby(sorted(ref700)))
 
     if ref100:
         ref100_s = list_2_SQL_str(ref100, str)
         id100_2_str = run_sql("select id, value "
                               "from bib10x "
                               "where id in %s"
                               % ref100_s)
     else:
         id100_2_str = tuple()
 
     if ref700:
         ref700_s = list_2_SQL_str(ref700, str)
         id700_2_str = run_sql("select id, value "
                               "from bib70x "
                               "where id in %s"
                               % ref700_s)
     else:
         id700_2_str = tuple()
 
     ret100 = [(name, ref100_count[refid]) for refid, name in id100_2_str]
     ret700 = [(name, ref700_count[refid]) for refid, name in id700_2_str]
 
     ret = ret100 + ret700
     if sort_by_count:
         ret = sorted(ret, key=itemgetter(1), reverse=True)
     return ret
 
 
 def get_person_id_from_canonical_id(canonical_id):
     '''
     Finds the person id from a canonical name (e.g. Ellis_J_R_1)
 
     @param canonical_id: the canonical ID
     @type canonical_id: string
 
     @return: sql result of the request
     @rtype: tuple of tuple
     '''
     return run_sql("SELECT personid FROM aidPERSONIDDATA WHERE "
                    "tag='canonical_name' AND data = %s", (canonical_id,))
 
 
 def get_person_names_count(pid):
     '''
     Returns the set of name strings and count associated to a person id
     @param pid: ID of the person
     @type pid: ('2',)
     @param value: value to be written for the tag
     @type value: string
     '''
     return run_sql("select name, count(name) from aidPERSONIDPAPERS where "
                    "personid=%s and flag > -2 group by name", (pid,))
 
 
 def get_person_db_names_set(pid):
     '''
     Returns the set of db_name strings associated to a person id
     @param pid: ID of the person
     @type pid: 2
     '''
 
     names = get_person_db_names_count(pid)
     if names:
         return zip(set(zip(*names)[0]))
     else:
         return []
 
 def get_personids_from_bibrec(bibrec):
     '''
     Returns all the personids associated to a bibrec.
     '''
 
     pids = run_sql("select distinct personid from aidPERSONIDPAPERS where bibrec=%s and flag > -2", (bibrec,))
 
     if pids:
         return zip(*pids)[0]
     else:
         return []
 
 def get_personids_and_papers_from_bibrecs(bibrecs, limit_by_name=None):
     '''
     '''
     if not bibrecs:
         return []
     else:
         bibrecs = list_2_SQL_str(bibrecs)
     if limit_by_name:
         try:
             surname = split_name_parts(limit_by_name)[0]
         except IndexError:
             surname = None
     else:
         surname = None
     if not surname:
         data = run_sql("select personid,bibrec from aidPERSONIDPAPERS where bibrec in %s" % (bibrecs,))
     else:
         surname = split_name_parts(limit_by_name)[0]
         data = run_sql(("select personid,bibrec from aidPERSONIDPAPERS where bibrec in %s "
                        "and name like " % bibrecs) + ' %s ', (surname + '%',))
     pidlist = [(k, set([s[1] for s in d]))
                for k, d in groupby(sorted(data, key=lambda x:x[0]), key=lambda x:x[0])]
     pidlist = sorted(pidlist, key=lambda x:len(x[1]), reverse=True)
     return pidlist
 
 def get_person_bibrecs(pid):
     '''
     Returns bibrecs associated with a personid
     @param pid: integer personid
     @return [bibrec1,...,bibrecN]
     '''
     papers = run_sql("select bibrec from aidPERSONIDPAPERS where personid=%s", (str(pid),))
 
     if papers:
         return list(set(zip(*papers)[0]))
     else:
         return []
 
 def get_person_papers(pid, flag,
                       show_author_name=False,
                       show_title=False,
                       show_rt_status=False,
                       show_affiliations=False,
                       show_date=False,
                       show_experiment=False):
     query = "bibref_table, bibref_value, bibrec, flag"
     if show_author_name:
         query += ", name"
 
     all_papers = run_sql("SELECT " + query + " "
                          "FROM aidPERSONIDPAPERS "
                          "WHERE personid = %s "
                          "AND flag >= %s",
                          (pid, flag))
 
     def format_paper(paper):
         bibrefrec = "%s:%d,%d" % paper[:3]
         ret = {'data' : bibrefrec,
                'flag' : paper[3]
               }
 
         if show_author_name:
             ret['authorname'] = paper[4]
 
         if show_title:
             ret['title'] = ""
             title = get_title_from_rec(paper[2])
             if title:
                 ret['title'] = (title, )
 
         if show_rt_status:
             rt_count = run_sql("SELECT count(personid) "
                                "FROM aidPERSONIDDATA WHERE "
                                "tag like 'rt_%%' and data = %s"
                                , (bibrefrec,))
 
             ret['rt_status'] = (rt_count[0][0] > 0)
 
         if show_affiliations:
             tag = '%s__u' % paper[0]
             ret['affiliation'] = get_grouped_records(paper[:3], tag)[tag]
 
         if show_date:
             ret['date'] = []
             date_id = run_sql("SELECT id_bibxxx "
                               "FROM bibrec_bib26x "
                               "WHERE id_bibrec = %s "
                               , (paper[2],))
 
             if date_id:
-                date_id_s = list_2_SQL_str(date_id)
+                date_id_s = list_2_SQL_str(date_id, lambda x: x[0])
                 date = run_sql("SELECT value "
                                "FROM bib26x "
                                "WHERE id in %s "
                                "AND tag = %s"
                                % (date_id_s, "'269__c'"))
                 if date:
                     ret['date'] = zip(*date)[0]
 
 
         if show_experiment:
             ret['experiment'] = []
             experiment_id = run_sql("SELECT id_bibxxx "
                                     "FROM bibrec_bib69x "
                                     "WHERE id_bibrec = %s "
                                     , (paper[2],))
 
             if experiment_id:
-                experiment_id_s = list_2_SQL_str(experiment_id)
+                experiment_id_s = list_2_SQL_str(experiment_id, lambda x: x[0])
                 experiment = run_sql("SELECT value "
                                      "FROM bib69x "
                                      "WHERE id in %s "
                                      "AND tag = %s"
                                      % (experiment_id_s, "'693__e'"))
                 if experiment:
                     ret['experiment'] = zip(*experiment)[0]
 
         return ret
 
     return [format_paper(paper) for paper in all_papers]
 
 
 def get_persons_with_open_tickets_list():
     '''
     Finds all the persons with open tickets and returns pids and count of tickets
     @return: [[pid, ticket_count]]
     '''
     return run_sql("select personid, count(distinct opt1) from "
                     "aidPERSONIDDATA where tag like 'rt_%' group by personid")
 
 def get_request_ticket(person_id, ticket_id=None):
     '''
     Retrieves one or many requests tickets from a person
     @param: person_id: person id integer
     @param: matching: couple of values to match ('tag', 'value')
     @param: ticket_id: ticket id (flag) value
     @returns: [[[('tag', 'value')], ticket_id]]
         [[[('a', 'va'), ('b', 'vb')], 1L], [[('b', 'daOEIaoe'), ('a', 'caaoOUIe')], 2L]]
     '''
     if ticket_id:
         tstr = " and opt1='%s' " % ticket_id
     else:
         tstr = " "
     tickets = run_sql("select tag,data,opt1 from aidPERSONIDDATA where personid=%s and "
                       " tag like 'rt_%%' " + tstr , (person_id,))
     return [[[(s[0][3:], s[1]) for s in d], k] for k, d in groupby(sorted(tickets, key=lambda k: k[2]), key=lambda k: k[2])]
 
 
 def insert_user_log(userinfo, personid, action, tag, value, comment='', transactionid=0, timestamp=None):
     '''
     Instert log entries in the user log table.
     For example of entres look at the table generation script.
     @param userinfo: username or user identifier
     @type: string
     @param personid: personid involved in the transaction
     @type: longint
     @param action: action type
     @type: string
     @param tag: tag
     @type: string
     @param value: value for the transaction
     @type: string
     @param comment: optional comment for the transaction
     @type: string
     @param transactionid: optional id for the transaction
     @type: longint
 
     @return: the transactionid
     @rtype: longint
     '''
 #    if transactionid == 0:
 #        transactionid = max(run_sql('SELECT  MAX(transactionid) FROM `aidUSERINPUTLOG`')[0][0], -1) + 1
 
     if not timestamp:
         timestamp = run_sql('select now()')[0][0]
 
 #    run_sql('insert into aidUSERINPUTLOG (transactionid,timestamp,userinfo,personid,action,tag,value,comment) values '
 #            '(%(transactionid)s,%(timestamp)s,%(userinfo)s,%(personid)s,%(action)s,%(tag)s,%(value)s,%(comment)s)',
 #            ({'transactionid':str(transactionid),
 #              'timestamp':timestamp.timestamp,
 #              'userinfo':str(userinfo),
 #              'personid':str(personid),
 #              'action':str(action),
 #              'tag':str(tag),
 #              'value':str(value),
 #              'comment':str(comment)}))
     run_sql('insert into aidUSERINPUTLOG '
             '(transactionid,timestamp,userinfo,personid,action,tag,value,comment) values '
             '(%s,%s,%s,%s,%s,%s,%s,%s)',
             (transactionid, timestamp, userinfo, personid,
              action, tag, value, comment))
 
     return transactionid
 
 
 def person_bibref_is_touched_old(pid, bibref):
     '''
     Determines if a record attached to a person has been touched by a human
     by checking the flag.
 
     @param pid: The Person ID of the person to check the assignment from
     @type pid: int
     @param bibref: The paper identifier to be checked (e.g. "100:12,144")
     @type bibref: string
     '''
     bibref, rec = bibref.split(",")
     table, ref = bibref.split(":")
 
     flag = run_sql("SELECT flag "
                    "FROM aidPERSONIDPAPERS "
                    "WHERE personid = %s "
                    "AND bibref_table = %s "
                    "AND bibref_value = %s "
                    "AND bibrec = %s"
                    , (pid, table, ref, rec))
 
     try:
         flag = flag[0][0]
     except (IndexError):
         return False
 
     if not flag:
         return False
     elif -2 < flag < 2:
         return False
     else:
         return True
 
 
 def confirm_papers_to_person(pid, papers, user_level=0):
     '''
     Confirms the relationship between pid and paper, as from user input.
     @param pid: id of the person
     @type pid: ('2',)
     @param papers: list of papers to confirm
     @type papers: (('100:7531,9024',),)
     @param gather_list: list to store the pids to be updated rather than
     calling update_personID_names_string_set
     @typer gather_list: set([('2',), ('3',)])
     '''
     for p in papers:
         bibref, rec = p[0].split(",")
         rec = int(rec)
         table, ref = bibref.split(":")
         ref = int(ref)
 
         run_sql("delete from aidPERSONIDPAPERS where personid=%s and bibrec=%s", (pid[0], rec))
         run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and "
                 " bibref_value = %s and bibrec=%s",
                 (table, ref, rec))
 
         add_signature([table, ref, rec], None, pid[0])
         run_sql("update aidPERSONIDPAPERS "
                 "set personid = %s "
                 ", flag = %s "
                 ", lcul = %s "
                 "where bibref_table = %s "
                 "and bibref_value = %s "
                 "and bibrec = %s"
                 , (str(pid[0]), '2', user_level,
                    table, ref, rec))
 
     update_personID_canonical_names(pid)
 
 
 def reject_papers_from_person(pid, papers, user_level=0):
     '''
     Confirms the negative relationship between pid and paper, as from user input.
     @param pid: id of the person
     @type pid: integer
     @param papers: list of papers to confirm
     @type papers: ('100:7531,9024',)
     '''
 
     new_pid = get_new_personid()
     for p in papers:
         brr, rec = p.split(",")
         table, ref = brr.split(':')
 
         sig = (table, ref, rec)
         records = personid_name_from_signature(sig)
         assert(records)
 
         fpid, name = records[0]
         assert fpid == pid
 
         run_sql("INSERT INTO aidPERSONIDPAPERS "
                 "(personid, bibref_table, bibref_value, bibrec, name, flag, lcul) "
                 "VALUES (%s, %s, %s, %s, %s, %s, %s)"
                 , (pid, table, ref, rec, name, -2, user_level))
 
         move_signature(sig, new_pid)
 
     update_personID_canonical_names((pid,))
 
 
 def reset_papers_flag(pid, papers):
     '''
     Resets the flag associated to the papers to '0'
     @param papers: list of papers to confirm
     @type papers: (('100:7531,9024',),)
     @param gather_list: list to store the pids to be updated rather than
     calling update_personID_names_string_set
     @typer gather_list: set([('2',), ('3',)])
     '''
     for p in papers:
         bibref, rec = p[0].split(",")
         table, ref = bibref.split(":")
         run_sql("update aidPERSONIDPAPERS "
                 "set flag = %s, lcul = %s "
                 "where bibref_table = %s "
                 "and bibref_value = %s "
                 "and bibrec = %s" ,
                 ('0', '0',
                 table, ref, rec))
 
 
 def user_can_modify_data(uid, pid):
     '''
     Return True if the uid can modify data of this personID, false otherwise.
     @param uid: the user id
     @type: int
     @param pid: the person id
     @type: int
 
     @return: can user mofidfy data?
     @rtype: boolean
     '''
 
     pid_uid = run_sql("select data from aidPERSONIDDATA where tag = %s"
                           " and personid = %s", ('uid', str(pid)))
 
     if len(pid_uid) >= 1 and str(uid) == str(pid_uid[0][0]):
         rights = bconfig.CLAIMPAPER_CHANGE_OWN_DATA
     else:
         rights = bconfig.CLAIMPAPER_CHANGE_OTHERS_DATA
 
     return acc_authorize_action(uid, rights)[0] == 0
 
 
 def get_possible_bibrecref(names, bibrec, always_match=False):
     '''
     Returns a list of bibrefs for which the surname is matching
     @param names: list of names strings
     @param bibrec: bibrec number
     @param always_match: match with all the names (full bibrefs list)
     '''
     splitted_names = [split_name_parts(n) for n in names]
 
     bibrec_names_100 = run_sql("select o.id, o.value from bib10x o, "
                                "(select i.id_bibxxx as iid from bibrec_bib10x i "
                                "where id_bibrec=%s) as dummy "
                                "where o.tag='100__a' AND o.id = dummy.iid",
                                (str(bibrec),))
     bibrec_names_700 = run_sql("select o.id, o.value from bib70x o, "
                                "(select i.id_bibxxx as iid from bibrec_bib70x i "
                                "where id_bibrec=%s) as dummy "
                                "where o.tag='700__a' AND o.id = dummy.iid",
                                (str(bibrec),))
 #    bibrec_names_100 = run_sql("select id,value from bib10x where tag='100__a' and id in "
 #                               "(select id_bibxxx from bibrec_bib10x where id_bibrec=%s)",
 #                               (str(bibrec),))
 #    bibrec_names_700 = run_sql("select id,value from bib70x where tag='700__a' and id in "
 #                               "(select id_bibxxx from bibrec_bib70x where id_bibrec=%s)",
 #                               (str(bibrec),))
     bibreflist = []
 
     for b in bibrec_names_100:
         spb = split_name_parts(b[1])
         for n in splitted_names:
             if (n[0].lower() == spb[0].lower()) or always_match:
                 if ['100:' + str(b[0]), b[1]] not in bibreflist:
                     bibreflist.append(['100:' + str(b[0]), b[1]])
 
     for b in bibrec_names_700:
         spb = split_name_parts(b[1])
         for n in splitted_names:
             if (n[0].lower() == spb[0].lower()) or always_match:
                 if ['700:' + str(b[0]), b[1]] not in bibreflist:
                     bibreflist.append(['700:' + str(b[0]), b[1]])
 
     return bibreflist
 
 
 def user_can_modify_paper(uid, paper):
     '''
     Return True if the uid can modify this paper, false otherwise.
     If the paper is assigned more then one time (from algorithms) consider the most privileged
     assignment.
     @param uid: the user id
     @type: int
     @param paper: the paper bibref,bibrec pair x00:1234,4321
     @type: str
 
     @return: can user mofidfy paper attribution?
     @rtype: boolean
     '''
     bibref, rec = paper.split(",")
     table, ref = bibref.split(":")
     prow = run_sql("select personid, lcul from aidPERSONIDPAPERS "
                    "where bibref_table = %s and bibref_value = %s and bibrec = %s "
                    "order by lcul desc limit 0,1",
                    (table, ref, rec))
 
     if len(prow) == 0:
         return ((acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)[0] == 0) or
                 (acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS)[0] == 0))
 
     min_req_acc_n = int(prow[0][1])
     req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)
     pid_uid = run_sql("select data from aidPERSONIDDATA where tag = %s and personid = %s", ('uid', str(prow[0][0])))
     if len(pid_uid) > 0:
         if (str(pid_uid[0][0]) != str(uid)) and min_req_acc_n > 0:
             req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS)
 
     if min_req_acc_n < req_acc:
         min_req_acc_n = req_acc
 
     min_req_acc = resolve_paper_access_right(min_req_acc_n)
 
     return (acc_authorize_action(uid, min_req_acc)[0] == 0) and (resolve_paper_access_right(min_req_acc) >= min_req_acc_n)
 
 
 def resolve_paper_access_right(acc):
     '''
     Given a string or an integer, resolves to the corresponding integer or string
     If asked for a wrong/not present parameter falls back to the minimum privilege.
     '''
     access_dict = {bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE: 0,
                   bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS: 25,
                   bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS: 50}
 
     if isinstance(acc, str):
         try:
             return access_dict[acc]
         except:
             return 0
 
     inverse_dict = dict([[v, k] for k, v in access_dict.items()])
     lower_accs = [a for a in inverse_dict.keys() if a <= acc]
     try:
         return inverse_dict[max(lower_accs)]
     except:
         return bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE
 
 
 def get_recently_modified_record_ids(date):
     '''
     Returns the bibrecs with modification date more recent then date.
     @param date: date
     '''
     return [p[0] for p in run_sql(
                "select id from bibrec where modification_date > %s", (date,))]
 
 
 def filter_modified_record_ids(bibrecs, date):
     '''
     Returns the bibrecs with modification date before the date.
     @param date: date
     '''
     return ifilter(
         lambda x: run_sql("select count(*) from bibrec "
                           "where id = %s and "
                           "modification_date < %s"
                           , (x[2], date))[0][0]
         , bibrecs)
 
 
 def get_cached_author_page(pageparam):
     '''
     Return cached authorpage
     @param: pageparam (int personid)
     @return (id, 'authorpage_cache', personid, authorpage_html, date_cached)
     '''
             #TABLE: id, tag, identifier, data, date
     caches = run_sql("select id, object_name, object_key, object_value, last_updated \
                       from aidCACHE \
                       where object_name='authorpage_cache' and object_key=%s",
                           (str(pageparam),))
     if len(caches) >= 1:
         return caches[0]
     else:
         return []
 
 def delete_cached_author_page(personid):
     '''
     Deletes from the author page cache the page concerning one person
     '''
     run_sql("delete from aidCACHE where object_name='authorpage_cache' and object_key=%s", (str(personid),))
 
 def update_cached_author_page_timestamp(pageparam):
     '''
     Updates cached author page timestamp
     @param pageparam: int personid
     '''
     #TABLE: id, tag, identifier, data, date
     run_sql("update aidCACHE set last_updated=now() where object_name='authorpage_cache' and object_key=%s", (str(pageparam),))
 
 
 def update_cached_author_page(pageparam, page):
     '''
     Updates cached author page, deleting old caches for same pageparam
     @param pageparam: int personid
     @param page: string html authorpage
     '''
             #TABLE: id, tag, identifier, data, date
     run_sql("delete from aidCACHE where object_name='authorpage_cache' and object_key=%s", (str(pageparam),))
     run_sql("insert into aidCACHE values (Null,'authorpage_cache',%s,%s,now())", (str(pageparam), str(page)))
 
 def get_user_log(transactionid='', userinfo='', personID='', action='', tag='', value='', comment='', only_most_recent=False):
     '''
     Get user log table entry matching all the given parameters; all of them are optional.
     IF no parameters are given retuns the complete log table
     @param transactionid: id of the transaction
     @param userinfo: user name or identifier
     @param personid: id of the person involved
     @param action: action
     @param tag: tag
     @param value: value
     @param comment: comment
     '''
     sql_query = ('select id,transactionid,timestamp,userinfo,personid,action,tag,value,comment ' +
                  'from aidUSERINPUTLOG where 1 ')
     if transactionid:
         sql_query += ' and transactionid=\'' + str(transactionid) + '\''
     if userinfo:
         sql_query += ' and userinfo=\'' + str(userinfo) + '\''
     if personID:
         sql_query += ' and personid=\'' + str(personID) + '\''
     if action:
         sql_query += ' and action=\'' + str(action) + '\''
     if tag:
         sql_query += ' and tag=\'' + str(tag) + '\''
     if value:
         sql_query += ' and value=\'' + str(value) + '\''
     if comment:
         sql_query += ' and comment=\'' + str(comment) + '\''
     if only_most_recent:
         sql_query += ' order by timestamp desc limit 0,1'
     return run_sql(sql_query)
 
 
 def list_2_SQL_str(items, f=lambda x: x):
     """
     Concatenates all items in items to a sql string using f.
     @param items: a set of items
     @param type items: X
     @param f: a function which transforms each item from items to string
     @param type f: X:->str
     @return: "(x1, x2, x3, ... xn)" for xi in items
     @return type: string
     """
     strs = (str(f(x)) for x in items)
     return "(%s)" % ", ".join(strs)
 
 
 def get_authors_from_paper(paper):
     '''
     selects all author bibrefs by a given papers
     '''
     fullbibrefs100 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec=%s", (paper,))
     if len(fullbibrefs100) > 0:
         fullbibrefs100str = list_2_SQL_str(fullbibrefs100, lambda x: str(x[0]))
         return run_sql("select id from bib10x where tag='100__a' and id in %s" % (fullbibrefs100str,))
     return tuple()
 
 def get_coauthors_from_paper(paper):
     '''
     selects all coauthor bibrefs by a given papers
     '''
     fullbibrefs700 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec=%s", (paper,))
     if len(fullbibrefs700) > 0:
         fullbibrefs700str = list_2_SQL_str(fullbibrefs700, lambda x: str(x[0]))
         return run_sql("select id from bib70x where tag='700__a' and id in %s" % (fullbibrefs700str,))
     return tuple()
 
 def get_bibrefrec_subset(table, papers, refs):
     table = "bibrec_bib%sx" % str(table)[:-1]
     contents = run_sql("select id_bibrec, id_bibxxx from %s" % table)
     papers = set(papers)
     refs = set(refs)
 
     # yes, there are duplicates and we must set them
     return set(ifilter(lambda x: x[0] in papers and x[1] in refs, contents))
 
 def get_deleted_papers():
     return run_sql("select o.id_bibrec from bibrec_bib98x o, "
                    "(select i.id as iid from bib98x i "
                    "where value = 'DELETED' "
                    "and tag like '980__a') as dummy "
                    "where o.id_bibxxx = dummy.iid")
 
 #bibauthorid_maintenance personid update private methods
 def update_personID_canonical_names(persons_list=None, overwrite=False, suggested=''):
     '''
     Updates the personID table creating or updating canonical names for persons
     @param: persons_list: persons to consider for the update  (('1'),)
     @param: overwrite: if to touch already existing canonical names
     @param: suggested: string to suggest a canonical name for the person
     '''
     if not persons_list:
         persons_list = [x[0] for x in run_sql('select distinct personid from aidPERSONIDPAPERS')]
 
     for idx, pid in enumerate(persons_list):
         update_status(float(idx) / float(len(persons_list)), "Updating canonical_names...")
         current_canonical = run_sql("select data from aidPERSONIDDATA where "
                                     "personid=%s and tag=%s", (pid, 'canonical_name'))
 
         if overwrite or len(current_canonical) == 0:
             names = get_person_names_count(pid)
 
             names = sorted(names, key=lambda k: k[1], reverse=True)
             if len(names) < 1 and not suggested:
                 continue
             else:
                 if suggested:
                     canonical_name = suggested
                 else:
                     canonical_name = create_canonical_name(names[0][0])
 
                 run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s",
                         (pid, 'canonical_name'))
 
                 existing_cnames = run_sql("select data from aidPERSONIDDATA "
                                           "where tag=%s and data like %s",
                                           ('canonical_name', str(canonical_name) + '%'))
 
                 existing_cnames = set(name[0] for name in existing_cnames)
                 for i in count(1):
                     cur_try = canonical_name + '.' + str(i)
                     if cur_try not in existing_cnames:
                         canonical_name = cur_try
                         break
 
                 run_sql("insert into aidPERSONIDDATA (personid, tag, data) values (%s,%s,%s) ",
                          (pid, 'canonical_name', canonical_name))
 
     update_status_final("Updating canonical_names finished.")
 
 
 def personid_get_recids_affected_since(last_timestamp):
     '''
     Returns a list of recids which have been manually changed since timestamp
     @TODO: extend the system to track and signal even automatic updates (unless a full reindex is
         acceptable in case of magic automatic update)
     @param: last_timestamp: last update, datetime.datetime
     '''
     vset = set(int(v[0].split(',')[1]) for v in run_sql(
                "select distinct value from aidUSERINPUTLOG "
                "where timestamp > %s", (last_timestamp,))
                if ',' in v[0] and ':' in v[0])
 
     pids = set(int(p[0]) for p in run_sql(
                "select distinct personid from aidUSERINPUTLOG "
                "where timestamp > %s", (last_timestamp,))
                if p[0] > 0)
 
     if pids:
         pids_s = list_2_SQL_str(pids)
         vset |= set(int(b[0]) for b in run_sql(
                     "select bibrec from aidPERSONIDPAPERS "
                     "where personid in %s" % pids_s))
 
     return list(vset) # I'm not sure about this cast. It might work without it.
 
 
 def get_all_paper_records(pid, claimed_only=False):
     if not claimed_only:
         return run_sql("SELECT distinct bibrec FROM aidPERSONIDPAPERS WHERE personid = %s", (str(pid),))
     else:
         return run_sql("SELECT distinct bibrec FROM aidPERSONIDPAPERS WHERE "
                        "personid = %s and flag=2 or flag=-2", (str(pid),))
 
 
 def get_all_names_from_personid():
     return ((name[0][0], set(n[1] for n in name), len(name))
              for name in (run_sql(
                  "SELECT personid, name "
                  "FROM aidPERSONIDPAPERS "
                  "WHERE personid = %s "
                  "AND flag > -2", p)
                  for p in run_sql(
                     "SELECT DISTINCT personid "
                     "FROM aidPERSONIDPAPERS "
                     "WHERE flag > -2")
                  ))
 
 
 def get_grouped_records(bibrefrec, *args):
     '''
     By a given bibrefrec: mark:ref,rec this function will scan
     bibmarkx table and extract all records with tag in argc, which
     are grouped togerther with this bibrec.
 
     Returns a dictionary with { tag : [extracted_values] }
     if the values is not found.
 
     @type bibrefrec: (mark(int), ref(int), rec(int))
     '''
     table, ref, rec = bibrefrec
 
     target_table = "bib%sx" % (str(table)[:-1])
     mapping_table = "bibrec_%s" % target_table
 
     group_id = run_sql("SELECT field_number "
                        "FROM %s "
                        "WHERE id_bibrec = %d "
                        "AND id_bibxxx = %d" %
                        (mapping_table, rec, ref))
 
     if len(group_id) == 0:
         # unfortunately the mapping is not found, so
         # we cannot find anything
         return dict((arg, []) for arg in args)
     elif len(group_id) == 1:
         # All is fine
         field_number = group_id[0][0]
     else:
         # sounds bad, but ignore the error
         field_number = group_id[0][0]
 
     grouped = run_sql("SELECT id_bibxxx "
                       "FROM %s "
                       "WHERE id_bibrec = %d "
                       "AND field_number = %d" %
                       (mapping_table, rec, int(field_number)))
     assert len(grouped) > 0
     grouped_s = list_2_SQL_str(grouped, lambda x: str(x[0]))
 
     ret = {}
     for arg in args:
         qry = run_sql("SELECT value "
                       "FROM %s "
                       "WHERE tag LIKE '%s' "
                       "AND id IN %s" %
                       (target_table, arg, grouped_s))
         ret[arg] = [q[0] for q in qry]
 
     return ret
 
 def get_name_by_bibrecref(bib):
     '''
     @param bib: bibrefrec or bibref
     @type bib: (mark, bibref, bibrec) OR (mark, bibref)
     '''
     table = "bib%sx" % (str(bib[0])[:-1])
     refid = bib[1]
     tag = "%s__a" % bib[0]
     ret = run_sql("select value from %s where id = '%s' and tag = '%s'" % (table, refid, tag))
 
     # if zero - check if the garbage collector has run
     assert len(ret) == 1
     return ret[0][0]
 
 
 def get_collaboration(bibrec):
     bibxxx = run_sql("select id_bibxxx from bibrec_bib71x where id_bibrec = %s", (str(bibrec),))
 
     if len(bibxxx) == 0:
         return ()
 
     bibxxx = list_2_SQL_str(bibxxx, lambda x: str(x[0]))
 
     ret = run_sql("select value from bib71x where id in %s and tag like '%s'" % (bibxxx, "710__g"))
     return [r[0] for r in ret]
 
 
 def get_key_words(bibrec):
     if bconfig.CFG_ADS_SITE:
         bibxxx = run_sql("select id_bibxxx from bibrec_bib65x where id_bibrec = %s", (str(bibrec),))
     else:
         bibxxx = run_sql("select id_bibxxx from bibrec_bib69x where id_bibrec = %s", (str(bibrec),))
 
     if len(bibxxx) == 0:
         return ()
 
     bibxxx = list_2_SQL_str(bibxxx, lambda x: str(x[0]))
 
     if bconfig.CFG_ADS_SITE:
         ret = run_sql("select value from bib69x where id in %s and tag like '%s'" % (bibxxx, "6531_a"))
     else:
         ret = run_sql("select value from bib69x where id in %s and tag like '%s'" % (bibxxx, "695__a"))
 
     return [r[0] for r in ret]
 
 
 def get_all_authors(bibrec):
     bibxxx_1 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec = %s", (str(bibrec),))
     bibxxx_7 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec = %s", (str(bibrec),))
 
     if bibxxx_1:
         bibxxxs_1 = list_2_SQL_str(bibxxx_1, lambda x: str(x[0]))
         authors_1 = run_sql("select value from bib10x where tag = '%s' and id in %s" % ('100__a', bibxxxs_1,))
     else:
         authors_1 = []
 
     if bibxxx_7:
         bibxxxs_7 = list_2_SQL_str(bibxxx_7, lambda x: str(x[0]))
         authors_7 = run_sql("select value from bib70x where tag = '%s' and id in %s" % ('700__a', bibxxxs_7,))
     else:
         authors_7 = []
 
     return [a[0] for a in authors_1] + [a[0] for a in authors_7]
 
 
 def get_title_from_rec(rec):
     """
     Returns the name of the paper like str if found.
     Otherwise returns None.
     """
     title_id = run_sql("SELECT id_bibxxx "
                         "FROM bibrec_bib24x "
                         "WHERE id_bibrec = %s",
                         (rec,))
 
     if title_id:
         title_id_s = list_2_SQL_str(title_id, lambda x: x[0])
         title = run_sql("SELECT value "
                         "FROM bib24x "
                         "WHERE id in %s "
                         "AND tag = '245__a'"
                         % title_id_s)
 
         if title:
             return title[0][0]
 
 def get_bib10x():
     return run_sql("select id, value from bib10x where tag like %s", ("100__a",))
 
 
 def get_bib70x():
     return run_sql("select id, value from bib70x where tag like %s", ("700__a",))
 
 
 class bib_matrix:
     '''
     This small class contains the sparse matrix
     and encapsulates it.
     '''
     # please increment this value every time you
     # change the output of the comparison functions
     current_comparison_version = 9
 
     special_items = ((None, -3., 'N'), ('+', -2., '+'), ('-', -1., '-'))
     special_symbols = dict((x[0], (x[1], x[2])) for x in special_items)
     special_numbers = dict((x[1], (x[0], x[2])) for x in special_items)
     special_strings = dict((x[2], (x[0], x[1])) for x in special_items)
 
     def __init__(self, cluster_set=None):
         if cluster_set:
             bibs = chain(*(cl.bibs for cl in cluster_set.clusters))
             self._bibmap = dict((b[1], b[0]) for b in enumerate(bibs))
             width = len(self._bibmap)
             size = ((width - 1) * width) / 2
             self._matrix = bib_matrix.create_empty_matrix(size)
         else:
             self._bibmap = dict()
 
     @staticmethod
     def create_empty_matrix(lenght):
         ret = numpy.ndarray(shape=(lenght, 2), dtype=float, order='C')
         ret.fill(bib_matrix.special_symbols[None][0])
         return ret
 
     def _resolve_entry(self, bibs):
         entry = sorted(self._bibmap[bib] for bib in bibs)
         assert entry[0] < entry[1]
         return entry[0] + ((entry[1] - 1) * entry[1]) / 2
 
     def __setitem__(self, bibs, val):
         entry = self._resolve_entry(bibs)
 
         if val in self.special_symbols:
             num = self.special_symbols[val][0]
             val = (num, num)
 
         self._matrix[entry] = val
 
     def __getitem__(self, bibs):
         entry = self._resolve_entry(bibs)
         ret = self._matrix[entry]
         if ret[0] in self.special_numbers:
             return self.special_numbers[ret[0]][0]
         return ret[0], ret[1]
 
     def __contains__(self, bib):
         return bib in self._bibmap
 
     def get_keys(self):
         return self._bibmap.keys()
 
     @staticmethod
     def __pickle_tuple(tupy):
         '''
         tupy can be a very special iterable. It may contain:
             * (float, float)
             * None
             * '+', '-' or '?'
         '''
         def to_str(elem):
             if elem[0] in bib_matrix.special_numbers:
                 return "%s" % bib_matrix.special_numbers[elem[0]][1]
             return "%.2f:%.2f" % (elem[0], elem[1])
 
         return "|".join(imap(to_str, tupy))
 
     @staticmethod
     def __unpickle_tuple(tupy):
         '''
         tupy must be an object created by pickle_tuple.
         '''
         def from_str(elem):
             if elem in bib_matrix.special_strings:
                 nummy = bib_matrix.special_strings[elem][1]
                 return (nummy, nummy)
             fls = elem.split(":")
             assert len(fls) == 2
             return (float(fls[0]), float(fls[1]))
 
         strs = tupy.split("|")
         if strs == ['']:
             strs = []
         ret = bib_matrix.create_empty_matrix(len(strs))
         for i, stri in enumerate(strs):
             if i % 100000 == 0:
                 update_status(float(i) / len(strs), "Loading the cache...")
             ret[i][0], ret[i][1] = from_str(stri)
         update_status_final("Probability matrix loaded.")
         return ret
 
     def load(self, name):
         '''
         This method will load the matrix from the
         database.
         '''
         row = run_sql("select bibmap, matrix "
                       "from aidPROBCACHE "
                       "where cluster like %s",
                       (name,))
         if len(row) == 0:
             self._bibmap = dict()
             return False
         elif len(row) == 1:
             bibmap_vs = zlib.decompress(row[0][0])
             bibmap_v = cPickle.loads(bibmap_vs)
             rec_v, self.creation_time, self._bibmap = bibmap_v
             if (rec_v != bib_matrix.current_comparison_version or
                 bib_matrix.current_comparison_version < 0): # you can use negative
                                                             # version to recalculate
                 self._bibmap = dict()
                 return False
 
             matrix_s = zlib.decompress(row[0][1])
             self._matrix = bib_matrix.__unpickle_tuple(matrix_s)
             if self._bibmap and self._matrix != None:
                 if len(self._bibmap) * (len(self._bibmap) - 1) / 2 != len(self._matrix):
                     print >> sys.stderr, ("Error: aidPROBCACHE is corrupted! "
                                           "Cluster %s has bibmap with %d bibs, "
                                           "but matrix with %d entries."
                                           % (name, len(self._bibmap), len(self._matrix)))
                     print >> sys.stderr, "Try to increase max_packet_size."
                     assert False, "Bibmap: %d, Matrix %d" % (len(self._bibmap), len(self._matrix))
                     return False
                 return True
             else:
                 self._bibmap = dict()
                 return False
         else:
             assert False, "aidPROBCACHE is corrupted"
             self._bibmap = dict()
             return False
 
     def store(self, name, creation_time):
         bibmap_v = (bib_matrix.current_comparison_version, creation_time, self._bibmap)
         bibmap_vs = cPickle.dumps(bibmap_v)
         bibmap_vsc = zlib.compress(bibmap_vs)
 
         matrix_s = bib_matrix.__pickle_tuple(self._matrix)
         matrix_sc = zlib.compress(matrix_s)
 
         run_sql("delete from aidPROBCACHE where cluster like %s", (name,))
         run_sql("insert low_priority "
                 "into aidPROBCACHE "
                 "set cluster = %s, "
                 "bibmap = %s, "
                 "matrix = %s",
                 (name, bibmap_vsc, matrix_sc))
 
 
 def delete_paper_from_personid(rec):
     '''
     Deletes all information in PERSONID about a given paper
     '''
     run_sql("delete from aidPERSONIDPAPERS where bibrec = %s", (rec,))
 
 
 def get_signatures_from_rec(bibrec):
     '''
     Retrieves all information in PERSONID
     about a given bibrec.
     '''
     return run_sql("select personid, bibref_table, bibref_value, bibrec, name "
                    "from aidPERSONIDPAPERS where bibrec = %s"
                    , (bibrec,))
 
 
 def modify_signature(oldref, oldrec, newref, newname):
     '''
     Modifies a signature in aidPERSONIDpapers.
     '''
     return run_sql("UPDATE aidPERSONIDPAPERS "
                    "SET bibref_table = %s, bibref_value = %s, name = %s "
                    "WHERE bibref_table = %s AND bibref_value = %s AND bibrec = %s"
                    , (str(newref[0]), newref[1], newname,
                       str(oldref[0]), oldref[1], oldrec))
 
 
 def find_pids_by_name(name):
     '''
     Finds names and personids by a prefix name.
     '''
     return set(run_sql("SELECT personid, name "
                        "FROM aidPERSONIDPAPERS "
                        "WHERE name like %s"
                        , (name + ',%',)))
 
 def find_pids_by_exact_name(name):
     """
     Finds names and personids by a name.
     """
     return set(run_sql("SELECT personid "
                    "FROM aidPERSONIDPAPERS "
                    "WHERE name = %s"
                    , (name,)))
 
 def remove_sigs(signatures):
     '''
     Removes records from aidPERSONIDPAPERS
     '''
     for sig in signatures:
         run_sql("DELETE FROM aidPERSONIDPAPERS "
                 "WHERE bibref_table like %s AND bibref_value = %s AND bibrec = %s"
                 , (str(sig[0]), sig[1], sig[2]))
 
 
 def remove_personid_papers(pids):
     '''
     Removes all signatures from aidPERSONIDPAPERS with pid in pids
     '''
     if pids:
         run_sql("delete from aidPERSONIDPAPERS where personid in %s"
                 % list_2_SQL_str(pids))
 
 
 def get_full_personid_papers(table_name="`aidPERSONIDPAPERS`"):
     '''
     Get all columns and rows from aidPERSONIDPAPERS
     or any other table with the same structure.
     '''
     return run_sql("select personid, bibref_table, "
                    "bibref_value, bibrec, name, flag, "
                    "lcul from %s" % table_name)
 
 
 def get_full_results():
     '''
     Depricated. Should be removed soon.
     '''
     return run_sql("select personid, bibref_table, bibref_value, bibrec "
                    "from aidRESULTS")
 
 
 def get_lastname_results(last_name):
     '''
     Returns rows from aidRESULTS which share a common last name.
     '''
     return run_sql("select personid, bibref_table, bibref_value, bibrec "
                    "from aidRESULTS "
                    "where personid like '" + last_name + ".%'")
 
 
 
 def get_full_personid_data(table_name="`aidPERSONIDDATA`"):
     '''
     Get all columns and rows from aidPERSONIDDATA
     or any other table with the same structure.
     '''
     return run_sql("select personid, tag, data, "
                    "opt1, opt2, opt3 from %s" % table_name)
 
 
 def get_wrong_names():
     '''
     Returns a generator with all wrong names in aidPERSONIDPAPERS.
     Every element is (table, ref, correct_name).
     '''
 
     bib100 = dict(((x[0], create_normalized_name(split_name_parts(x[1]))) for x in get_bib10x()))
     bib700 = dict(((x[0], create_normalized_name(split_name_parts(x[1]))) for x in get_bib70x()))
 
     pidnames100 = run_sql("select distinct  bibref_value, name from aidPERSONIDPAPERS "
                           " where bibref_table='100'")
     pidnames700 = run_sql("select distinct  bibref_value, name from aidPERSONIDPAPERS "
                           " where bibref_table='700'")
 
     wrong100 = set(('100', x[0], bib100.get(x[0], None)) for x in pidnames100 if x[1] != bib100.get(x[0], None))
     wrong700 = set(('700', x[0], bib700.get(x[0], None)) for x in pidnames700 if x[1] != bib700.get(x[0], None))
 
     total = len(wrong100) + len(wrong700)
 
     return chain(wrong100, wrong700), total
 
 
 def check_personid_papers(output_file=None):
     '''
     Checks all invariants of personid.
     Writes in stdout if output_file if False.
     '''
 
     if output_file:
         fp = open(output_file, "w")
         printer = lambda x: fp.write(x + '\n')
     else:
         printer = bibauthor_print
 
     checkers = (check_duplicated_papers,
                 check_duplicated_signatures,
                 check_wrong_names,
                 check_canonical_names,
                 check_empty_personids,
                 check_wrong_rejection,
 #                check_claim_ispireid_contradiction,
                )
 
     # Avoid writing f(a) or g(a), because one of the calls
     # might be optimized.
     return all([check(printer) for check in checkers])
 
 
 def check_duplicated_papers(printer):
     ret = True
     pids = run_sql("select distinct personid from aidPERSONIDPAPERS")
     for pid in pids:
         pid = pid[0]
         recs = run_sql("select bibrec from aidPERSONIDPAPERS where personid = %s and flag <> %s", (pid, -2))
         recs = [rec[0] for rec in recs]
         for rec in set(recs):
             recs.remove(rec)
 
         if recs:
             ret = False
             printer("Person %d has duplicated papers: %s" % (pid, str(tuple(set(recs)))))
 
     return ret
 
 
 def check_duplicated_signatures(printer):
     ret = True
     recs = run_sql("select distinct bibrec from aidPERSONIDPAPERS")
     for rec in recs:
         rec = rec[0]
         refs = list(run_sql("select bibref_table, bibref_value from aidPERSONIDPAPERS where bibrec = %s and flag > %s", (rec, "-2")))
 
         for ref in set(refs):
             refs.remove(ref)
 
         if refs:
             ret = False
             refs = sorted(refs)
             refs = groupby(refs)
             refs = ["Found %s:%s %d times." % (key[0], key[1], len(list(data)) + 1) for key, data in refs]
             printer("Paper %d has duplicated signatures:" % rec)
             for ref in refs:
                 printer("\t%s" % ref)
 
     return ret
 
 
 def check_wrong_names(printer):
     ret = True
     wrong_names, number = get_wrong_names()
 
     if number > 0:
         ret = False
         printer("%d corrupted names in aidPERSONIDPAPERS." % number)
         for wrong_name in wrong_names:
             if wrong_name[2]:
                 printer("Outdated name, '%s'(%s:%d)." % (wrong_name[2], wrong_name[0], wrong_name[1]))
             else:
                 printer("Invalid id(%s:%d)." % (wrong_name[0], wrong_name[1]))
 
     return ret
 
 
 def check_canonical_names(printer):
     ret = True
 
     pid_cn = run_sql("select personid, data from aidPERSONIDDATA where tag = %s", ('canonical_name',))
     pid_2_cn = dict((k, len(list(d))) for k, d in groupby(sorted(pid_cn, key=itemgetter(0)), key=itemgetter(0)))
 
     for pid in get_existing_personids():
         canon = pid_2_cn.get(pid, 0)
 
         if canon != 1:
             if canon == 0:
                 papers = run_sql("select count(*) from aidPERSONIDPAPERS where personid = %s", (pid,))[0][0]
                 if papers != 0:
                     printer("Personid %d does not have a canonical name, but have %d papers." % (pid, papers))
                     ret = False
             else:
                 printer("Personid %d has %d canonical names.", (pid, canon))
                 ret = False
 
     return ret
 
 
 def check_empty_personids(printer):
     ret = True
 
     paper_pids = set(p[0] for p in run_sql("select personid from aidPERSONIDPAPERS"))
     data_pids = set(p[0] for p in run_sql("select personid from aidPERSONIDDATA"))
 
     for p in data_pids - paper_pids:
         fields = run_sql("select count(*) from aidPERSONIDDATA where personid = %s and tag <> %s", (p, "canonical_name",))[0][0]
 
         if fields == 0:
             printer("Personid %d has no papers and nothing else than canonical_name." % p)
             ret = False
 
     return ret
 
 def check_wrong_rejection(printer):
     ret = True
 
     all_rejections = run_sql("select personid, bibref_table, bibref_value, bibrec "
                              "from aidPERSONIDPAPERS "
                              "where flag = %s",
                              ('-2',))
 
     for rej in all_rejections:
         sigs = run_sql("select personid from aidPERSONIDPAPERS "
                        "where bibref_table = %s "
                        "and bibref_value = %s "
                        "and bibrec = %s "
                        "and flag <> '-2'", rej[1:])
 
         # To avoid duplication of error messages don't complain
         # if the papers is assigned to more than one personids.
         if not sigs:
             printer("The paper (%s:%s,%s) was rejected from person %d, but never assigned or claimed." % (rej[1:] + rej[:1]))
             ret = False
 
         elif rej[1] in sigs:
             printer("Personid %d has both assigned and rejected paper (%s:%s,%s)." % rej)
             ret = False
 
     return ret
 
 
 def check_merger():
     '''
     This function presumes that copy_personid was
     called before the merger.
     '''
     is_ok = True
 
     old_claims = set(run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
                              "from aidPERSONIDPAPERS_copy "
                              "where flag = -2 or flag = 2"))
 
     cur_claims = set(run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
                              "from aidPERSONIDPAPERS "
                              "where flag = -2 or flag = 2"))
 
     errors = ((old_claims - cur_claims, "Some claims were lost during the merge."),
               (cur_claims - old_claims, "Some new claims appeared after the merge."))
     act = { -2 : 'Rejection', 2 : 'Claim' }
 
     for err_set, err_msg in errors:
         if err_set:
             is_ok = False
             bibauthor_print(err_msg)
             bibauthor_print("".join("    %s: personid %d %d:%d,%d\n" %
                             (act[cl[6]], cl[0], int(cl[1]), cl[2], cl[3]) for cl in err_set))
 
     old_assigned = set(run_sql("select bibref_table, bibref_value, bibrec "
                                "from aidPERSONIDPAPERS_copy"))
                                #"where flag <> -2 and flag <> 2"))
 
     cur_assigned = set(run_sql("select bibref_table, bibref_value, bibrec "
                                "from aidPERSONIDPAPERS"))
                                #"where flag <> -2 and flag <> 2"))
 
     errors = ((old_assigned - cur_assigned, "Some signatures were lost during the merge."),
               (cur_assigned - old_assigned, "Some new signatures appeared after the merge."))
 
     for err_sig, err_msg in errors:
         if err_sig:
             is_ok = False
             bibauthor_print(err_msg)
             bibauthor_print("".join("    %s:%d,%d\n" % sig for sig in err_sig))
 
     return is_ok
 
 def check_results():
     is_ok = True
 
     all_result_rows = run_sql("select * from aidRESULTS")
 
     keyfunc = lambda x: x[1:]
     duplicated = (d for d in (list(d) for k, d in groupby(sorted(all_result_rows, key=keyfunc), key=keyfunc)) if len(d) > 1)
 
     for dd in duplicated:
         is_ok = False
         for d in dd:
             print "%s %s %s %s" % d
         print
 
 
     clusters = {}
     for rr in all_result_rows:
         clusters[rr[0]] = clusters.get(rr[0], []) + [rr[3]]
 
     faulty_clusters = dict((cid, len(recs) - len(set(recs)))
                                for cid, recs in clusters.items()
                                    if not len(recs) == len(set(recs)))
 
     if faulty_clusters:
         is_ok = False
         print "Recids NOT unique in clusters!"
         print ("A total of %s clusters hold an average of %.2f duplicates" %
                (len(faulty_clusters), (sum(faulty_clusters.values()) / float(len(faulty_clusters)))))
 
         for c in faulty_clusters:
             print "Name: %-20s      Size: %4d      Faulty: %2d" % (c, len(clusters[c]), faulty_clusters[c])
 
     return is_ok
 
 def check_claim_inspireid_contradiction():
     iids10x = run_sql("select id from bib10x where tag = '100__i'")
     iids70x = run_sql("select id from bib70x where tag = '700__i'")
 
     refs10x = set(x[0] for x in run_sql("select id from bib10x where tag = '100__a'"))
     refs70x = set(x[0] for x in run_sql("select id from bib70x where tag = '700__a'"))
 
     if iids10x:
         iids10x = list_2_SQL_str(iids10x, lambda x: str(x[0]))
 
         iids10x = run_sql("select id_bibxxx, id_bibrec, field_number "
                           "from bibrec_bib10x "
                           "where id_bibxxx in %s"
                           % iids10x)
 
         iids10x = ((row[0], [(ref, rec) for ref, rec in run_sql(
                                 "select id_bibxxx, id_bibrec "
                                 "from bibrec_bib10x "
                                 "where id_bibrec = '%s' "
                                 "and field_number = '%s'"
                                 % row[1:])
                                if ref in refs10x])
                       for row in iids10x)
     else:
         iids10x = ()
 
     if iids70x:
         iids70x = list_2_SQL_str(iids70x, lambda x: str(x[0]))
 
         iids70x = run_sql("select id_bibxxx, id_bibrec, field_number "
                           "from bibrec_bib70x "
                           "where id_bibxxx in %s"
                           % iids70x)
 
         iids70x = ((row[0], [(ref, rec) for ref, rec in run_sql(
                                 "select id_bibxxx, id_bibrec "
                                 "from bibrec_bib70x "
                                 "where id_bibrec = '%s' "
                                 "and field_number = '%s'"
                                 % (row[1:]))
                                if ref in refs70x])
                       for row in iids70x)
     else:
         iids70x = ()
 
     # [(iids, [bibs])]
     inspired = list(chain(((iid, list(set(('100', ) + bib for bib in bibs))) for iid, bibs in iids10x),
                           ((iid, list(set(('700', ) + bib for bib in bibs))) for iid, bibs in iids70x)))
 
     assert all(len(x[1]) == 1 for x in inspired)
 
     inspired = ((k, map(itemgetter(0), map(itemgetter(1), d)))
                     for k, d in groupby(sorted(inspired, key=itemgetter(0)), key=itemgetter(0)))
 
     # [(inspireid, [bibs])]
     inspired = [([(run_sql("select personid "
                             "from aidPERSONIDPAPERS "
                             "where bibref_table = %s "
                             "and bibref_value = %s "
                             "and bibrec = %s "
                             "and flag = '2'"
                             , bib), bib)
                         for bib in cluster[1]], cluster[0])
                     for cluster in inspired]
 
     # [([([pid], bibs)], inspireid)]
     for cluster, iid in inspired:
         pids = set(chain.from_iterable(imap(itemgetter(0), cluster)))
 
         if len(pids) > 1:
             print "InspireID: %s links the following papers:" % iid
             print map(itemgetter(1), cluster)
             print "More than one personid claimed them:"
             print list(pids)
             print
             continue
 
         if len(pids) == 0:
             # not even one paper with this inspireid has been
             # claimed, screw it
             continue
 
         pid = list(pids)[0][0]
 
         # The last step is to check all non-claimed papers for being
         # claimed by the person on some different signature.
         problem = (run_sql("select bibref_table, bibref_value, bibrec "
                            "from aidPERSONIDPAPERS "
                            "where bibrec = %s "
                            "and personid = %s "
                            "and flag = %s"
                            , (bib[2], pid, 2))
                        for bib in (bib for lpid, bib in cluster if not lpid))
         problem = list(chain.from_iterable(problem))
 
         if problem:
             print "A personid has claimed a paper from an inspireid cluster and a contradictory paper."
             print "Personid %d" % pid
             print "Inspireid cluster %s" % str(map(itemgetter(1), cluster))
             print "Contradicting claims: %s" % str(problem)
             print
 
 
 def repair_personid():
     '''
     This should make check_personid_papers() to return true.
     '''
     pids = run_sql("select distinct personid from aidPERSONIDPAPERS")
     lpids = len(pids)
     for i, pid in enumerate((p[0] for p in pids)):
         update_status(float(i) / lpids, "Checking per-pid...")
         rows = run_sql("select bibrec, bibref_table, bibref_value, flag "
                        "from aidPERSONIDPAPERS where personid = %s", (pid,))
 
         rows = ((k, list(d))
                 for k, d in groupby(sorted(rows, key=itemgetter(0)), itemgetter(0)))
 
         for rec, sigs in rows:
             if len(sigs) > 1:
                 claimed = [sig for sig in sigs if sig[3] > 1]
                 rejected = [sig for sig in sigs if sig[3] < -1]
 
                 if len(claimed) == 1:
                     sigs.remove(claimed[0])
                 elif len(claimed) == 0 and len(rejected) == 1:
                     sigs.remove(rejected[0])
 
                 for sig in set(sigs):
                     run_sql("delete from aidPERSONIDPAPERS "
                             "where personid = %s "
                             "and bibrec = %s "
                             "and bibref_table = %s "
                             "and bibref_value = %s "
                             "and flag = %s"
                             , (pid, sig[0], sig[1], sig[2], sig[3]))
     update_status_final("Done with per-pid fixing.")
 
     recs = run_sql("select distinct bibrec from aidPERSONIDPAPERS")
     lrecs = len(recs)
     for i, rec in enumerate((r[0] for r in recs)):
         update_status(float(i) / lrecs, "Checking per-rec...")
         rows = run_sql("select bibref_table, bibref_value, flag from aidPERSONIDPAPERS "
                        "where bibrec = %s", (rec,))
         kfuc = itemgetter(slice(0, 2))
         rows = ((k, map(itemgetter(2), d)) for k, d in groupby(sorted(rows), kfuc))
 
         for bibref, flags in rows:
             if len(flags) > 1:
                 claimed = sum(1 for f in flags if f > 1)
                 rejected = sum(1 for f in flags if f < -1)
 
                 if claimed == 1:
                     run_sql("delete from aidPERSONIDPAPERS "
                             "where bibrec = %s "
                             "and bibref_table = %s "
                             "and bibref_value = %s "
                             "and flag <> %s"
                             , (rec, bibref[0], bibref[1], 2))
                 elif claimed == 0 and rejected == 1:
                     run_sql("delete from aidPERSONIDPAPERS "
                             "where bibrec = %s "
                             "and bibref_table = %s "
                             "and bibref_value = %s "
                             "and flag <> %s"
                             , (rec, bibref[0], bibref[1], -2))
                 else:
                     run_sql("delete from aidPERSONIDPAPERS "
                             "where bibrec = %s "
                             "and bibref_table = %s "
                             "and bibref_value = %s"
                             , (rec, bibref[0], bibref[1]))
     update_status_final("Done with per-rec fixing.")
 
     update_status(0 / 1, "Fixing wrong names...")
     wrong_names, number = get_wrong_names()
     for i, w in enumerate(wrong_names):
         update_status(i / number, "Fixing wrong names...")
         if w[2]:
             run_sql("update aidPERSONIDPAPERS set name=%s where bibref_table=%s and bibref_value=%s",
                     (w[2], w[0], w[1]))
         else:
             run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and bibref_value=%s",
                     (w[0], w[1]))
 
     no_rejs = frozenset(run_sql("select bibref_table, bibref_value, bibrec from aidPERSONIDPAPERS where flag <> -2"))
     rejs = frozenset(run_sql("select bibref_table, bibref_value, bibrec from aidPERSONIDPAPERS where flag = -2"))
     floating_rejs = rejs - no_rejs
 
     update_personID_canonical_names(map(new_person_from_signature, floating_rejs))
 
     update_status_final("Fixed all wrong names.")
 
     update_status(0, "Checking missing canonical names...")
     paper_pids = run_sql("select distinct personid from aidPERSONIDPAPERS")
     cname_pids = run_sql("select distinct personid from aidPERSONIDDATA where tag='canonical_name'")
     missing_cnames = list(set(p[0] for p in paper_pids) - set(p[0] for p in cname_pids))
     npids = len(missing_cnames)
     for pid in missing_cnames:
         update_status(missing_cnames.index(pid) / float(npids), "Creating missing canonical names...")
         update_personID_canonical_names([pid])
     update_status_final("Done restoring canonical names.")
 
 
 def get_all_bibrecs():
     return [x[0] for x in run_sql("select distinct bibrec from aidPERSONIDPAPERS")]
 
 
 def remove_all_bibrecs(bibrecs):
     bibrecs_s = list_2_SQL_str(bibrecs)
     run_sql("delete from aidPERSONIDPAPERS where bibrec in %s" % bibrecs_s)
 
 
 def empty_results_table():
     run_sql("TRUNCATE aidRESULTS")
 
 
 def save_cluster(named_cluster):
     name, cluster = named_cluster
     for bib in cluster.bibs:
         run_sql("INSERT INTO aidRESULTS "
                 "(personid, bibref_table, bibref_value, bibrec) "
                 "VALUES (%s, %s, %s, %s) "
                 , (name, str(bib[0]), bib[1], bib[2]))
 
 
 def remove_result_cluster(name):
     run_sql("DELETE FROM aidRESULTS "
             "WHERE personid like '%s%%'"
             % name)
 
 
 def personid_name_from_signature(sig):
     ret = run_sql("select personid, name "
                   "from aidPERSONIDPAPERS "
                   "where bibref_table = %s and bibref_value = %s and bibrec = %s "
                   "and flag > '-2'"
                   , sig)
     assert len(ret) < 2, ret
     return ret
 
 def personid_from_signature(sig):
     ret = run_sql("select personid, flag "
                   "from aidPERSONIDPAPERS "
                   "where bibref_table = %s and bibref_value = %s and bibrec = %s "
                   "and flag > '-2'"
                   , sig)
     assert len(ret) < 2, ret
     return ret
 
 def in_results(name):
     return run_sql("select count(*) "
                    "from aidRESULTS "
                    "where personid like %s"
                    , (name + '.0',))[0][0] > 0
 
 def get_signature_info(sig):
     ret = run_sql("select personid, flag "
                   "from aidPERSONIDPAPERS "
                   "where bibref_table = %s and bibref_value = %s and bibrec = %s "
                   "order by flag"
                   , sig)
     return ret
 
 def get_claimed_papers(pid):
     return run_sql("select bibref_table, bibref_value, bibrec "
                    "from aidPERSONIDPAPERS "
                    "where personid = %s "
                    "and flag > %s",
                    (pid, 1))
 
 
 def copy_personids():
     run_sql("DROP TABLE IF EXISTS  `aidPERSONIDDATA_copy`")
     run_sql("CREATE TABLE `aidPERSONIDDATA_copy` ( "
             "`personid` BIGINT( 16 ) UNSIGNED NOT NULL , "
             "`tag` VARCHAR( 64 ) NOT NULL , "
             "`data` VARCHAR( 256 ) NOT NULL , "
             "`opt1` MEDIUMINT( 8 ) DEFAULT NULL , "
             "`opt2` MEDIUMINT( 8 ) DEFAULT NULL , "
             "`opt3` VARCHAR( 256 ) DEFAULT NULL , "
             "KEY  `personid-b` (  `personid` ) , "
             "KEY  `tag-b` (  `tag` ) , "
             "KEY  `data-b` (  `data` ) , "
             "KEY  `opt1` (  `opt1` ) "
             ") ENGINE = MYISAM DEFAULT CHARSET = utf8")
     run_sql("INSERT INTO `aidPERSONIDDATA_copy` "
             "SELECT * "
             "FROM `aidPERSONIDDATA`")
 
     run_sql("DROP TABLE IF EXISTS `aidPERSONIDPAPERS_copy`")
     run_sql("CREATE TABLE `aidPERSONIDPAPERS_copy` ( "
             "`personid` bigint( 16 ) unsigned NOT NULL , "
             "`bibref_table` enum( '100', '700' ) NOT NULL , "
             "`bibref_value` mediumint( 8 ) unsigned NOT NULL , "
             "`bibrec` mediumint( 8 ) unsigned NOT NULL , "
             "`name` varchar( 256 ) NOT NULL , "
             "`flag` smallint( 2 ) NOT NULL DEFAULT '0', "
             "`lcul` smallint( 2 ) NOT NULL DEFAULT '0', "
             "`last_updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP , "
             "KEY `personid-b` ( `personid` ) , "
             "KEY `reftable-b` ( `bibref_table` ) , "
             "KEY `refvalue-b` ( `bibref_value` ) , "
             "KEY `rec-b` ( `bibrec` ) , "
             "KEY `name-b` ( `name` ) , "
             "KEY `timestamp-b` ( `last_updated` ) , "
             "KEY `ptvrf-b` ( `personid` , `bibref_table` , `bibref_value` , `bibrec` , `flag` ) "
             ") ENGINE = MyISAM DEFAULT CHARSET = utf8")
     run_sql("INSERT INTO `aidPERSONIDPAPERS_copy` "
             "SELECT * "
             "FROM `aidPERSONIDPAPERS")
 
 
 def delete_empty_persons():
     pp = run_sql("select personid from aidPERSONIDPAPERS")
     pp = set(p[0] for p in pp)
     pd = run_sql("select personid from aidPERSONIDDATA")
     pd = set(p[0] for p in pd)
     fpd = run_sql("select personid from aidPERSONIDDATA where tag <> 'canonical_name'")
     fpd = set(p[0] for p in fpd)
     to_delete = pd - (pp | fpd)
     if to_delete:
         run_sql("delete from aidPERSONIDDATA where personid in %s" % list_2_SQL_str(to_delete))
 
 def restore_personids():
     run_sql("TRUNCATE  `aidPERSONIDDATA`")
     run_sql("INSERT INTO `aidPERSONIDDATA` "
             "SELECT * "
             "FROM `aidPERSONIDDATA_copy`")
 
     run_sql("TRUNCATE `aidPERSONIDPAPERS`")
     run_sql("INSERT INTO `aidPERSONIDPAPERS` "
             "SELECT * "
             "FROM `aidPERSONIDPAPERS_copy")
 
 
 def get_possible_personids_from_paperlist_old(bibrecreflist):
     '''
     @param bibrecreflist: list of bibrecref couples, (('100:123,123',),) or bibrecs (('123',),)
     returns a list of pids and connected bibrefs in order of number of bibrefs per pid
     [ [['1'],['123:123.123','123:123.123']] , [['2'],['123:123.123']] ]
     '''
 
     pid_bibrecref_dict = {}
     for b in bibrecreflist:
         pids = []
 
         try:
             pids = run_sql("select personid from aidPERSONID "
                     "use index (`tdf-b`) where tag=%s and data=%s", ('paper', str(b[0])))
         except (OperationalError, ProgrammingError):
             pids = run_sql("select personid from aidPERSONID "
                     "where tag=%s and data=%s", ('paper', str(b[0])))
 
         for pid in pids:
             if pid[0] in pid_bibrecref_dict:
                 pid_bibrecref_dict[pid[0]].append(str(b[0]))
             else:
                 pid_bibrecref_dict[pid[0]] = [str(b[0])]
 
     pid_list = [[i, pid_bibrecref_dict[i]] for i in pid_bibrecref_dict]
 
     return sorted(pid_list, key=lambda k: len(k[2]), reverse=True)
 
 
 def resolve_affiliation(ambiguous_aff_string):
     """
     This is a method available in the context of author disambiguation in ADS
     only. No other platform provides the db table used by this function.
     @warning: to be used in an ADS context only.
     @param ambiguous_aff_string: Ambiguous affiliation string
     @type ambiguous_aff_string: str
     @return: The normalized version of the name string as presented in the database
     @rtype: str
     """
     if not ambiguous_aff_string or not bconfig.CFG_ADS_SITE:
         return "None"
 
     aff_id = run_sql("select aff_id from ads_affiliations where affstring=%s", (ambiguous_aff_string,))
 
     if aff_id:
         return aff_id[0][0]
     else:
         return "None"