# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
'''
bibauthorid_dbinterface
This is the only file in bibauthorid which should
use the database. It provides the database interface for
all other files in the module.
'''
import invenio.bibauthorid_config as bconfig
import numpy
import cPickle
from cPickle import UnpicklingError
from invenio.utils.html import X
import os
import gc
#python2.4 compatibility
from invenio.bibauthorid_general_utils import bai_all as all
from itertools import groupby, count, ifilter, chain, imap
from operator import itemgetter
from invenio.legacy.search_engine import perform_request_search
from invenio.modules.access.engine import acc_authorize_action
from invenio.config import CFG_SITE_URL
from invenio.bibauthorid_name_utils import split_name_parts
from invenio.bibauthorid_name_utils import create_canonical_name
from invenio.bibauthorid_name_utils import create_normalized_name
from invenio.bibauthorid_general_utils import bibauthor_print
from invenio.bibauthorid_general_utils import update_status \
, update_status_final
from invenio.legacy.dbquery import run_sql
try:
from collections import defaultdict
except ImportError:
from invenio.utils.container import defaultdict
MARC_100_700_CACHE = None
COLLECT_INSPIRE_ID = bconfig.COLLECT_EXTERNAL_ID_INSPIREID
def get_sql_time():
'''
Returns the time according to the database. The type is datetime.datetime.
'''
return run_sql("select now()")[0][0]
def set_personid_row(person_id, tag, value, opt1=None, opt2=None, opt3=None):
'''
Inserts data and additional info into aidPERSONIDDATA
@param person_id:
@type person_id: int
@param tag:
@type tag: string
@param value:
@type value: string
@param opt1:
@type opt1: int
@param opt2:
@type opt2: int
@param opt3:
@type opt3: string
'''
run_sql("INSERT INTO aidPERSONIDDATA "
"(`personid`, `tag`, `data`, `opt1`, `opt2`, `opt3`) "
"VALUES (%s, %s, %s, %s, %s, %s)",
(person_id, tag, value, opt1, opt2, opt3))
def get_personid_row(person_id, tag):
'''
Returns all the records associated to a person and a tag.
@param person_id: id of the person to read the attribute from
@type person_id: int
@param tag: the tag to read.
@type tag: string
@return: the data associated with a virtual author
@rtype: tuple of tuples
'''
return run_sql("SELECT data, opt1, opt2, opt3 "
"data FROM aidPERSONIDDATA "
"WHERE personid = %s AND tag = %s",
(person_id, tag))
def del_personid_row(tag, person_id=None, value=None):
'''
Delete the value associated to the given tag for a certain person.
Can delete all rows with the given tag regardless of person_id or value, or restrict the
deletion using either or both of them.
@param person_id: ID of the person
@type person_id: int
@param tag: tag to be updated
@type tag: string
@param value: value of the tag to be deleted
@type value: string
'''
if person_id:
if value:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s and data=%s", (person_id, tag, value,))
else:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s", (person_id, tag,))
else:
if value:
run_sql("delete from aidPERSONIDDATA where tag=%s and data=%s", (tag, value,))
else:
run_sql("delete from aidPERSONIDDATA where tag=%s", (tag,))
def get_all_papers_of_pids(personid_list):
'''
Get all papers of authors in a given list and sorts the results
by bibrefrec.
@param personid_list: list with the authors.
@type personid_list: iteratable of integers.
'''
if personid_list:
plist = list_2_SQL_str(personid_list)
paps = run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
"from aidPERSONIDPAPERS "
"where personid in %s "
% plist)
inner = set(row[1:4] for row in paps if row[4] > -2)
return (x for x in paps if x[1:4] in inner)
return ()
def del_person_not_manually_claimed_papers(pid):
'''
Deletes papers from a person which have not been manually claimed.
@param pid:
@type pid: int
'''
run_sql("delete from aidPERSONIDPAPERS "
"where and (flag <> '-2' and flag <> '2') and personid=%s", (pid,))
def get_personid_from_uid(uid):
'''
Returns the personID associated with the provided uid.
The second element of the returned tuple is True if the personID is already associated with the user, False otherwise.
@param uid: userID
@type uid: ((int,),)
'''
pid = run_sql("select personid from aidPERSONIDDATA where tag=%s and data=%s", ('uid', str(uid[0][0])))
if len(pid) == 1:
return (pid[0], True)
else:
return ([-1], False)
def get_uid_from_personid(pid):
'''
Get the invenio user id associated to a pid if exists.
@param pid: person_id
@type pid: int
'''
uid = run_sql("select data from aidPERSONIDDATA where tag='uid' and personid = %s", (pid,))
if uid:
return uid[0][0]
else:
return None
def get_new_personid():
'''
Get a free personid number
'''
pids = (run_sql("select max(personid) from aidPERSONIDDATA")[0][0],
run_sql("select max(personid) from aidPERSONIDPAPERS")[0][0])
pids = tuple(int(p) for p in pids if p != None)
if len(pids) == 2:
return max(*pids) + 1
elif len(pids) == 1:
return pids[0] + 1
else:
return 0
def get_existing_personids(with_papers_only=False):
'''
Get a set of existing person_ids.
@param with_papers_only: if True, returns only ids holding papers discarding ids holding only information in aidPERSONIDDATA
@type with_papers_only: Bool
'''
if not with_papers_only:
try:
pids_data = set(map(int, zip(*run_sql("select distinct personid from aidPERSONIDDATA"))[0]))
except IndexError:
pids_data = set()
else:
pids_data = set()
try:
pids_pap = set(map(int, zip(*run_sql("select distinct personid from aidPERSONIDPAPERS"))[0]))
except IndexError:
pids_pap = set()
return pids_data | pids_pap
def get_existing_result_clusters():
'''
Get existing result clusters, for private use of Tortoise and the merger
'''
return run_sql("select distinct personid from aidRESULTS")
def create_new_person(uid= -1, uid_is_owner=False):
'''
Create a new person. Set the uid as owner if requested.
@param uid: User id to associate to the newly created person
@type uid: int
@param uid_is_owner: If True, the person will hold the uid as owner, otherwise the uid is only remembered as the creator
@type uid_is_owner: bool
'''
pid = get_new_personid()
if uid_is_owner:
set_personid_row(pid, 'uid', str(uid))
else:
set_personid_row(pid, 'user-created', str(uid))
return pid
def create_new_person_from_uid(uid):
'''
Convenience wrapper for create_new_person(...)
@param uid: user id
@type uid: int
'''
return create_new_person(uid, uid_is_owner=True)
def new_person_from_signature(sig, name=None):
'''
Creates a new person from a signature.
@param sig: signature tuple ([100|700],bibref,bibrec)
@type sig: tuple
@param name:
@type name: string
'''
pid = get_new_personid()
add_signature(sig, name, pid)
return pid
def add_signature(sig, name, pid):
'''
Inserts a signature in personid.
@param sig: signature tuple
@type sig: tuple
@param name: name string
@type name: string
@param pid: personid to which assign the signature
@type pid: int
'''
if not name:
name = get_name_by_bibrecref(sig)
name = create_normalized_name(split_name_parts(name))
run_sql("INSERT INTO aidPERSONIDPAPERS "
"(personid, bibref_table, bibref_value, bibrec, name) "
"VALUES (%s, %s, %s, %s, %s)"
, (pid, str(sig[0]), sig[1], sig[2], name))
def move_signature(sig, pid, force_claimed=False, unclaim=False):
'''
Moves a signature to a different person id
@param sig: signature tuple
@type sig: tuple
@param pid: personid
@type pid: int
'''
upd = "update aidPERSONIDPAPERS set personid=%s" % pid
if unclaim:
upd += ',flag=0 '
sel = " where bibref_table like '%s' and bibref_value=%s and bibrec=%s " % sig
sql = upd + sel
if not force_claimed:
sql += ' and flag <> 2 and flag <> -2'
run_sql(sql)
def find_conflicts(sig, pid):
'''
Helper for merger algorithm, find signature given personid
@param sig: signature tuple
@type sig: tuple
@param pid: personid id
@type pid: integer
'''
return run_sql("select bibref_table, bibref_value, bibrec, flag "
"from aidPERSONIDPAPERS where "
"personid = %s and "
"bibrec = %s and "
"flag <> -2"
, (pid, sig[2]))
def update_request_ticket(person_id, tag_data_tuple, ticket_id=None):
'''
Creates / updates a request ticket for a personID
@param: personid int
@param: tag_data_tuples 'image' of the ticket: (('paper', '700:316,10'), ('owner', 'admin'), ('external_id', 'ticket_18'))
@return: ticketid
'''
#tags: rt_owner (the owner of the ticket, associating the rt_number to the transaction)
# rt_external_id
# rt_paper_confirm, rt_paper_reject, rt_paper_forget, rt_name, rt_email, rt_whatever
#flag: rt_number
if not ticket_id:
last_id = run_sql("select max(opt1) from aidPERSONIDDATA where personid=%s and tag like %s", (str(person_id), 'rt_%'))[0][0]
if last_id:
ticket_id = last_id + 1
else:
ticket_id = 1
else:
delete_request_ticket(person_id, ticket_id)
for d in tag_data_tuple:
run_sql("insert into aidPERSONIDDATA (personid, tag, data, opt1) "
"values (%s,%s,%s,%s)",
(str(person_id), 'rt_' + str(d[0]), str(d[1]), str(ticket_id)))
return ticket_id
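# Illustrative call, mirroring the 'image' format documented in the docstring
# (all values are hypothetical):
#
#     tid = update_request_ticket(16591,
#                                 (('paper', '700:316,10'),
#                                  ('owner', 'admin'),
#                                  ('external_id', 'ticket_18')))
#     # each tuple is stored as tag 'rt_<key>' with opt1 = tid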
def delete_request_ticket(person_id, ticket_id=None):
'''
Removes a ticket from a person_id.
If ticket_id is not provided, removes all the tickets pending on the person.
'''
if ticket_id:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag like %s and opt1 =%s", (str(person_id), 'rt_%', str(ticket_id)))
else:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag like %s", (str(person_id), 'rt_%'))
def get_all_personids_by_name(regexpr):
'''
Search personids matching SQL expression in the name field
@param regexpr: string SQL regexp
@type regexpr: string
'''
return run_sql("select personid, name "
"from aidPERSONIDPAPERS "
"where name like %s "
"and flag > -2",
(regexpr,))
def get_personids_by_canonical_name(target):
'''
Find personids by canonical name
@param target:
@type target:
'''
pid = run_sql("select personid from aidPERSONIDDATA where "
"tag='canonical_name' and data like %s", (target,))
if pid:
return run_sql("select personid, name from aidPERSONIDPAPERS "
"where personid=%s and flag > -2", (pid[0][0],))
else:
return []
def get_bibref_modification_status(bibref):
'''
Determines if a record attached to a person has been touched by a human
by checking the flag.
@param bibref: The paper identifier to be checked (e.g. "100:12,144")
@type bibref: string
@return: (flag, lcul) for the signature, or (False, 0) if it is not found
'''
if not bibref:
raise ValueError("A bibref is expected!")
head, rec = bibref.split(',')
table, ref = head.split(':')
flags = run_sql("SELECT flag, lcul FROM aidPERSONIDPAPERS WHERE "
"bibref_table = %s and bibref_value = %s and bibrec = %s"
, (table, ref, rec))
if flags:
return flags[0]
else:
return (False, 0)
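# Illustrative call (hypothetical bibref string, in the '<table>:<ref>,<rec>'
# format expected above):
#
#     touched, lcul = get_bibref_modification_status('100:12,144')
#     # touched is the stored flag (2 / -2 for human claims / rejections),
#     # lcul the user level; (False, 0) means the signature is unknown.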
def get_canonical_id_from_personid(pid):
'''
Finds the person id canonical name (e.g. Ellis_J_R_1)
@param pid
@type int
@return: sql result of the request
@rtype: tuple of tuple
'''
return run_sql("SELECT data FROM aidPERSONIDDATA WHERE "
"tag = %s AND personid = %s", ('canonical_name', str(pid)))
def get_papers_status(paper):
'''
Gets the personID and flag associated with a paper.
@param paper: paper identifier
@type paper: str, e.g. '100:7531,9024'
@return: [[paper, personid, flag], ...]
@rtype: list of lists
'''
head, bibrec = paper.split(',')
_table, bibref = head.split(':')
rets = run_sql("select PersonID, flag "
"from aidPERSONIDPAPERS "
"where bibref_table = %s "
"and bibref_value = %s "
"and bibrec = %s"
, (_table, bibref, bibrec))
return [[paper] + list(x) for x in rets]
def get_persons_from_recids(recids, return_alt_names=False,
return_all_person_papers=False):
'''
Helper for search engine indexing. Gives back a dictionary with important info about a person, for example:
get_persons_from_recids([1], True, True) returns
({1: [16591L]},
{16591L: {'alternatative_names': ['Wong, Yung Chow'],
'canonical_id': 'Y.C.Wong.1',
'person_records': [275304, 1, 51394, 128250, 311629]}})
@param recids:
@type recids:
@param return_alt_names:
@type return_alt_names:
@param return_all_person_papers:
@type return_all_person_papers:
'''
rec_2_pid = dict()
pid_2_data = dict()
all_pids = set()
def get_canonical_name(pid):
return run_sql("SELECT data "
"FROM aidPERSONIDDATA "
"WHERE tag = %s "
"AND personid = %s",
('canonical_name', pid))
for rec in recids:
pids = run_sql("SELECT personid "
"FROM aidPERSONIDPAPERS "
"WHERE bibrec = %s "
" and flag > -2 ",
(rec,))
# for some reason python's set is faster than a mysql distinct
pids = set(p[0] for p in pids)
all_pids |= pids
rec_2_pid[rec] = list(pids)
for pid in all_pids:
pid_data = {}
canonical = get_canonical_name(pid)
#We can suppose that this person didn't have a chance to get a canonical name yet
#because it was not fully processed by its creator. Anyway it's safe to try to create one
#before failing miserably.
if not canonical:
update_personID_canonical_names([pid])
canonical = get_canonical_name(pid)
#assert len(canonical) == 1
#This condition cannot hold in case claims or update daemons are run in parallel
#with this, as it can happen that a person with papers exists for which a canonical name
#has not been computed yet. Hence, it will be indexed next time, so it learns.
#Each person should have at most one canonical name, so:
assert len(canonical) <= 1, "A person cannot have more than one canonical name"
if len(canonical) == 1:
pid_data = {'canonical_id' : canonical[0][0]}
if return_alt_names:
names = run_sql("SELECT name "
"FROM aidPERSONIDPAPERS "
"WHERE personid = %s "
" and flag > -2 ",
(pid,))
names = set(n[0] for n in names)
pid_data['alternatative_names'] = list(names)
if return_all_person_papers:
recs = run_sql("SELECT bibrec "
"FROM aidPERSONIDPAPERS "
"WHERE personid = %s "
" and flag > -2 ",
(pid,))
recs = set(r[0] for r in recs)
pid_data['person_records'] = list(recs)
pid_2_data[pid] = pid_data
return (rec_2_pid, pid_2_data)
def get_person_db_names_count(pid, sort_by_count=True):
'''
Returns the set of name strings and count associated to a person id.
The name strings are as found in the database.
@param pid: ID of the person
@type pid: ('2',)
'''
id_2_count = run_sql("select bibref_table, bibref_value "
"from aidPERSONIDPAPERS "
"where personid = %s "
"and flag > -2", (pid,))
ref100 = [refid[1] for refid in id_2_count if refid[0] == '100']
ref700 = [refid[1] for refid in id_2_count if refid[0] == '700']
ref100_count = dict((key, len(list(data))) for key, data in groupby(sorted(ref100)))
ref700_count = dict((key, len(list(data))) for key, data in groupby(sorted(ref700)))
if ref100:
ref100_s = list_2_SQL_str(ref100, str)
id100_2_str = run_sql("select id, value "
"from bib10x "
"where id in %s"
% ref100_s)
else:
id100_2_str = tuple()
if ref700:
ref700_s = list_2_SQL_str(ref700, str)
id700_2_str = run_sql("select id, value "
"from bib70x "
"where id in %s"
% ref700_s)
else:
id700_2_str = tuple()
ret100 = [(name, ref100_count[refid]) for refid, name in id100_2_str]
ret700 = [(name, ref700_count[refid]) for refid, name in id700_2_str]
ret = ret100 + ret700
if sort_by_count:
ret = sorted(ret, key=itemgetter(1), reverse=True)
return ret
def get_person_id_from_canonical_id(canonical_id):
'''
Finds the person id from a canonical name (e.g. Ellis_J_R_1)
@param canonical_id: the canonical ID
@type canonical_id: string
@return: sql result of the request
@rtype: tuple of tuple
'''
return run_sql("SELECT personid FROM aidPERSONIDDATA WHERE "
"tag='canonical_name' AND data = %s", (canonical_id,))
def get_person_names_count(pid):
'''
Returns the set of name strings and count associated to a person id
@param pid: ID of the person
@type pid: ('2',)
'''
return run_sql("select name, count(name) from aidPERSONIDPAPERS where "
"personid=%s and flag > -2 group by name", (pid,))
def get_person_db_names_set(pid):
'''
Returns the set of db_name strings associated to a person id
@param pid: ID of the person
@type pid: 2
'''
names = get_person_db_names_count(pid)
if names:
return zip(set(zip(*names)[0]))
else:
return []
def get_personids_from_bibrec(bibrec):
'''
Returns all the personids associated to a bibrec.
@param bibrec: record id
@type bibrec: int
'''
pids = run_sql("select distinct personid from aidPERSONIDPAPERS where bibrec=%s and flag > -2", (bibrec,))
if pids:
return zip(*pids)[0]
else:
return []
def get_personids_and_papers_from_bibrecs(bibrecs, limit_by_name=None):
'''
Gives back a list of tuples (personid, set_of_papers_owned_by) limited to the given list of bibrecs.
@param bibrecs:
@type bibrecs:
@param limit_by_name:
@type limit_by_name:
'''
if not bibrecs:
return []
else:
bibrecs = list_2_SQL_str(bibrecs)
if limit_by_name:
try:
surname = split_name_parts(limit_by_name)[0]
except IndexError:
surname = None
else:
surname = None
if not surname:
data = run_sql("select personid,bibrec from aidPERSONIDPAPERS where bibrec in %s" % (bibrecs,))
else:
surname = split_name_parts(limit_by_name)[0]
data = run_sql(("select personid,bibrec from aidPERSONIDPAPERS where bibrec in %s "
"and name like " % bibrecs) + ' %s ', (surname + '%',))
pidlist = [(k, set([s[1] for s in d]))
for k, d in groupby(sorted(data, key=lambda x:x[0]), key=lambda x:x[0])]
pidlist = sorted(pidlist, key=lambda x:len(x[1]), reverse=True)
return pidlist
def get_person_bibrecs(pid):
'''
Returns bibrecs associated with a personid
@param pid: integer personid
@return [bibrec1,...,bibrecN]
'''
papers = run_sql("select bibrec from aidPERSONIDPAPERS where personid=%s and flag > -2", (str(pid),))
if papers:
return list(set(zip(*papers)[0]))
else:
return []
def get_person_papers(pid, flag,
show_author_name=False,
show_title=False,
show_rt_status=False,
show_affiliations=False,
show_date=False,
show_experiment=False):
'''
Get all papers of the person with flag greater than or equal to the given flag. Gives back a list of dictionaries like:
get_person_papers(16591,-2,True,True,True,True,True,True) returns
[{'affiliation': ['Hong Kong U.'],
'authorname': 'Wong, Yung Chow',
'data': '100:1,1',
'date': ('1961',),
'experiment': [],
'flag': 0,
'rt_status': False,
'title': ('Isoclinic N planes in Euclidean 2N space, Clifford parallels in elliptic (2N-1) space, and the Hurwitz matrix equations',)},
...]
@param pid:
@type pid:
@param flag:
@type flag:
@param show_author_name:
@type show_author_name:
@param show_title:
@type show_title:
@param show_rt_status:
@type show_rt_status:
@param show_affiliations:
@type show_affiliations:
@param show_date:
@type show_date:
@param show_experiment:
@type show_experiment:
'''
query = "bibref_table, bibref_value, bibrec, flag"
if show_author_name:
query += ", name"
all_papers = run_sql("SELECT " + query + " "
"FROM aidPERSONIDPAPERS "
"WHERE personid = %s "
"AND flag >= %s",
(pid, flag))
def format_paper(paper):
bibrefrec = "%s:%d,%d" % paper[:3]
ret = {'data' : bibrefrec,
'flag' : paper[3]
}
if show_author_name:
ret['authorname'] = paper[4]
if show_title:
ret['title'] = ""
title = get_title_from_rec(paper[2])
if title:
ret['title'] = (title,)
if show_rt_status:
rt_count = run_sql("SELECT count(personid) "
"FROM aidPERSONIDDATA WHERE "
"tag like 'rt_%%' and data = %s"
, (bibrefrec,))
ret['rt_status'] = (rt_count[0][0] > 0)
if show_affiliations:
tag = '%s__u' % paper[0]
ret['affiliation'] = get_grouped_records(paper[:3], tag)[tag]
if show_date:
ret['date'] = []
date_id = run_sql("SELECT id_bibxxx "
"FROM bibrec_bib26x "
"WHERE id_bibrec = %s "
, (paper[2],))
if date_id:
date_id_s = list_2_SQL_str(date_id, lambda x: x[0])
date = run_sql("SELECT value "
"FROM bib26x "
"WHERE id in %s "
"AND tag = %s"
% (date_id_s, "'269__c'"))
if date:
ret['date'] = zip(*date)[0]
if show_experiment:
ret['experiment'] = []
experiment_id = run_sql("SELECT id_bibxxx "
"FROM bibrec_bib69x "
"WHERE id_bibrec = %s "
, (paper[2],))
if experiment_id:
experiment_id_s = list_2_SQL_str(experiment_id, lambda x: x[0])
experiment = run_sql("SELECT value "
"FROM bib69x "
"WHERE id in %s "
"AND tag = %s"
% (experiment_id_s, "'693__e'"))
if experiment:
ret['experiment'] = zip(*experiment)[0]
return ret
return [format_paper(paper) for paper in all_papers]
def get_persons_with_open_tickets_list():
'''
Finds all the persons with open tickets and returns pids and count of tickets
@return: [[pid, ticket_count]]
'''
return run_sql("select personid, count(distinct opt1) from "
"aidPERSONIDDATA where tag like 'rt_%' group by personid")
def get_request_ticket(person_id, ticket_id=None):
'''
Retrieves one or many request tickets from a person
@param: person_id: person id integer
@param: ticket_id: ticket id (flag) value
@returns: [[[('tag', 'value')], ticket_id]]
[[[('a', 'va'), ('b', 'vb')], 1L], [[('b', 'daOEIaoe'), ('a', 'caaoOUIe')], 2L]]
'''
if ticket_id:
tstr = " and opt1='%s' " % ticket_id
else:
tstr = " "
tickets = run_sql("select tag,data,opt1 from aidPERSONIDDATA where personid=%s and "
" tag like 'rt_%%' " + tstr , (person_id,))
return [[[(s[0][3:], s[1]) for s in d], k] for k, d in groupby(sorted(tickets, key=lambda k: k[2]), key=lambda k: k[2])]
def insert_user_log(userinfo, personid, action, tag, value, comment='', transactionid=0, timestamp=None, userid=''):
'''
Insert log entries in the user log table.
For example entries, look at the table generation script.
@param userinfo: username or user identifier
@type: string
@param personid: personid involved in the transaction
@type: longint
@param action: action type
@type: string
@param tag: tag
@type: string
@param value: value for the transaction
@type: string
@param comment: optional comment for the transaction
@type: string
@param transactionid: optional id for the transaction
@type: longint
@return: the transactionid
@rtype: longint
'''
if not timestamp:
timestamp = run_sql('select now()')[0][0]
run_sql('insert into aidUSERINPUTLOG '
'(transactionid,timestamp,userinfo,userid,personid,action,tag,value,comment) values '
'(%s,%s,%s,%s,%s,%s,%s,%s,%s)',
(transactionid, timestamp, userinfo, userid, personid,
action, tag, value, comment))
return transactionid
def person_bibref_is_touched_old(pid, bibref):
'''
Determines if a record attached to a person has been touched by a human
by checking the flag.
@param pid: The Person ID of the person to check the assignment from
@type pid: int
@param bibref: The paper identifier to be checked (e.g. "100:12,144")
@type bibref: string
'''
bibref, rec = bibref.split(",")
table, ref = bibref.split(":")
flag = run_sql("SELECT flag "
"FROM aidPERSONIDPAPERS "
"WHERE personid = %s "
"AND bibref_table = %s "
"AND bibref_value = %s "
"AND bibrec = %s"
, (pid, table, ref, rec))
try:
flag = flag[0][0]
except (IndexError):
return False
if not flag:
return False
elif -2 < flag < 2:
return False
else:
return True
def confirm_papers_to_person(pid, papers, user_level=0):
'''
Confirms the relationship between pid and paper, as from user input.
@param pid: id of the person
@type pid: integer
@param papers: list of papers to confirm
@type papers: ((str,),) e.g. (('100:7531,9024',),)
@return: list of tuples: (status, message_key)
@rtype: [(bool, str), ]
'''
pids_to_update = set([pid])
res = []
for p in papers:
bibref, rec = p.split(",")
rec = int(rec)
table, ref = bibref.split(":")
ref = int(ref)
sig = (table, ref, rec)
#Check the status of pid: the paper should be present, either assigned or rejected
gen_papers = run_sql("select bibref_table, bibref_value, bibrec, personid, flag, name "
"from aidPERSONIDPAPERS "
"where bibrec=%s "
"and flag >= -2"
, (rec,))
paps = [el[0:3] for el in gen_papers if el[3] == pid and el[4] > -2]
#run_sql("select bibref_table, bibref_value, bibrec "
# "from aidPERSONIDPAPERS "
# "where personid=%s "
# "and bibrec=%s "
# "and flag > -2"
# , (pid, rec))
other_paps = [el[0:3] for el in gen_papers if el[3] != pid and el[4] > -2]
#other_paps = run_sql("select bibref_table, bibref_value, bibrec "
# "from aidPERSONIDPAPERS "
# "where personid <> %s "
# "and bibrec=%s "
# "and flag > -2"
# , (pid, rec))
rej_paps = [el[0:3] for el in gen_papers if el[3] == pid and el[4] == -2]
#rej_paps = run_sql("select bibref_table, bibref_value, bibrec "
# "from aidPERSONIDPAPERS "
# "where personid=%s "
# "and bibrec=%s "
# "and flag = -2"
# , (pid, rec))
bibref_exists = [el[0:3] for el in gen_papers if el[0] == table and el[1] == ref and el[4] > -2]
#bibref_exists = run_sql("select * "
# "from aidPERSONIDPAPERS "
# "and bibref_table=%s "
# "and bibref_value=%s "
# "and bibrec=%s "
# "and flag > -2"
# , (table, ref, rec))
# All papers that are being claimed should be present in aidPERSONIDPAPERS, thus:
# assert paps or rej_paps or other_paps, 'There should be at least something regarding this bibrec!'
# should always be valid.
# BUT, it usually happens that claims get done out of the browser/session cache which is hours/days old,
# hence it happens that papers are claimed which no longer exist in the system.
# For the sake of mental sanity, instead of crashing from now on we just ignore such cases.
if not (paps or other_paps or rej_paps) or not bibref_exists:
res.append((False, 'confirm_failure'))
continue
res.append((True, 'confirm_success'))
# It should not happen that a paper is assigned more than once to the same person.
# But sometimes it happens in rare unfortunate cases of bad concurrency circumstances,
# so we try to fix it directly instead of crashing here.
# Once a better solution for dealing with concurrency will be found, the following asserts
# shall be reenabled, to allow better control on what happens.
# assert len(paps) < 2, "This paper should not be assigned to this person more then once! %s" % paps
# assert len(other_paps) < 2, "There should not be more then one copy of this paper! %s" % other_paps
# if the bibrec is present with a different bibref, the present one must be moved somwhere
# else before we can claim the incoming one
if paps:
for pap in paps:
#kick out all unwanted signatures
if sig != pap:
new_pid = get_new_personid()
pids_to_update.add(new_pid)
move_signature(pap, new_pid)
# Make sure that the incoming claim is unique and get rid of all rejections, they are useless
# from now on
run_sql("delete from aidPERSONIDPAPERS where bibref_table like %s and "
" bibref_value = %s and bibrec=%s"
, sig)
add_signature(sig, None, pid)
run_sql("update aidPERSONIDPAPERS "
"set personid = %s "
", flag = %s "
", lcul = %s "
"where bibref_table = %s "
"and bibref_value = %s "
"and bibrec = %s"
, (pid, '2', user_level,
table, ref, rec))
update_personID_canonical_names(pids_to_update)
return res
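# Illustrative call (hypothetical pid and paper identifier), claiming one
# paper for person 16591 with the default user level:
#
#     statuses = confirm_papers_to_person(16591, ['100:7531,9024'])
#     # -> [(True, 'confirm_success')] or [(False, 'confirm_failure')]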
def reject_papers_from_person(pid, papers, user_level=0):
'''
Confirms the negative relationship between pid and paper, as from user input.
@param pid: id of the person
@type pid: integer
@param papers: list of papers to confirm
@type papers: ((str,),) e.g. (('100:7531,9024',),)
@return: list of tuples: (status, message_key)
@rtype: [(bool, str), ]
'''
new_pid = get_new_personid()
pids_to_update = set([pid])
res = []
for p in papers:
brr, rec = p.split(",")
table, ref = brr.split(':')
sig = (table, ref, rec)
# To be rejected, a record should be present!
records = personid_name_from_signature(sig)
# For the sake of mental sanity (see comments in confirm_papers_to_person), just ignore the case where this paper no longer exists
# assert(records)
if not records:
res.append((False, 'reject_failure'))
continue
res.append((True, 'reject_success'))
fpid, name = records[0]
# If the record is assigned to a different person already, the rejection is meaningless
# Otherwise, we assign the paper to someone else (not important who: it will eventually
# get moved by tortoise) and add the rejection to the current person
if fpid == pid:
move_signature(sig, new_pid, force_claimed=True, unclaim=True)
pids_to_update.add(new_pid)
run_sql("INSERT INTO aidPERSONIDPAPERS "
"(personid, bibref_table, bibref_value, bibrec, name, flag, lcul) "
"VALUES (%s, %s, %s, %s, %s, %s, %s)"
, (pid, table, ref, rec, name, -2, user_level))
update_personID_canonical_names(pids_to_update)
return res
def reset_papers_flag(pid, papers):
'''
Resets the flag associated to the papers to '0'
@param pid: id of the person
@type pid: integer
@param papers: list of papers to confirm
@type papers: ((str,),) e.g. (('100:7531,9024',),)
@return: list of tuples: (status, message_key)
@rtype: [(bool, str), ]
'''
res = []
for p in papers:
bibref, rec = p.split(",")
table, ref = bibref.split(":")
ref = int(ref)
sig = (table, ref, rec)
gen_papers = run_sql("select bibref_table, bibref_value, bibrec, flag "
"from aidPERSONIDPAPERS "
"where bibrec=%s "
"and personid=%s"
, (rec, pid))
paps = [el[0:3] for el in gen_papers]
#run_sql("select bibref_table, bibref_value, bibrec "
# "from aidPERSONIDPAPERS "
# "where personid=%s "
# "and bibrec=%s "
# , (pid, rec))
rej_paps = [el[0:3] for el in gen_papers if el[3] == -2]
#rej_paps = run_sql("select bibref_table, bibref_value, bibrec "
# "from aidPERSONIDPAPERS "
# "where personid=%s "
# "and bibrec=%s "
# "and flag = -2"
# , (pid, rec))
pid_bibref_exists = [el[0:3] for el in gen_papers if el[0] == table and el[1] == ref and el[3] > -2]
#bibref_exists = run_sql("select * "
# "from aidPERSONIDPAPERS "
# "and bibref_table=%s "
# "and bibref_value=%s "
# "and personid=%s "
# "and bibrec=%s "
# "and flag > -2"
# , (table, ref, pid, rec))
# again, see confirm_papers_to_person for the sake of mental sanity
# assert paps or rej_paps
if rej_paps or not pid_bibref_exists:
res.append((False, 'reset_failure'))
continue
res.append((True, 'reset_success'))
assert len(paps) < 2
run_sql("delete from aidPERSONIDPAPERS where bibref_table like %s and "
"bibref_value = %s and bibrec = %s",
(sig))
add_signature(sig, None, pid)
return res
def user_can_modify_data(uid, pid):
'''
Return True if the uid can modify data of this personID, false otherwise.
@param uid: the user id
@type: int
@param pid: the person id
@type: int
@return: can the user modify the data?
@rtype: boolean
'''
pid_uid = run_sql("select data from aidPERSONIDDATA where tag = %s"
" and personid = %s", ('uid', str(pid)))
if len(pid_uid) >= 1 and str(uid) == str(pid_uid[0][0]):
rights = bconfig.CLAIMPAPER_CHANGE_OWN_DATA
else:
rights = bconfig.CLAIMPAPER_CHANGE_OTHERS_DATA
return acc_authorize_action(uid, rights)[0] == 0
def get_possible_bibrecref(names, bibrec, always_match=False):
'''
Returns a list of bibrefs for which the surname is matching
@param names: list of names strings
@param bibrec: bibrec number
@param always_match: match with all the names (full bibrefs list)
'''
splitted_names = [split_name_parts(n) for n in names]
bibrec_names_100 = run_sql("select o.id, o.value from bib10x o, "
"(select i.id_bibxxx as iid from bibrec_bib10x i "
"where id_bibrec=%s) as dummy "
"where o.tag='100__a' AND o.id = dummy.iid",
(str(bibrec),))
bibrec_names_700 = run_sql("select o.id, o.value from bib70x o, "
"(select i.id_bibxxx as iid from bibrec_bib70x i "
"where id_bibrec=%s) as dummy "
"where o.tag='700__a' AND o.id = dummy.iid",
(str(bibrec),))
# bibrec_names_100 = run_sql("select id,value from bib10x where tag='100__a' and id in "
# "(select id_bibxxx from bibrec_bib10x where id_bibrec=%s)",
# (str(bibrec),))
# bibrec_names_700 = run_sql("select id,value from bib70x where tag='700__a' and id in "
# "(select id_bibxxx from bibrec_bib70x where id_bibrec=%s)",
# (str(bibrec),))
bibreflist = []
for b in bibrec_names_100:
spb = split_name_parts(b[1])
for n in splitted_names:
if (n[0].lower() == spb[0].lower()) or always_match:
if ['100:' + str(b[0]), b[1]] not in bibreflist:
bibreflist.append(['100:' + str(b[0]), b[1]])
for b in bibrec_names_700:
spb = split_name_parts(b[1])
for n in splitted_names:
if (n[0].lower() == spb[0].lower()) or always_match:
if ['700:' + str(b[0]), b[1]] not in bibreflist:
bibreflist.append(['700:' + str(b[0]), b[1]])
return bibreflist
def user_can_modify_paper(uid, paper):
'''
Return True if the uid can modify this paper, false otherwise.
If the paper is assigned more than once (from algorithms) consider the most privileged
assignment.
@param uid: the user id
@type: int
@param paper: the paper bibref,bibrec pair x00:1234,4321
@type: str
@return: can the user modify the paper attribution?
@rtype: boolean
'''
bibref, rec = paper.split(",")
table, ref = bibref.split(":")
prow = run_sql("select personid, lcul from aidPERSONIDPAPERS "
"where bibref_table = %s and bibref_value = %s and bibrec = %s "
"order by lcul desc limit 0,1",
(table, ref, rec))
if len(prow) == 0:
return ((acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)[0] == 0) or
(acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS)[0] == 0))
min_req_acc_n = int(prow[0][1])
req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)
pid_uid = run_sql("select data from aidPERSONIDDATA where tag = %s and personid = %s", ('uid', str(prow[0][0])))
if len(pid_uid) > 0:
if (str(pid_uid[0][0]) != str(uid)) and min_req_acc_n > 0:
req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS)
if min_req_acc_n < req_acc:
min_req_acc_n = req_acc
min_req_acc = resolve_paper_access_right(min_req_acc_n)
return (acc_authorize_action(uid, min_req_acc)[0] == 0) and (resolve_paper_access_right(min_req_acc) >= min_req_acc_n)
def resolve_paper_access_right(acc):
'''
Given a string or an integer, resolves to the corresponding integer or string
If asked for a wrong/not present parameter, falls back to the minimum privilege.
'''
access_dict = {bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE: 0,
bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS: 25,
bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS: 50}
if isinstance(acc, str):
try:
return access_dict[acc]
except KeyError:
return 0
inverse_dict = dict([[v, k] for k, v in access_dict.items()])
lower_accs = [a for a in inverse_dict.keys() if a <= acc]
try:
return inverse_dict[max(lower_accs)]
except ValueError:
return bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE
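# Illustrative mapping (assuming the bconfig constants are the action-name
# strings used in access_dict above):
#
#     resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)  # -> 25
#     resolve_paper_access_right(30)  # -> bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS
#     resolve_paper_access_right('no-such-action')  # -> 0 (minimum privilege)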
def get_recently_modified_record_ids(date):
'''
Returns the bibrecs with modification date more recent than date.
@param date: date
'''
touched_papers = frozenset(p[0] for p in run_sql(
"select id from bibrec "
"where modification_date > %s"
, (date,)))
return touched_papers & frozenset(get_all_valid_bibrecs())
def filter_modified_record_ids(bibrecs, date):
'''
Returns the bibrecs with modification date before the date.
@param date: date
'''
return ifilter(
lambda x: run_sql("select count(*) from bibrec "
"where id = %s and "
"modification_date < %s"
, (x[2], date))[0][0]
, bibrecs)
def get_user_log(transactionid='', userinfo='', userid='', personID='', action='', tag='', value='', comment='', only_most_recent=False):
'''
Get user log table entry matching all the given parameters; all of them are optional.
If no parameters are given, returns the complete log table.
@param transactionid: id of the transaction
@param userinfo: user name or identifier
@param personid: id of the person involved
@param action: action
@param tag: tag
@param value: value
@param comment: comment
'''
sql_query = ('select id,transactionid,timestamp,userinfo,personid,action,tag,value,comment ' +
'from aidUSERINPUTLOG where 1 ')
if transactionid:
sql_query += ' and transactionid=\'' + str(transactionid) + '\''
if userinfo:
sql_query += ' and userinfo=\'' + str(userinfo) + '\''
if userid:
sql_query += ' and userid=\'' + str(userid) + '\''
if personID:
sql_query += ' and personid=\'' + str(personID) + '\''
if action:
sql_query += ' and action=\'' + str(action) + '\''
if tag:
sql_query += ' and tag=\'' + str(tag) + '\''
if value:
sql_query += ' and value=\'' + str(value) + '\''
if comment:
sql_query += ' and comment=\'' + str(comment) + '\''
if only_most_recent:
sql_query += ' order by timestamp desc limit 0,1'
return run_sql(sql_query)
def list_2_SQL_str(items, f=lambda x: x):
"""
Concatenates all items in items to a sql string using f.
@param items: a set of items
@param type items: X
@param f: a function which transforms each item from items to string
@param type f: X:->str
@return: "(x1, x2, x3, ... xn)" for xi in items
@return type: string
"""
strs = (str(f(x)) for x in items)
return "(%s)" % ", ".join(strs)
def _get_authors_from_paper_from_db(paper):
'''
Selects all author bibrefs (bib10x ids) for a given paper.
'''
fullbibrefs100 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec=%s", (paper,))
if len(fullbibrefs100) > 0:
fullbibrefs100str = list_2_SQL_str(fullbibrefs100, lambda x: str(x[0]))
return run_sql("select id from bib10x where tag='100__a' and id in %s" % (fullbibrefs100str,))
return tuple()
def _get_authors_from_paper_from_cache(paper):
'''
Selects all author bibrefs for a given paper, using the MARC cache.
'''
try:
ids = MARC_100_700_CACHE['brb100'][paper]['id'].keys()
refs = [i for i in ids if '100__a' in MARC_100_700_CACHE['b100'][i][0]]
except KeyError:
return tuple()
return zip(refs)
def get_authors_from_paper(paper):
if MARC_100_700_CACHE:
if bconfig.DEBUG_CHECKS:
assert _get_authors_from_paper_from_cache(paper) == _get_authors_from_paper_from_db(paper)
return _get_authors_from_paper_from_cache(paper)
else:
return _get_authors_from_paper_from_db(paper)
def _get_coauthors_from_paper_from_db(paper):
'''
Selects all coauthor bibrefs (bib70x ids) for a given paper.
'''
fullbibrefs700 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec=%s", (paper,))
if len(fullbibrefs700) > 0:
fullbibrefs700str = list_2_SQL_str(fullbibrefs700, lambda x: str(x[0]))
return run_sql("select id from bib70x where tag='700__a' and id in %s" % (fullbibrefs700str,))
return tuple()
def _get_coauthors_from_paper_from_cache(paper):
'''
Selects all coauthor bibrefs for a given paper, using the MARC cache.
'''
try:
ids = MARC_100_700_CACHE['brb700'][paper]['id'].keys()
refs = [i for i in ids if '700__a' in MARC_100_700_CACHE['b700'][i][0]]
except KeyError:
return tuple()
return zip(refs)
def get_coauthors_from_paper(paper):
if MARC_100_700_CACHE:
if bconfig.DEBUG_CHECKS:
assert _get_coauthors_from_paper_from_cache(paper) == _get_coauthors_from_paper_from_db(paper)
return _get_coauthors_from_paper_from_cache(paper)
else:
return _get_coauthors_from_paper_from_db(paper)
def get_bibrefrec_subset(table, papers, refs):
table = "bibrec_bib%sx" % str(table)[:-1]
contents = run_sql("select id_bibrec, id_bibxxx from %s" % table)
papers = set(papers)
refs = set(refs)
# yes, there are duplicates and we must set them
return set(ifilter(lambda x: x[0] in papers and x[1] in refs, contents))
def get_deleted_papers():
return run_sql("select o.id_bibrec from bibrec_bib98x o, "
"(select i.id as iid from bib98x i "
"where value = 'DELETED' "
"and tag like '980__a') as dummy "
"where o.id_bibxxx = dummy.iid")
def add_personID_external_id(personid, external_id_str, value):
run_sql("insert into aidPERSONIDDATA (personid,tag,data) values (%s,%s,%s)",
(personid, 'extid:%s' % external_id_str, value))
def remove_personID_external_id(personid, external_id_str, value=False):
if not value:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s",
(personid, 'extid:%s' % external_id_str))
else:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s and data=%s",
(personid, 'extid:%s' % external_id_str, value))
def get_personiID_external_ids(personid):
ids = run_sql("select tag,data from aidPERSONIDDATA where personid=%s and tag like 'extid:%%'",
(personid,))
extids = {}
for i in ids:
id_str = i[0].split(':')[1]
idd = i[1]
try:
extids[id_str].append(idd)
except KeyError:
extids[id_str] = [idd]
return extids
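# Illustrative return value (hypothetical data): for a person holding rows
# tagged 'extid:INSPIREID' and 'extid:ORCID', the result is grouped by id type:
#
#     get_personiID_external_ids(16591)
#     # -> {'INSPIREID': ['INSPIRE-00123456'], 'ORCID': ['0000-0002-1825-0097']}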
#bibauthorid_maintenance personid update private methods
def update_personID_canonical_names(persons_list=None, overwrite=False, suggested='', overwrite_not_claimed_only=False):
'''
Updates the personID table creating or updating canonical names for persons
@param: persons_list: persons to consider for the update (('1'),)
@param: overwrite: whether to overwrite already existing canonical names
@param: suggested: string to suggest a canonical name for the person
'''
if not persons_list and overwrite:
persons_list = set([x[0] for x in run_sql('select personid from aidPERSONIDPAPERS')])
elif not persons_list:
persons_list = set([x[0] for x in run_sql('select personid from aidPERSONIDPAPERS')])
existing_cnamed_pids = set(
[x[0] for x in run_sql('select personid from aidPERSONIDDATA where tag=%s',
('canonical_name',))])
persons_list = persons_list - existing_cnamed_pids
for idx, pid in enumerate(persons_list):
update_status(float(idx) / float(len(persons_list)), "Updating canonical_names...")
if overwrite_not_claimed_only:
has_claims = run_sql("select personid from aidPERSONIDPAPERS where personid = %s and flag = 2", (pid,))
if has_claims:
continue
current_canonical = run_sql("select data from aidPERSONIDDATA where "
"personid=%s and tag=%s", (pid, 'canonical_name'))
if overwrite or len(current_canonical) == 0:
run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s",
(pid, 'canonical_name'))
names = get_person_names_count(pid)
names = sorted(names, key=lambda k: k[1], reverse=True)
if len(names) < 1 and not suggested:
continue
else:
if suggested:
canonical_name = suggested
else:
canonical_name = create_canonical_name(names[0][0])
existing_cnames = run_sql("select data from aidPERSONIDDATA "
"where tag=%s and data like %s",
('canonical_name', str(canonical_name) + '%'))
existing_cnames = set(name[0].lower() for name in existing_cnames)
for i in count(1):
cur_try = canonical_name + '.' + str(i)
if cur_try.lower() not in existing_cnames:
canonical_name = cur_try
break
run_sql("insert into aidPERSONIDDATA (personid, tag, data) values (%s,%s,%s) ",
(pid, 'canonical_name', canonical_name))
update_status_final("Updating canonical_names finished.")
def personid_get_recids_affected_since(last_timestamp):
'''
Returns a list of recids which have been manually changed since the given timestamp.
@param: last_timestamp: last update, datetime.datetime
'''
vset = set(int(v[0].split(',')[1]) for v in run_sql(
"select distinct value from aidUSERINPUTLOG "
"where timestamp > %s", (last_timestamp,))
if ',' in v[0] and ':' in v[0])
pids = set(int(p[0]) for p in run_sql(
"select distinct personid from aidUSERINPUTLOG "
"where timestamp > %s", (last_timestamp,))
if p[0] > 0)
if pids:
pids_s = list_2_SQL_str(pids)
vset |= set(int(b[0]) for b in run_sql(
"select bibrec from aidPERSONIDPAPERS "
"where personid in %s" % pids_s))
return list(vset) # I'm not sure about this cast. It might work without it.
def get_all_paper_records(pid, claimed_only=False):
if not claimed_only:
return run_sql("SELECT distinct bibrec FROM aidPERSONIDPAPERS WHERE personid = %s", (str(pid),))
else:
return run_sql("SELECT distinct bibrec FROM aidPERSONIDPAPERS WHERE "
"personid = %s and flag=2 or flag=-2", (str(pid),))
def get_all_modified_names_from_personid(since=None):
if since:
all_pids = run_sql("SELECT DISTINCT personid "
"FROM aidPERSONIDPAPERS "
"WHERE flag > -2 "
"AND last_updated > %s"
% since)
else:
all_pids = run_sql("SELECT DISTINCT personid "
"FROM aidPERSONIDPAPERS "
"WHERE flag > -2 ")
return ((name[0][0], set(n[1] for n in name), len(name))
for name in (run_sql(
"SELECT personid, name "
"FROM aidPERSONIDPAPERS "
"WHERE personid = %s "
"AND flag > -2", p)
for p in all_pids))
def destroy_partial_marc_caches():
global MARC_100_700_CACHE
MARC_100_700_CACHE = None
gc.collect()
def populate_partial_marc_caches():
global MARC_100_700_CACHE
if MARC_100_700_CACHE:
return
def br_dictionarize(maptable):
gc.disable()
md = defaultdict(dict)
maxiters = len(set(map(itemgetter(0), maptable)))
for i, v in enumerate(groupby(maptable, itemgetter(0))):
if i % 1000 == 0:
update_status(float(i) / maxiters, 'br_dictionarizing...')
if i % 1000000 == 0:
update_status(float(i) / maxiters, 'br_dictionarizing...GC')
gc.collect()
idx = defaultdict(list)
fn = defaultdict(list)
for _, k, z in v[1]:
idx[k].append(z)
fn[z].append(k)
md[v[0]]['id'] = idx
md[v[0]]['fn'] = fn
update_status_final('br_dictionarizing done')
gc.enable()
return md
def bib_dictionarize(bibtable):
return dict((i[0], (i[1], i[2])) for i in bibtable)
update_status(.0, 'Populating get_grouped_records_table_cache')
bibrec_bib10x = sorted(run_sql("select id_bibrec,id_bibxxx,field_number from bibrec_bib10x"))
update_status(.125, 'Populating get_grouped_records_table_cache')
brd_b10x = br_dictionarize(bibrec_bib10x)
del bibrec_bib10x
update_status(.25, 'Populating get_grouped_records_table_cache')
bibrec_bib70x = sorted(run_sql("select id_bibrec,id_bibxxx,field_number from bibrec_bib70x"))
update_status(.375, 'Populating get_grouped_records_table_cache')
brd_b70x = br_dictionarize(bibrec_bib70x)
del bibrec_bib70x
update_status(.5, 'Populating get_grouped_records_table_cache')
bib10x = (run_sql("select id,tag,value from bib10x"))
update_status(.625, 'Populating get_grouped_records_table_cache')
bibd_10x = bib_dictionarize(bib10x)
del bib10x
update_status(.75, 'Populating get_grouped_records_table_cache')
bib70x = (run_sql("select id,tag,value from bib70x"))
update_status(.875, 'Populating get_grouped_records_table_cache')
bibd_70x = bib_dictionarize(bib70x)
del bib70x
update_status_final('Finished populating get_grouped_records_table_cache')
MARC_100_700_CACHE = {'brb100':brd_b10x, 'brb700':brd_b70x, 'b100':bibd_10x, 'b700':bibd_70x}
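# Sketch of the cache layout built above (keys shown are illustrative):
#
#     MARC_100_700_CACHE = {
#         'brb100': {bibrec: {'id': {bibref: [field_number, ...]},
#                             'fn': {field_number: [bibref, ...]}}},
#         'brb700': {...},   # same shape, built from bibrec_bib70x
#         'b100':   {bibref: (tag, value)},
#         'b700':   {bibref: (tag, value)},
#     }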
def _get_grouped_records_using_caches(brr, *args):
try:
c = MARC_100_700_CACHE['brb%s' % str(brr[0])][brr[2]]
fn = c['id'][brr[1]]
except KeyError:
return dict((arg, []) for arg in args)
if not fn or len(fn) > 1:
#if len fn > 1 it's BAD: the same signature is at least twice on the same paper.
#Let's default to nothing, to be on the safe side.
return dict((arg, []) for arg in args)
ids = set(chain(*(c['fn'][i] for i in fn)))
tuples = [MARC_100_700_CACHE['b%s' % str(brr[0])][i] for i in ids]
results = {}
for t in tuples:
present = [x for x in args if x in t[0]]
assert len(present) <= 1
if present:
arg = present[0]
try:
results[arg].append(t[1])
except KeyError:
results[arg] = [t[1]]
for arg in args:
if arg not in results.keys():
results[arg] = []
return results
def _get_grouped_records_from_db(bibrefrec, *args):
'''
For a given bibrefrec mark:ref,rec this function scans the
bibmarkx table and extracts all records with a tag in args, which
are grouped together with this bibref on the record.
Returns a dictionary with { tag : [extracted_values] },
where the list is empty if no values are found.
@type bibrefrec: (mark(int), ref(int), rec(int))
'''
table, ref, rec = bibrefrec
target_table = "bib%sx" % (str(table)[:-1])
mapping_table = "bibrec_%s" % target_table
group_id = run_sql("SELECT field_number "
"FROM %s "
"WHERE id_bibrec = %d "
"AND id_bibxxx = %d" %
(mapping_table, rec, ref))
if len(group_id) == 0:
# unfortunately the mapping is not found, so
# we cannot find anything
return dict((arg, []) for arg in args)
elif len(group_id) == 1:
# All is fine
field_number = group_id[0][0]
else:
# sounds bad, but ignore the error
field_number = min(x[0] for x in group_id)
grouped = run_sql("SELECT id_bibxxx "
"FROM %s "
"WHERE id_bibrec = %d "
"AND field_number = %d" %
(mapping_table, rec, int(field_number)))
assert len(grouped) > 0, "There should be at least one grouped value per tag."
grouped_s = list_2_SQL_str(grouped, lambda x: str(x[0]))
ret = {}
for arg in args:
qry = run_sql("SELECT value "
"FROM %s "
"WHERE tag LIKE '%%%s%%' "
"AND id IN %s" %
(target_table, arg, grouped_s))
ret[arg] = [q[0] for q in qry]
return ret
def get_grouped_records(bibrefrec, *args):
if MARC_100_700_CACHE:
if bconfig.DEBUG_CHECKS:
assert _get_grouped_records_using_caches(bibrefrec, *args) == _get_grouped_records_from_db(bibrefrec, *args)
return _get_grouped_records_using_caches(bibrefrec, *args)
else:
return _get_grouped_records_from_db(bibrefrec, *args)
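# Illustrative call (hypothetical signature): fetch the affiliation subfield
# grouped with the author signature ('700', 316, 10):
#
#     get_grouped_records(('700', 316, 10), '700__u')
#     # -> {'700__u': ['CERN']} or {'700__u': []} when nothing is attached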
def get_person_with_extid(idd, match_tag=False):
if match_tag:
mtag = " and tag = '%s'" % 'extid:' + match_tag
else:
mtag = ''
pids = run_sql("select personid from aidPERSONIDDATA where data=%s" % '%s' + mtag, (idd,))
return set(pids)
def get_inspire_id(p):
'''
Gets the INSPIRE id for a signature (bibref_table, bibref_value, bibrec)
'''
return get_grouped_records((str(p[0]), p[1], p[2]), str(p[0]) + '__i').values()[0]
def get_claimed_papers_from_papers(papers):
'''
Given a set of papers it returns the subset of claimed papers
@param papers: set of papers
@type papers: frozenset
@return: tuple
'''
papers_s = list_2_SQL_str(papers)
claimed_papers = run_sql("select bibrec from aidPERSONIDPAPERS "
"where bibrec in %s and flag = 1" % papers_s)
return claimed_papers
def collect_personID_external_ids_from_papers(personid, limit_to_claimed_papers=False):
gathered_ids = {}
if limit_to_claimed_papers:
flag = 1
else:
flag = -2
person_papers = run_sql("select bibref_table,bibref_value,bibrec from aidPERSONIDPAPERS where "
"personid=%s and flag > %s", (personid, flag))
if COLLECT_INSPIRE_ID:
inspireids = []
for p in person_papers:
extid = get_inspire_id(p)
if extid:
inspireids.append(extid)
inspireids = set((i[0] for i in inspireids))
gathered_ids['INSPIREID'] = inspireids
# if COLLECT_ORCID:
# orcids = []
# for p in person_papers:
# extid = get_orcid(p)
# if extid:
# orcids.append(extid)
# orcids = set((i[0] for i in orcids))
# gathered_ids['ORCID'] = orcids
# if COLLECT_ARXIV_ID:
# arxivids = []
# for p in person_papers:
# extid = get_arxiv_id(p)
# if extid:
# arxivids.append(extid)
# arxivids = set((i[0] for i in arxivids))
# gathered_ids['ARXIVID'] = arxivids
return gathered_ids
def update_personID_external_ids(persons_list=None, overwrite=False,
limit_to_claimed_papers=False, force_cache_tables=False):
if force_cache_tables:
populate_partial_marc_caches()
if not persons_list:
persons_list = set([x[0] for x in run_sql('select personid from aidPERSONIDPAPERS')])
for idx, pid in enumerate(persons_list):
update_status(float(idx) / float(len(persons_list)), "Updating external ids...")
collected = collect_personID_external_ids_from_papers(pid, limit_to_claimed_papers=limit_to_claimed_papers)
present = get_personiID_external_ids(pid)
if overwrite:
for idd in present.keys():
for k in present[idd]:
remove_personID_external_id(pid, idd, value=k)
present = {}
for idd in collected.keys():
for k in collected[idd]:
if idd not in present or k not in present[idd]:
add_personID_external_id(pid, idd, k)
if force_cache_tables:
destroy_partial_marc_caches()
update_status_final("Updating external ids finished.")
def _get_name_by_bibrecref_from_db(bib):
'''
@param bib: bibrefrec or bibref
@type bib: (mark, bibref, bibrec) OR (mark, bibref)
'''
table = "bib%sx" % str(bib[0])[:-1]
refid = bib[1]
tag = "%s__a" % str(bib[0])
ret = run_sql("select value from %s where id = '%s' and tag = '%s'" % (table, refid, tag))
assert len(ret) == 1, "A bibrefrec must have exactly one name(%s)" % str(bib)
return ret[0][0]
def _get_name_by_bibrecref_from_cache(bib):
'''
@param bib: bibrefrec or bibref
@type bib: (mark, bibref, bibrec) OR (mark, bibref)
'''
table = "b%s" % bib[0]
refid = bib[1]
tag = "%s__a" % str(bib[0])
ret = None
try:
if tag in MARC_100_700_CACHE[table][refid][0]:
ret = MARC_100_700_CACHE[table][refid][1]
except (KeyError, IndexError), e:
#The GC did run and the table is not clean?
#We might want to allow empty response here
raise Exception(str(bib) + str(e))
if bconfig.DEBUG_CHECKS:
assert ret == _get_name_by_bibrecref_from_db(bib)
return ret
def get_name_by_bibrecref(bib):
if MARC_100_700_CACHE:
if bconfig.DEBUG_CHECKS:
assert _get_name_by_bibrecref_from_cache(bib) == _get_name_by_bibrecref_from_db(bib)
return _get_name_by_bibrecref_from_cache(bib)
else:
return _get_name_by_bibrecref_from_db(bib)
def get_collaboration(bibrec):
bibxxx = run_sql("select id_bibxxx from bibrec_bib71x where id_bibrec = %s", (str(bibrec),))
if len(bibxxx) == 0:
return ()
bibxxx = list_2_SQL_str(bibxxx, lambda x: str(x[0]))
ret = run_sql("select value from bib71x where id in %s and tag like '%s'" % (bibxxx, "710__g"))
return [r[0] for r in ret]
def get_key_words(bibrec):
if bconfig.CFG_ADS_SITE:
bibxxx = run_sql("select id_bibxxx from bibrec_bib65x where id_bibrec = %s", (str(bibrec),))
else:
bibxxx = run_sql("select id_bibxxx from bibrec_bib69x where id_bibrec = %s", (str(bibrec),))
if len(bibxxx) == 0:
return ()
bibxxx = list_2_SQL_str(bibxxx, lambda x: str(x[0]))
if bconfig.CFG_ADS_SITE:
ret = run_sql("select value from bib69x where id in %s and tag like '%s'" % (bibxxx, "6531_a"))
else:
ret = run_sql("select value from bib69x where id in %s and tag like '%s'" % (bibxxx, "695__a"))
return [r[0] for r in ret]
def get_all_authors(bibrec):
bibxxx_1 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec = %s", (str(bibrec),))
bibxxx_7 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec = %s", (str(bibrec),))
if bibxxx_1:
bibxxxs_1 = list_2_SQL_str(bibxxx_1, lambda x: str(x[0]))
authors_1 = run_sql("select value from bib10x where tag = '%s' and id in %s" % ('100__a', bibxxxs_1,))
else:
authors_1 = []
if bibxxx_7:
bibxxxs_7 = list_2_SQL_str(bibxxx_7, lambda x: str(x[0]))
authors_7 = run_sql("select value from bib70x where tag = '%s' and id in %s" % ('700__a', bibxxxs_7,))
else:
authors_7 = []
return [a[0] for a in authors_1] + [a[0] for a in authors_7]
def get_title_from_rec(rec):
"""
Returns the title of the paper as a str if found.
Otherwise returns None.
"""
title_id = run_sql("SELECT id_bibxxx "
"FROM bibrec_bib24x "
"WHERE id_bibrec = %s",
(rec,))
if title_id:
title_id_s = list_2_SQL_str(title_id, lambda x: x[0])
title = run_sql("SELECT value "
"FROM bib24x "
"WHERE id in %s "
"AND tag = '245__a'"
% title_id_s)
if title:
return title[0][0]
def get_bib10x():
return run_sql("select id, value from bib10x where tag like %s", ("100__a",))
def get_bib70x():
return run_sql("select id, value from bib70x where tag like %s", ("700__a",))
class Bib_matrix(object):
'''
This small class contains the sparse matrix
and encapsulates it.
'''
# please increment this value every time you
# change the output of the comparison functions
current_comparison_version = 10
__special_items = ((None, -3.), ('+', -2.), ('-', -1.))
special_symbols = dict((x[0], x[1]) for x in __special_items)
special_numbers = dict((x[1], x[0]) for x in __special_items)
def __init__(self, cluster_set=None):
if cluster_set:
self._bibmap = dict((b[1], b[0]) for b in enumerate(cluster_set.all_bibs()))
width = len(self._bibmap)
size = ((width - 1) * width) / 2
self._matrix = Bib_matrix.create_empty_matrix(size)
else:
self._bibmap = dict()
self.creation_time = get_sql_time()
@staticmethod
def create_empty_matrix(length):
ret = numpy.ndarray(shape=(length, 2), dtype=float, order='C')
ret.fill(Bib_matrix.special_symbols[None])
return ret
def _resolve_entry(self, bibs):
assert len(bibs) == 2
first = self._bibmap[bibs[0]]
second = self._bibmap[bibs[1]]
if first > second:
first, second = second, first
assert first < second
return first + ((second - 1) * second) / 2
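    # Worked example of the triangular indexing above (illustrative): with
    # _bibmap = {a: 0, b: 1, c: 2}, the pair (a, c) maps to first=0, second=2,
    # i.e. entry 0 + (1 * 2) / 2 = 1, so every unordered pair of bibs shares a
    # single flat row in self._matrix.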
def __setitem__(self, bibs, val):
entry = self._resolve_entry(bibs)
self._matrix[entry] = Bib_matrix.special_symbols.get(val, val)
def __getitem__(self, bibs):
entry = self._resolve_entry(bibs)
ret = tuple(self._matrix[entry])
return Bib_matrix.special_numbers.get(ret[0], ret)
def __contains__(self, bib):
return bib in self._bibmap
def get_keys(self):
return self._bibmap.keys()
@staticmethod
def get_file_dir(name):
sub_dir = name[:2]
if not sub_dir:
sub_dir = "empty_last_name"
return "%s%s/" % (bconfig.TORTOISE_FILES_PATH, sub_dir)
@staticmethod
def get_map_path(dir_path, name):
return "%s%s.bibmap" % (dir_path, name)
@staticmethod
def get_matrix_path(dir_path, name):
return "%s%s.npy" % (dir_path, name)
def load(self, name, load_map=True, load_matrix=True):
files_dir = Bib_matrix.get_file_dir(name)
if not os.path.isdir(files_dir):
self._bibmap = dict()
return False
try:
if load_map:
bibmap_v = cPickle.load(open(Bib_matrix.get_map_path(files_dir, name), 'r'))
rec_v, self.creation_time, self._bibmap = bibmap_v
if (rec_v != Bib_matrix.current_comparison_version or
Bib_matrix.current_comparison_version < 0): # you can use negative
# version to recalculate
self._bibmap = dict()
return False
if load_matrix:
self._matrix = numpy.load(Bib_matrix.get_matrix_path(files_dir, name))
except (IOError, UnpicklingError):
if load_map:
self._bibmap = dict()
self.creation_time = get_sql_time()
return False
return True
def store(self, name):
files_dir = Bib_matrix.get_file_dir(name)
if not os.path.isdir(files_dir):
try:
os.mkdir(files_dir)
except OSError, e:
if e.errno == 17 or 'file exists' in str(e.strerror).lower():
pass
else:
raise e
bibmap_v = (Bib_matrix.current_comparison_version, self.creation_time, self._bibmap)
cPickle.dump(bibmap_v, open(Bib_matrix.get_map_path(files_dir, name), 'w'))
numpy.save(open(Bib_matrix.get_matrix_path(files_dir, name), "w"), self._matrix)
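# Illustrative sketch (not part of the original module): how Bib_matrix is
# typically driven.  The cluster_set argument is hypothetical here; as in
# __init__ above it only needs an all_bibs() method returning hashable bib
# identifiers.  Entries are addressed by unordered pairs of bibs:
# _resolve_entry maps a pair with first < second to the flat index
# first + second * (second - 1) / 2, which is why __init__ allocates
# ((width - 1) * width) / 2 rows.
def _example_bib_matrix_usage(cluster_set, name):
    matrix = Bib_matrix(cluster_set)
    bibs = matrix.get_keys()
    if len(bibs) >= 2:
        # store a comparison result (value, certainty) for one pair ...
        matrix[bibs[0], bibs[1]] = (0.75, 0.5)
        # ... and read it back in either order; None marks an unset entry,
        # '+' and '-' are the other special markers from __special_items.
        assert matrix[bibs[1], bibs[0]] == (0.75, 0.5)
    matrix.store(name)        # writes <name>.bibmap and <name>.npy
    return matrix.load(name)  # True when the stored files are read back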
def delete_paper_from_personid(rec):
'''
Deletes all information in PERSONID about a given paper
'''
run_sql("delete from aidPERSONIDPAPERS where bibrec = %s", (rec,))
def get_signatures_from_rec(bibrec):
'''
Retrieves all information in PERSONID
about a given bibrec.
'''
return run_sql("select personid, bibref_table, bibref_value, bibrec, name "
"from aidPERSONIDPAPERS where bibrec = %s"
, (bibrec,))
def modify_signature(oldref, oldrec, newref, newname):
'''
Modifies a signature in aidPERSONIDPAPERS.
'''
return run_sql("UPDATE aidPERSONIDPAPERS "
"SET bibref_table = %s, bibref_value = %s, name = %s "
"WHERE bibref_table = %s AND bibref_value = %s AND bibrec = %s"
, (str(newref[0]), newref[1], newname,
str(oldref[0]), oldref[1], oldrec))
def find_pids_by_name(name):
'''
Finds names and personids by a prefix name.
'''
return set(run_sql("SELECT personid, name "
"FROM aidPERSONIDPAPERS "
"WHERE name like %s"
, (name + ',%',)))
def find_pids_by_exact_name(name):
"""
Finds personids by an exact name match.
"""
return set(run_sql("SELECT personid "
"FROM aidPERSONIDPAPERS "
"WHERE name = %s"
, (name,)))
def remove_sigs(signatures):
'''
Removes records from aidPERSONIDPAPERS
'''
for sig in signatures:
run_sql("DELETE FROM aidPERSONIDPAPERS "
"WHERE bibref_table like %s AND bibref_value = %s AND bibrec = %s"
, (str(sig[0]), sig[1], sig[2]))
def remove_personid_papers(pids):
'''
Removes all signatures from aidPERSONIDPAPERS with pid in pids
'''
if pids:
run_sql("delete from aidPERSONIDPAPERS where personid in %s"
% list_2_SQL_str(pids))
def get_full_personid_papers(table_name="`aidPERSONIDPAPERS`"):
'''
Get all columns and rows from aidPERSONIDPAPERS
or any other table with the same structure.
'''
return run_sql("select personid, bibref_table, "
"bibref_value, bibrec, name, flag, "
"lcul from %s" % table_name)
def get_full_results():
'''
Deprecated. Should be removed soon.
'''
return run_sql("select personid, bibref_table, bibref_value, bibrec "
"from aidRESULTS")
def get_lastname_results(last_name):
'''
Returns rows from aidRESULTS which share a common last name.
'''
return run_sql("select personid, bibref_table, bibref_value, bibrec "
"from aidRESULTS "
"where personid like '" + last_name + ".%'")
def get_full_personid_data(table_name="`aidPERSONIDDATA`"):
'''
Get all columns and rows from aidPERSONIDDATA
or any other table with the same structure.
'''
return run_sql("select personid, tag, data, "
"opt1, opt2, opt3 from %s" % table_name)
def get_specific_personid_full_data(pid):
'''
Get all columns and rows from aidPERSONIDDATA
'''
return run_sql("select personid, tag, data, "
"opt1, opt2, opt3 from aidPERSONIDDATA where personid=%s "
, (pid,))
def get_canonical_names_by_pid(pid):
'''
Get all data that has as a tag canonical_name from aidPERSONIDDATA
'''
return run_sql("select data "
"from aidPERSONIDDATA where personid=%s and tag=%s"
, (pid, "canonical_name"))
def get_orcids_by_pids(pid):
'''
Get all data that has as a tag extid:ORCID from aidPERSONIDDATA
'''
return run_sql("select data "
"from aidPERSONIDDATA where personid=%s and tag=%s"
, (pid, "extid:ORCID"))
def get_inspire_ids_by_pids(pid):
'''
Get all data that has as a tag extid:INSPIREID from aidPERSONIDDATA
'''
return run_sql("select data "
"from aidPERSONIDDATA where personid=%s and tag=%s"
, (pid, "extid:INSPIREID"))
def get_uids_by_pids(pid):
'''
Get all data that has as a tag uid from aidPERSONIDDATA
'''
return run_sql("select data "
"from aidPERSONIDDATA where personid=%s and tag=%s"
, (pid, "uid"))
def get_name_string_to_pid_dictionary():
'''
Get a dictionary which maps name strings to person ids
'''
namesdict = {}
all_names = run_sql("select personid,name from aidPERSONIDPAPERS")
for x in all_names:
namesdict.setdefault(x[1], set()).add(x[0])
return namesdict
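# Illustrative sketch (not part of the original module): the dictionary maps
# every name string found in aidPERSONIDPAPERS to the set of personids
# carrying it, so ambiguous name strings can be spotted without extra queries.
def _example_ambiguous_name_strings():
    namesdict = get_name_string_to_pid_dictionary()
    return dict((name, pids) for name, pids in namesdict.iteritems()
                if len(pids) > 1)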
# Could be useful to optimize rabbit; still unused and untested, watch out.
def get_bibrecref_to_pid_dictionary():
brr2pid = {}
all_brr = run_sql("select personid,bibref_table,bibref_value.bibrec from aidPERSONIDPAPERS")
for x in all_brr:
brr2pid.setdefault(tuple(x[1:]), set()).add(x[0])
return brr2pid
def check_personid_papers(output_file=None):
'''
Checks all invariants of personid.
Writes to stdout if no output_file is given.
'''
if output_file:
fp = open(output_file, "w")
printer = lambda x: fp.write(x + '\n')
else:
printer = bibauthor_print
checkers = (
check_wrong_names,
check_duplicated_papers,
check_duplicated_signatures,
check_wrong_rejection,
check_canonical_names,
check_empty_personids
#check_claim_inspireid_contradiction
)
# Run every check and collect the results in a list first: combining the
# calls with `and`/`or` (or passing a bare generator to all()) would
# short-circuit and skip some of the checks.
return all([check(printer) for check in checkers])
def repair_personid(output_file=None):
'''
This should make check_personid_papers() return True.
'''
if output_file:
fp = open(output_file, "w")
printer = lambda x: fp.write(x + '\n')
else:
printer = bibauthor_print
checkers = (
check_wrong_names,
check_duplicated_papers,
check_duplicated_signatures,
check_wrong_rejection,
check_canonical_names,
check_empty_personids
#check_claim_inspireid_contradiction
)
first_check = [check(printer) for check in checkers]
repair_pass = [check(printer, repair=True) for check in checkers]
last_check = [check(printer) for check in checkers]
if not all(first_check):
assert not(all(repair_pass))
assert all(last_check)
return all(last_check)
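# Illustrative sketch (not part of the original module): the intended
# consistency workflow is to run the checks first and to attempt a repair
# only when some of them fail; repair_personid() re-runs the same checkers
# with repair=True and verifies that a final pass is clean.
def _example_consistency_workflow(report_file=None):
    if check_personid_papers(report_file):
        return True
    return repair_personid(report_file)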
def check_duplicated_papers(printer, repair=False):
all_ok = True
bibrecs_to_reassign = []
recs = run_sql("select personid,bibrec from aidPERSONIDPAPERS where flag <> %s", (-2,))
d = {}
for x, y in recs:
d.setdefault(x, []).append(y)
for pid , bibrec in d.iteritems():
if not len(bibrec) == len(set(bibrec)):
all_ok = False
dups = sorted(bibrec)
dups = [x for i, x in enumerate(dups[0:len(dups) - 1]) if x == dups[i + 1]]
printer("Person %d has duplicated papers: %s" % (pid, dups))
if repair:
for dupbibrec in dups:
printer("Repairing duplicated bibrec %s" % str(dupbibrec))
involved_claimed = run_sql("select personid,bibref_table,bibref_value,bibrec,flag "
"from aidPERSONIDPAPERS where personid=%s and bibrec=%s "
"and flag >= 2", (pid, dupbibrec))
if len(involved_claimed) != 1:
bibrecs_to_reassign.append(dupbibrec)
run_sql("delete from aidPERSONIDPAPERS where personid=%s and bibrec=%s", (pid, dupbibrec))
else:
involved_not_claimed = run_sql("select personid,bibref_table,bibref_value,bibrec,flag "
"from aidPERSONIDPAPERS where personid=%s and bibrec=%s "
"and flag < 2", (pid, dupbibrec))
for v in involved_not_claimed:
run_sql("delete from aidPERSONIDPAPERS where personid=%s and bibref_table=%s "
"and bibref_value=%s and bibrec=%s and flag=%s", (v))
if repair and bibrecs_to_reassign:
printer("Reassigning deleted bibrecs %s" % str(bibrecs_to_reassign))
from invenio.bibauthorid_rabbit import rabbit
rabbit(bibrecs_to_reassign)
return all_ok
def check_duplicated_signatures(printer, repair=False):
all_ok = True
brr = run_sql("select bibref_table, bibref_value, bibrec from aidPERSONIDPAPERS where flag > %s", ("-2",))
bibrecs_to_reassign = []
d = {}
for x, y, z in brr:
d.setdefault(z, []).append((x, y))
for bibrec, bibrefs in d.iteritems():
if not len(bibrefs) == len(set(bibrefs)):
all_ok = False
dups = sorted(bibrefs)
dups = [x for i, x in enumerate(dups[0:len(dups) - 1]) if x == dups[i + 1]]
printer("Paper %d has duplicated signatures: %s" % (bibrec, dups))
if repair:
for dup in set(dups):
printer("Repairing duplicate %s" % str(dup))
claimed = run_sql("select personid,bibref_table,bibref_value,bibrec from "
"aidPERSONIDPAPERS where bibref_table=%s and bibref_value=%s "
"and bibrec=%s and flag=2", (dup[0], dup[1], bibrec))
if len(claimed) != 1:
bibrecs_to_reassign.append(bibrec)
run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and "
"bibref_value = %s and bibrec = %s", (dup[0], dup[1], bibrec))
else:
run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and bibref_value=%s "
"and bibrec=%s and flag<2", (dup[0], dup[1], bibrec))
if repair and bibrecs_to_reassign:
printer("Reassigning deleted duplicates %s" % str(bibrecs_to_reassign))
from invenio.bibauthorid_rabbit import rabbit
rabbit(bibrecs_to_reassign)
return all_ok
def get_wrong_names():
'''
Returns a generator with all wrong names in aidPERSONIDPAPERS,
together with their total count.
Every element is (table, ref, correct_name).
'''
bib100 = dict(((x[0], create_normalized_name(split_name_parts(x[1]))) for x in get_bib10x()))
bib700 = dict(((x[0], create_normalized_name(split_name_parts(x[1]))) for x in get_bib70x()))
pidnames100 = set(run_sql("select bibref_value, name from aidPERSONIDPAPERS "
" where bibref_table='100'"))
pidnames700 = set(run_sql("select bibref_value, name from aidPERSONIDPAPERS "
" where bibref_table='700'"))
wrong100 = set(('100', x[0], bib100.get(x[0], None)) for x in pidnames100 if x[1] != bib100.get(x[0], None))
wrong700 = set(('700', x[0], bib700.get(x[0], None)) for x in pidnames700 if x[1] != bib700.get(x[0], None))
total = len(wrong100) + len(wrong700)
return chain(wrong100, wrong700), total
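# Illustrative sketch (not part of the original module): get_wrong_names()
# returns a (generator, total) pair; each generated element is
# ('100'|'700', bibref_value, correct_name), and correct_name is None when
# the bibref no longer exists in bib10x/bib70x.
def _example_summarize_wrong_names():
    wrong_names, total = get_wrong_names()
    outdated = len([w for w in wrong_names if w[2]])
    return total, outdated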
def check_wrong_names(printer, repair=False):
ret = True
wrong_names, number = get_wrong_names()
if number > 0:
ret = False
printer("%d corrupted names in aidPERSONIDPAPERS." % number)
for wrong_name in wrong_names:
if wrong_name[2]:
printer("Outdated name, '%s'(%s:%d)." % (wrong_name[2], wrong_name[0], wrong_name[1]))
else:
printer("Invalid id(%s:%d)." % (wrong_name[0], wrong_name[1]))
if repair:
printer("Fixing wrong name: %s" % str(wrong_name))
if wrong_name[2]:
run_sql("update aidPERSONIDPAPERS set name=%s where bibref_table=%s and bibref_value=%s",
(wrong_name[2], wrong_name[0], wrong_name[1]))
else:
run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and bibref_value=%s",
(wrong_name[0], wrong_name[1]))
return ret
def check_canonical_names(printer, repair=False):
ret = True
pid_cn = run_sql("select personid, data from aidPERSONIDDATA where tag = %s", ('canonical_name',))
pid_2_cn = dict((k, len(list(d))) for k, d in groupby(sorted(pid_cn, key=itemgetter(0)), key=itemgetter(0)))
pid_to_repair = []
for pid in get_existing_personids():
canon = pid_2_cn.get(pid, 0)
if canon != 1:
if canon == 0:
papers = run_sql("select count(*) from aidPERSONIDPAPERS where personid = %s", (pid,))[0][0]
if papers != 0:
printer("Personid %d does not have a canonical name, but have %d papers." % (pid, papers))
ret = False
pid_to_repair.append(pid)
else:
printer("Personid %d has %d canonical names.", (pid, canon))
pid_to_repair.append(pid)
ret = False
if repair and not ret:
printer("Repairing canonical names for pids: %s" % str(pid_to_repair))
update_personID_canonical_names(pid_to_repair, overwrite=True)
return ret
def check_empty_personids(printer, repair=False):
ret = True
paper_pids = set(p[0] for p in run_sql("select personid from aidPERSONIDPAPERS"))
data_pids = set(p[0] for p in run_sql("select personid from aidPERSONIDDATA"))
for p in data_pids - paper_pids:
fields = run_sql("select count(*) from aidPERSONIDDATA where personid = %s and tag <> %s", (p, "canonical_name",))[0][0]
if fields == 0:
printer("Personid %d has no papers and nothing else than canonical_name." % p)
if repair:
printer("Deleting empty person %s" % str(p))
run_sql("delete from aidPERSONIDDATA where personid=%s", (p,))
ret = False
return ret
def check_wrong_rejection(printer, repair=False):
all_ok = True
to_reassign = []
to_deal_with = []
all_rejections = set(run_sql("select bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS "
"where flag = %s",
('-2',)))
all_confirmed = set(run_sql("select bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS "
"where flag > %s",
('-2',)))
not_assigned = all_rejections - all_confirmed
if not_assigned:
all_ok = False
for s in not_assigned:
printer('Paper (%s:%s,%s) was rejected but never reassigned' % s)
to_reassign.append(s)
all_rejections = set(run_sql("select personid, bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS "
"where flag = %s",
('-2',)))
all_confirmed = set(run_sql("select personid, bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS "
"where flag > %s",
('-2',)))
both_confirmed_and_rejected = all_rejections & all_confirmed
if both_confirmed_and_rejected:
all_ok = False
for i in both_confirmed_and_rejected:
printer("Conflicting assignment/rejection: %s" % str(i))
to_deal_with.append(i)
if repair and (to_reassign or to_deal_with):
from invenio.bibauthorid_rabbit import rabbit
if to_reassign:
#Rabbit is not designed to reassign signatures which are rejected but not assigned:
#all signatures should stay assigned; if a rejection occurs, the signature should be
#moved to a new place and the rejection entry added, but it should never exist as a
#rejection only. Hence, to force rabbit to reassign it, we have to delete the rejection.
printer("Reassigning bibrecs with missing entries: %s" % str(to_reassign))
for sig in to_reassign:
run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and "
"bibref_value=%s and bibrec = %s and flag=-2", (sig))
rabbit([s[2] for s in to_reassign])
if to_deal_with:
#We got claims and rejections on the same person for the same paper. Let's forget about
#it and reassign it automatically, they'll make up their minds sooner or later.
printer("Deleting and reassigning bibrefrecs with conflicts %s" % str(to_deal_with))
for sig in to_deal_with:
run_sql("delete from aidPERSONIDPAPERS where personid=%s and bibref_table=%s and "
"bibref_value=%s and bibrec = %s", (sig))
rabbit(map(itemgetter(3), to_deal_with))
return all_ok
def check_merger():
'''
This function presumes that copy_personid was
called before the merger.
'''
is_ok = True
old_claims = set(run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
"from aidPERSONIDPAPERS_copy "
"where flag = -2 or flag = 2"))
cur_claims = set(run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
"from aidPERSONIDPAPERS "
"where flag = -2 or flag = 2"))
errors = ((old_claims - cur_claims, "Some claims were lost during the merge."),
(cur_claims - old_claims, "Some new claims appeared after the merge."))
act = { -2 : 'Rejection', 2 : 'Claim' }
for err_set, err_msg in errors:
if err_set:
is_ok = False
bibauthor_print(err_msg)
bibauthor_print("".join(" %s: personid %d %d:%d,%d\n" %
(act[cl[4]], cl[0], int(cl[1]), cl[2], cl[3]) for cl in err_set))
old_assigned = set(run_sql("select bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS_copy"))
#"where flag <> -2 and flag <> 2"))
cur_assigned = set(run_sql("select bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS"))
#"where flag <> -2 and flag <> 2"))
errors = ((old_assigned - cur_assigned, "Some signatures were lost during the merge."),
(cur_assigned - old_assigned, "Some new signatures appeared after the merge."))
for err_sig, err_msg in errors:
if err_sig:
is_ok = False
bibauthor_print(err_msg)
bibauthor_print("".join(" %s:%d,%d\n" % sig for sig in err_sig))
return is_ok
def check_results():
is_ok = True
all_result_rows = run_sql("select personid,bibref_table,bibref_value,bibrec from aidRESULTS")
keyfunc = lambda x: x[1:]
duplicated = (d for d in (list(d) for k, d in groupby(sorted(all_result_rows, key=keyfunc), key=keyfunc)) if len(d) > 1)
for dd in duplicated:
is_ok = False
for d in dd:
print "Duplicated row in aidRESULTS"
print "%s %s %s %s" % d
print
clusters = {}
for rr in all_result_rows:
clusters[rr[0]] = clusters.get(rr[0], []) + [rr[3]]
faulty_clusters = dict((cid, len(recs) - len(set(recs)))
for cid, recs in clusters.items()
if not len(recs) == len(set(recs)))
if faulty_clusters:
is_ok = False
print "Recids NOT unique in clusters!"
print ("A total of %s clusters hold an average of %.2f duplicates" %
(len(faulty_clusters), (sum(faulty_clusters.values()) / float(len(faulty_clusters)))))
for c in faulty_clusters:
print "Name: %-20s Size: %4d Faulty: %2d" % (c, len(clusters[c]), faulty_clusters[c])
return is_ok
def check_claim_inspireid_contradiction():
iids10x = run_sql("select id from bib10x where tag = '100__i'")
iids70x = run_sql("select id from bib70x where tag = '700__i'")
refs10x = set(x[0] for x in run_sql("select id from bib10x where tag = '100__a'"))
refs70x = set(x[0] for x in run_sql("select id from bib70x where tag = '700__a'"))
if iids10x:
iids10x = list_2_SQL_str(iids10x, lambda x: str(x[0]))
iids10x = run_sql("select id_bibxxx, id_bibrec, field_number "
"from bibrec_bib10x "
"where id_bibxxx in %s"
% iids10x)
iids10x = ((row[0], [(ref, rec) for ref, rec in run_sql(
"select id_bibxxx, id_bibrec "
"from bibrec_bib10x "
"where id_bibrec = '%s' "
"and field_number = '%s'"
% row[1:])
if ref in refs10x])
for row in iids10x)
else:
iids10x = ()
if iids70x:
iids70x = list_2_SQL_str(iids70x, lambda x: str(x[0]))
iids70x = run_sql("select id_bibxxx, id_bibrec, field_number "
"from bibrec_bib70x "
"where id_bibxxx in %s"
% iids70x)
iids70x = ((row[0], [(ref, rec) for ref, rec in run_sql(
"select id_bibxxx, id_bibrec "
"from bibrec_bib70x "
"where id_bibrec = '%s' "
"and field_number = '%s'"
% (row[1:]))
if ref in refs70x])
for row in iids70x)
else:
iids70x = ()
# [(iids, [bibs])]
inspired = list(chain(((iid, list(set(('100',) + bib for bib in bibs))) for iid, bibs in iids10x),
((iid, list(set(('700',) + bib for bib in bibs))) for iid, bibs in iids70x)))
assert all(len(x[1]) == 1 for x in inspired)
inspired = ((k, map(itemgetter(0), map(itemgetter(1), d)))
for k, d in groupby(sorted(inspired, key=itemgetter(0)), key=itemgetter(0)))
# [(inspireid, [bibs])]
inspired = [([(run_sql("select personid "
"from aidPERSONIDPAPERS "
"where bibref_table = %s "
"and bibref_value = %s "
"and bibrec = %s "
"and flag = '2'"
, bib), bib)
for bib in cluster[1]], cluster[0])
for cluster in inspired]
# [([([pid], bibs)], inspireid)]
for cluster, iid in inspired:
pids = set(chain.from_iterable(imap(itemgetter(0), cluster)))
if len(pids) > 1:
print "InspireID: %s links the following papers:" % iid
print map(itemgetter(1), cluster)
print "More than one personid claimed them:"
print list(pids)
print
continue
if len(pids) == 0:
# not even one paper with this inspireid has been
# claimed, screw it
continue
pid = list(pids)[0][0]
# The last step is to check all non-claimed papers for being
# claimed by the person on some different signature.
problem = (run_sql("select bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS "
"where bibrec = %s "
"and personid = %s "
"and flag = %s"
, (bib[2], pid, 2))
for bib in (bib for lpid, bib in cluster if not lpid))
problem = list(chain.from_iterable(problem))
if problem:
print "A personid has claimed a paper from an inspireid cluster and a contradictory paper."
print "Personid %d" % pid
print "Inspireid cluster %s" % str(map(itemgetter(1), cluster))
print "Contradicting claims: %s" % str(problem)
print
def get_all_bibrecs():
'''
Get all record ids present in aidPERSONIDPAPERS
'''
return set([x[0] for x in run_sql("select bibrec from aidPERSONIDPAPERS")])
def get_bibrefrec_to_pid_flag_mapping():
'''
Create a map from signatures to lists of (personid, flag) pairs
'''
whole_table = run_sql("select bibref_table,bibref_value,bibrec,personid,flag from aidPERSONIDPAPERS")
gc.disable()
ret = {}
for x in whole_table:
sig = (x[0], x[1], x[2])
pid_flag = (x[3], x[4])
ret[sig] = ret.get(sig , []) + [pid_flag]
gc.collect()
gc.enable()
return ret
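# Illustrative sketch (not part of the original module): the mapping keys are
# (bibref_table, bibref_value, bibrec) signatures and the values are lists of
# (personid, flag) pairs, so claimed signatures can be filtered in memory
# without further queries.
def _example_claimed_signatures():
    mapping = get_bibrefrec_to_pid_flag_mapping()
    # flag > 1 marks manually claimed papers, as in get_claimed_papers() below
    return [sig for sig, pid_flags in mapping.iteritems()
            if any(flag > 1 for _pid, flag in pid_flags)]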
def remove_all_bibrecs(bibrecs):
'''
Remove given record ids from the aidPERSONIDPAPERS table
@param bibrecs:
@type bibrecs:
'''
bibrecs_s = list_2_SQL_str(bibrecs)
run_sql("delete from aidPERSONIDPAPERS where bibrec in %s" % bibrecs_s)
def empty_results_table():
'''
Get rid of all tortoise results
'''
run_sql("TRUNCATE aidRESULTS")
def save_cluster(named_cluster):
'''
Save a cluster in aidRESULTS
@param named_cluster:
@type named_cluster:
'''
name, cluster = named_cluster
for bib in cluster.bibs:
run_sql("INSERT INTO aidRESULTS "
"(personid, bibref_table, bibref_value, bibrec) "
"VALUES (%s, %s, %s, %s) "
, (name, str(bib[0]), bib[1], bib[2]))
def remove_result_cluster(name):
'''
Remove result cluster using name string
@param name:
@type name:
'''
run_sql("DELETE FROM aidRESULTS "
"WHERE personid like '%s.%%'"
% name)
def personid_name_from_signature(sig):
'''
Find personid and name string of a signature
@param sig:
@type sig:
'''
ret = run_sql("select personid, name "
"from aidPERSONIDPAPERS "
"where bibref_table = %s and bibref_value = %s and bibrec = %s "
"and flag > '-2'"
, sig)
assert len(ret) < 2, ret
return ret
def personid_from_signature(sig):
'''
Find personid owner of a signature
@param sig:
@type sig:
'''
ret = run_sql("select personid, flag "
"from aidPERSONIDPAPERS "
"where bibref_table = %s and bibref_value = %s and bibrec = %s "
"and flag > '-2'"
, sig)
assert len(ret) < 2, ret
return ret
def get_signature_info(sig):
'''
Get personid and flag relative to a signature
@param sig:
@type sig:
'''
ret = run_sql("select personid, flag "
"from aidPERSONIDPAPERS "
"where bibref_table = %s and bibref_value = %s and bibrec = %s "
"order by flag"
, sig)
return ret
def get_claimed_papers(pid):
'''
Find all papers which have been manually claimed
@param pid:
@type pid:
'''
return run_sql("select bibref_table, bibref_value, bibrec "
"from aidPERSONIDPAPERS "
"where personid = %s "
"and flag > %s",
(pid, 1))
def copy_personids():
'''
Make a copy of aidPERSONID tables to aidPERSONID*_copy tables for later comparison/restore
'''
run_sql("DROP TABLE IF EXISTS `aidPERSONIDDATA_copy`")
run_sql("CREATE TABLE `aidPERSONIDDATA_copy` ( "
"`personid` BIGINT( 8 ) UNSIGNED NOT NULL , "
"`tag` VARCHAR( 64 ) NOT NULL , "
"`data` VARCHAR( 256 ) NOT NULL , "
"`opt1` MEDIUMINT( 8 ) DEFAULT NULL , "
"`opt2` MEDIUMINT( 8 ) DEFAULT NULL , "
"`opt3` VARCHAR( 256 ) DEFAULT NULL , "
"KEY `personid-b` ( `personid` ) , "
"KEY `tag-b` ( `tag` ) , "
"KEY `data-b` ( `data` ) , "
"KEY `opt1` ( `opt1` ) "
") ENGINE = MYISAM DEFAULT CHARSET = utf8")
run_sql("INSERT INTO `aidPERSONIDDATA_copy` "
"SELECT * "
"FROM `aidPERSONIDDATA`")
run_sql("DROP TABLE IF EXISTS `aidPERSONIDPAPERS_copy`")
run_sql("CREATE TABLE `aidPERSONIDPAPERS_copy` ( "
"`personid` bigint( 8 ) unsigned NOT NULL , "
"`bibref_table` enum( '100', '700' ) NOT NULL , "
"`bibref_value` mediumint( 8 ) unsigned NOT NULL , "
"`bibrec` mediumint( 8 ) unsigned NOT NULL , "
"`name` varchar( 256 ) NOT NULL , "
"`flag` smallint( 2 ) NOT NULL DEFAULT '0', "
"`lcul` smallint( 2 ) NOT NULL DEFAULT '0', "
"`last_updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP , "
"KEY `personid-b` ( `personid` ) , "
"KEY `reftable-b` ( `bibref_table` ) , "
"KEY `refvalue-b` ( `bibref_value` ) , "
"KEY `rec-b` ( `bibrec` ) , "
"KEY `name-b` ( `name` ) , "
"KEY `timestamp-b` ( `last_updated` ) , "
"KEY `ptvrf-b` ( `personid` , `bibref_table` , `bibref_value` , `bibrec` , `flag` ) "
") ENGINE = MyISAM DEFAULT CHARSET = utf8")
run_sql("INSERT INTO `aidPERSONIDPAPERS_copy` "
"SELECT * "
"FROM `aidPERSONIDPAPERS")
def delete_empty_persons():
'''
Find and eliminate empty persons (not holding papers nor any information other than canonical_name).
'''
pp = run_sql("select personid from aidPERSONIDPAPERS")
pp = set(p[0] for p in pp)
pd = run_sql("select personid from aidPERSONIDDATA")
pd = set(p[0] for p in pd)
fpd = run_sql("select personid from aidPERSONIDDATA where tag <> 'canonical_name'")
fpd = set(p[0] for p in fpd)
to_delete = pd - (pp | fpd)
if to_delete:
run_sql("delete from aidPERSONIDDATA where personid in %s" % list_2_SQL_str(to_delete))
def restore_personids():
'''
Restore personid tables from last copy saved with copy_personids
'''
run_sql("TRUNCATE `aidPERSONIDDATA`")
run_sql("INSERT INTO `aidPERSONIDDATA` "
"SELECT * "
"FROM `aidPERSONIDDATA_copy`")
run_sql("TRUNCATE `aidPERSONIDPAPERS`")
run_sql("INSERT INTO `aidPERSONIDPAPERS` "
"SELECT * "
"FROM `aidPERSONIDPAPERS_copy")
def resolve_affiliation(ambiguous_aff_string):
"""
This is a method available in the context of author disambiguation in ADS
only. No other platform provides the db table used by this function.
@warning: to be used in an ADS context only.
@param ambiguous_aff_string: Ambiguous affiliation string
@type ambiguous_aff_string: str
@return: The normalized version of the name string as presented in the database
@rtype: str
"""
if not ambiguous_aff_string or not bconfig.CFG_ADS_SITE:
return "None"
aff_id = run_sql("select aff_id from ads_affiliations where affstring=%s", (ambiguous_aff_string,))
if aff_id:
return aff_id[0][0]
else:
return "None"
def get_free_pids():
'''
Returns an iterator over all free personids.
It fills the holes in the numbering before yielding new, higher ids.
'''
all_pids = frozenset(x[0] for x in chain(
run_sql("select personid from aidPERSONIDPAPERS") ,
run_sql("select personid from aidPERSONIDDATA")))
return ifilter(lambda x: x not in all_pids, count())
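# Illustrative sketch (not part of the original module): the iterator is
# count() filtered against the personids already in use, so it first yields
# the unused ids below the current maximum (the "holes") and then continues
# with fresh ids above it.
def _example_take_free_pids(how_many):
    free = get_free_pids()
    return [free.next() for _dummy in xrange(how_many)]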
def remove_results_outside(many_names):
'''
Delete rows from aidRESULTS whose last-name cluster is not in many_names
@param many_names:
@type many_names:
'''
many_names = frozenset(many_names)
res_names = frozenset(x[0].split(".")[0] for x in run_sql("select personid from aidRESULTS"))
for name in res_names - many_names:
run_sql("delete from aidRESULTS where personid like '%s.%%'" % name)
def get_signatures_from_bibrefs(bibrefs):
'''
@param bibrefs:
@type bibrefs:
'''
bib10x = ifilter(lambda x: x[0] == 100, bibrefs)
bib10x_s = list_2_SQL_str(bib10x, lambda x: x[1])
bib70x = ifilter(lambda x: x[0] == 700, bibrefs)
bib70x_s = list_2_SQL_str(bib70x, lambda x: x[1])
valid_recs = set(get_all_valid_bibrecs())
if bib10x_s != '()':
sig10x = run_sql("select 100, id_bibxxx, id_bibrec "
"from bibrec_bib10x "
"where id_bibxxx in %s"
% (bib10x_s,))
else:
sig10x = ()
if bib70x_s != '()':
sig70x = run_sql("select 700, id_bibxxx, id_bibrec "
"from bibrec_bib70x "
"where id_bibxxx in %s"
% (bib70x_s,))
else:
sig70x = ()
return ifilter(lambda x: x[2] in valid_recs, chain(set(sig10x), set(sig70x)))
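# Illustrative sketch (not part of the original module): bibrefs are expected
# as (table, ref_value) pairs with table 100 or 700; the helper expands them
# into (table, id_bibxxx, id_bibrec) signatures restricted to the records of
# the configured collections.  ref_values_100 is a hypothetical iterable of
# bib10x ids.
def _example_signatures_for_100_refs(ref_values_100):
    bibrefs = [(100, ref) for ref in ref_values_100]
    return list(get_signatures_from_bibrefs(bibrefs))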
def get_all_valid_bibrecs():
'''
Returns a list of valid record ids
'''
collection_restriction_pattern = " or ".join(["980__a:\"%s\"" % x for x in bconfig.LIMIT_TO_COLLECTIONS])
return perform_request_search(p="%s" % collection_restriction_pattern, rg=0)
def get_coauthor_pids(pid, exclude_bibrecs=None):
'''
Find personids sharing bibrecs with the given pid, optionally excluding a given set of common bibrecs.
@param pid:
@type pid:
@param exclude_bibrecs:
@type exclude_bibrecs:
'''
papers = get_person_bibrecs(pid)
if exclude_bibrecs:
papers = set(papers) - set(exclude_bibrecs)
if not papers:
return []
papers_s = list_2_SQL_str(papers)
pids = run_sql("select personid,bibrec from aidPERSONIDPAPERS "
"where bibrec in %s and flag > -2" % papers_s)
pids = set((int(p[0]), int(p[1])) for p in pids)
pids = sorted([p[0] for p in pids])
pids = groupby(pids)
pids = [(key, len(list(val))) for key, val in pids if key != pid]
pids = sorted(pids, key=lambda x: x[1], reverse=True)
return pids
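# Illustrative sketch (not part of the original module): get_coauthor_pids()
# returns (coauthor_personid, number_of_shared_papers) pairs already sorted by
# the number of shared papers, so the most frequent collaborators come first.
def _example_top_coauthors(pid, limit=5):
    return get_coauthor_pids(pid)[:limit]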
def get_doi_from_rec(recid):
"""
Returns the DOI of the paper as a str if found.
Otherwise returns None.
0247 $2 DOI $a id
"""
idx = run_sql("SELECT id_bibxxx, field_number FROM bibrec_bib02x WHERE id_bibrec = %s", (recid,))
if idx:
doi_id_s = list_2_SQL_str(idx, lambda x: x[0])
doi = run_sql("SELECT id, tag, value FROM bib02x WHERE id in %s " % doi_id_s)
if doi:
grouped = groupby(idx, lambda x: x[1])
doi_dict = dict((x[0],x[1:]) for x in doi)
for group in grouped:
elms = [x[0] for x in list(group[1])]
found = False
code = None
for el in elms:
if doi_dict[el][0] == '0247_2' and doi_dict[el][1] == 'DOI':
found = True
elif doi_dict[el][0] == '0247_a':
code = doi_dict[el][1]
if found and code:
return code
return None
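# Illustrative sketch (not part of the original module): mirrors the 0247
# logic of get_doi_from_rec on hypothetical in-memory rows: a DOI is returned
# only when, within the same field_number group, the $2 subfield equals 'DOI'
# and an $a subfield carries the identifier.
def _example_pick_doi_from_fields(fields):
    # fields: iterable of (field_number, tag, value) triples, e.g.
    # [(1, '0247_2', 'DOI'), (1, '0247_a', '10.1000/xyz')]
    groups = {}
    for field_number, tag, value in fields:
        groups.setdefault(field_number, []).append((tag, value))
    for subfields in groups.itervalues():
        is_doi = False
        code = None
        for tag, value in subfields:
            if tag == '0247_2' and value == 'DOI':
                is_doi = True
            elif tag == '0247_a':
                code = value
        if is_doi and code:
            return code
    return None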
def export_person(person_id):
'''The list of records comes from the aidPERSONIDPAPERS and aidPERSONIDDATA
tables (use the existing helper functions for getting the records).
Exports a structure of dictionaries of tuples of [...] of strings, like:
{'name':('namestring',),
'repeatable_field':({'field1':('val1',)},{'field1':'val2'})}
'''
person_info = defaultdict(defaultdict)
full_names = get_person_db_names_set(person_id)
if full_names:
splitted_names = [split_name_parts(n[0]) for n in full_names]
splitted_names = [x+[len(x[2])] for x in splitted_names]
max_first_names = max([x[4] for x in splitted_names])
full_name_candidates = filter(lambda x: x[4] == max_first_names, splitted_names)
full_name = create_normalized_name(full_name_candidates[0])
person_info['names']['full_name'] = (full_name,)
person_info['names']['surname'] = (full_name_candidates[0][0],)
if full_name_candidates[0][2]:
person_info['names']['first_names'] = (' '.join(full_name_candidates[0][2]),)
person_info['names']['name_variants'] = ('; '.join([create_normalized_name(x) for x in splitted_names]),)
bibrecs = get_person_bibrecs(person_id)
recids_data = []
for recid in bibrecs:
recid_dict = defaultdict(defaultdict)
recid_dict['INSPIRE-record-id'] = (str(recid),)
recid_dict['INSPIRE-record-url'] = ('%s/record/%s' % (CFG_SITE_URL, str(recid)),)
rec_doi = get_doi_from_rec(recid)
if rec_doi:
recid_dict['DOI']= (str(rec_doi),)
recids_data.append(recid_dict)
person_info['records']['record'] = tuple(recids_data)
person_info['identifiers']['INSPIRE_person_ID'] = (str(person_id),)
canonical_names = get_canonical_names_by_pid(person_id)
if canonical_names:
person_info['identifiers']['INSPIRE_canonical_name'] = (str(canonical_names[0][0]),)
person_info['profile_page']['INSPIRE_profile_page'] = ('%s/author/%s' % (CFG_SITE_URL,canonical_names[0][0]),)
else:
person_info['profile_page']['INSPIRE_profile_page'] = ('%s/author/%s' % (CFG_SITE_URL,str(person_id)),)
orcids = get_orcids_by_pids(person_id)
if orcids:
person_info['identifiers']['ORCID'] = tuple(str(x[0]) for x in orcids)
inspire_ids = get_inspire_ids_by_pids(person_id)
if inspire_ids:
person_info['identifiers']['INSPIREID'] = tuple(str(x[0]) for x in inspire_ids)
return person_info
def export_person_to_foaf(person_id):
'''
Exports to FOAF XML a dictionary of dictionaries/tuples of strings as returned by export_person.
'''
infodict = export_person(person_id)
def export(val, indent=0):
if isinstance(val, dict):
contents = list()
for k,v in val.iteritems():
if isinstance(v,tuple):
contents.append( ''.join( [ X[str(k)](indent=indent, body=export(c)) for c in v] ))
else:
contents.append( X[str(k)](indent=indent,body=export(v, indent=indent+1)) )
return ''.join(contents)
elif isinstance(val, str):
return str(X.escaper(val))
else:
raise Exception('Unexpected value type in export_person structure: %s' % str(val))
return X['person'](body=export(infodict, indent=1))
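# Illustrative sketch (not part of the original module): serializing a person
# to the FOAF-like XML produced above and writing it to a file; file_path is
# a hypothetical destination chosen by the caller.
def _example_dump_person_xml(person_id, file_path):
    xml = export_person_to_foaf(person_id)
    fp = open(file_path, 'w')
    try:
        fp.write(xml)
    finally:
        fp.close()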
